import pandas as pd
import jaconv
import re
# ファイルパスを指定
file_path_A = 'path_to_A.csv'
file_path_B = 'path_to_B.xlsx'
output_path_C = 'path_to_C.csv'
# ファイルの読み込み
df_A = pd.read_csv(file_path_A)
df_B = pd.read_excel(file_path_B, sheet_name=0, header=1)
# 正規化関数
def normalize(value):
if pd.isna(value):
return ''
value = str(value)
value = re.sub(r'\s+', '', value)
value = value.upper()
value = jaconv.h2z(value, ascii=True, digit=True)
return value
# Bの3列目を正規化して4列目に追加
df_B['normalized_col_3'] = df_B.iloc[:, 2].apply(normalize)
# 出力用のリスト
output_data = []
# Aの各行に対して処理を行う
for index, row in df_A.iterrows():
a_col4_normalized = normalize(row.iloc[3])
if a_col4_normalized.endswith('G'):
a_col4_normalized = a_col4_normalized[:-1]
match_rows = df_B[(df_B['normalized_col_3'] == a_col4_normalized) & (df_B.iloc[:, 1] == row.iloc[1])]
if not match_rows.empty:
for _, match_row in match_rows.iterrows():
output_data.append([a_col4_normalized, match_row.iloc[2]])
if not str(row.iloc[5]).startswith('4'):
a_col3_normalized = normalize(row.iloc[2])
if a_col3_normalized.endswith('G'):
a_col3_normalized = a_col3_normalized[:-1]
match_rows = df_B[(df_B['normalized_col_3'] == a_col3_normalized) & (df_B.iloc[:, 1] == row.iloc[1])]
if not match_rows.empty:
for _, match_row in match_rows.iterrows():
output_data.append([a_col3_normalized, match_row.iloc[2]])
# データフレームを作成してCSVに出力
df_output = pd.DataFrame(output_data, columns=['Normalized_A_col', 'Matched_B_col3'])
df_output.to_csv(output_path_C, index=False)
print("処理が完了しました。")
Register as a new user and use Qiita more conveniently
- You get articles that match your needs
- You can efficiently read back useful information
- You can use dark theme