0
0

a

Posted at
import pandas as pd
import jaconv
import re

# ファイルパスを指定
file_path_A = 'path_to_A.csv'
file_path_B = 'path_to_B.xlsx'
output_path_C = 'path_to_C.csv'

# ファイルの読み込み
df_A = pd.read_csv(file_path_A)
df_B = pd.read_excel(file_path_B, sheet_name=0, header=1)

# 正規化関数
def normalize(value):
    if pd.isna(value):
        return ''
    value = str(value)
    value = re.sub(r'\s+', '', value)
    value = value.upper()
    value = jaconv.h2z(value, ascii=True, digit=True)
    return value

# Bの3列目を正規化して4列目に追加
df_B['normalized_col_3'] = df_B.iloc[:, 2].apply(normalize)

# 出力用のリスト
output_data = []

# Aの各行に対して処理を行う
for index, row in df_A.iterrows():
    a_col4_normalized = normalize(row.iloc[3])
    
    if a_col4_normalized.endswith('G'):
        a_col4_normalized = a_col4_normalized[:-1]
    
    match_rows = df_B[(df_B['normalized_col_3'] == a_col4_normalized) & (df_B.iloc[:, 1] == row.iloc[1])]
    
    if not match_rows.empty:
        for _, match_row in match_rows.iterrows():
            output_data.append([a_col4_normalized, match_row.iloc[2]])

    if not str(row.iloc[5]).startswith('4'):
        a_col3_normalized = normalize(row.iloc[2])
        
        if a_col3_normalized.endswith('G'):
            a_col3_normalized = a_col3_normalized[:-1]
        
        match_rows = df_B[(df_B['normalized_col_3'] == a_col3_normalized) & (df_B.iloc[:, 1] == row.iloc[1])]
        
        if not match_rows.empty:
            for _, match_row in match_rows.iterrows():
                output_data.append([a_col3_normalized, match_row.iloc[2]])

# データフレームを作成してCSVに出力
df_output = pd.DataFrame(output_data, columns=['Normalized_A_col', 'Matched_B_col3'])
df_output.to_csv(output_path_C, index=False)

print("処理が完了しました。")
0
0
0

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do with signing up
0
0