【初心者🔰】生成AIを使って複数Excelファイルをまとめてみました

Last updated at 2025-01-22Posted at 2025-01-22

1. きっかけ

職場のチームリーダーが毎月のミーティング時に残業に関する注意喚起をおこなっていました。
データをまとめて、可視化できるかなと思い、作成してみました。
各個人の勤怠を１つのファイルにまとめてグラフ化して視覚的に見えるようにしました。生成AI（CopilotとGemini）を活用して、このツールを作成しました

2. 処理内容

同一フォルダ内の複数Excelファイルをまとめて1つのファイルにする
各氏名ごとに1カ月の基準総稼働時間、基本勤務時間（iniファイルで設定）、残業時間を出力
グラフ化したPNGファイルを同一シートに表示
別シートに平均残業時間、標準偏差、中央値を出力

3. 前提条件

各個人ごとの勤怠Excelには氏名と総稼働時間が設定されていること
Iniファイルにはフォルダのパス・各月の基本勤務時間が設定されていること

4. 生成AI(Copilot・Geminiに作成してもらったもの)

架空の勤怠データ（勤怠の内容もプロンプトで指示）
架空の基本総稼働時間データ
Pythonコード
テストパターン（今回は成功ケースのみ実施）

5. Pythonコード

注意：このコードは動作保証を行いません。
環境や設定によっては正常に動作しない場合がありますので、
ご自身の責任においてご使用ください。

import configparser
import pandas as pd
import glob
import os
import logging
import re
import matplotlib.pyplot as plt
import seaborn as sns
from openpyxl import Workbook
from openpyxl.drawing.image import Image
from openpyxl import load_workbook
from datetime import datetime
from PIL import Image as pil_image

plt.rcParams['font.family'] = 'Meiryo' # デフォルトフォントを変更 

# ログの設定
logging.basicConfig(filename='attendance_log.txt', level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# 設定ファイルの読み込み
config = configparser.ConfigParser()
config.read('config.ini')
path = config['Paths']['folder_path']
pattern = config['Paths']['file_pattern']

# 月の番号と英語名の対応辞書を定義
months_dict = {
    1: 'January', 2: 'February', 3: 'March', 4: 'April', 5: 'May', 6: 'June', 7: 'July', 8: 'August', 9: 'September', 10: 'October', 11: 'November', 12: 'December'
}

def load_files(path, pattern):
    try:
        all_files = glob.glob(os.path.join(path, '*'))
        regex = re.compile(pattern)
        matched_files = [file for file in all_files if regex.match(os.path.basename(file))]
        file_count = len(matched_files)
        if file_count == 0:
            raise FileNotFoundError(f"指定されたパターンに一致するファイルが見つかりませんでした。パス: {path}, パターン: {pattern}")
        elif file_count > 100:
            response = input(f"フォルダ内には {file_count} 件のファイルがあります。処理を続行しますか？ (Yes/No): ")
            if response.lower() != 'yes':
                print("処理がキャンセルされました。")
                exit()
        logging.info(f'{file_count} files loaded successfully. Path: {path}, Pattern: {pattern}')
        return matched_files
    except Exception as e:
        logging.error(f"ファイルの読み込みエラー: {e}")
        exit()

def read_data(files):
    data_list = []
    try:
        for file in files:
            workbook = load_workbook(file, data_only=True)
            sheet = workbook.active
            total_work_time_str = sheet['C4'].value
            try:
                total_work_time_parts = total_work_time_str.split(':')
                hours = int(total_work_time_parts[0])
                minutes = int(total_work_time_parts[1])
                total_work_time = hours + minutes / 60
            except (ValueError, IndexError, AttributeError):
                print(f"ファイル {file} の C4セルの値が無効です: {total_work_time_str}")
                total_work_time = 0
            print(f"総稼働時間取得結果 (C4): {total_work_time}")
            df = pd.read_excel(file, header=None)
            print(f"ファイル名: {file} の先頭10行:\n{df.head(10)}")
            data = df.iloc[5:].reset_index(drop=True)
            data.columns = data.iloc[0]
            data = data.drop(0).reset_index(drop=True)
            print(f"列名の確認: {data.columns}")
            data = data[['日付', '稼働時間']].copy()
            data['氏名'] = os.path.basename(file).split('_')[0]
            data['総稼働時間'] = total_work_time
            print(f"氏名抽出結果: {data['氏名'][0]}")
            data_list.append(data)
        combined_df = pd.concat(data_list, ignore_index=True)
        combined_df = combined_df.drop_duplicates()
        combined_df['稼働時間'] = combined_df['稼働時間'].fillna(0)
        logging.info('All data files read and concatenated successfully.')
        print(f"データフレームの先頭:\n{combined_df.head()}")
        return combined_df
    except Exception as e:
        logging.error(f"データの読み込みエラー: {e}")
        exit()

def calculate_working_days(df):
    try:
        df['勤務日'] = df['稼働時間'].apply(lambda x: 1 if pd.notna(x) and x != '-' else 0)
        working_days_df = df.groupby('氏名')['勤務日'].sum().reset_index()
        logging.info('Working days calculated successfully.')
        return working_days_df
    except Exception as e:
        logging.error(f"勤務日数の計算エラー: {e}")
        exit()

def convert_time_to_hours(time_str):
    if isinstance(time_str, str) and not pd.isna(time_str):
        try:
            print(f"変換対象の稼働時間: {time_str}")
            if ":" not in time_str:
                time_str = f"{int(time_str):02d}:00:00"
            t = pd.to_timedelta(time_str)
            return t.total_seconds() / 3600
        except ValueError:
            return 0
    return 0

def calculate_overtime(df, working_days_df, working_hours, working_days):
    try:
        daily_working_hours = working_hours / working_days
        df['稼働時間'] = df['稼働時間'].apply(lambda x: convert_time_to_hours(x) if pd.notna(x) else 0)
        df['総稼働時間'] = df['総稼働時間'].apply(lambda x: x if isinstance(x, (int, float)) else convert_time_to_hours(x))
        total_hours_df = df.groupby('氏名', as_index=False).agg({'総稼働時間': 'sum'})
        total_hours_df['基本勤務時間'] = working_hours
        total_hours_df['残業時間'] = total_hours_df['総稼働時間'] - total_hours_df['基本勤務時間']
        logging.info('Overtime calculated successfully.')
        print(f"残業時間計算結果:\n{total_hours_df}")
        return total_hours_df
    except Exception as e:
        logging.error(f"残業時間の計算エラー: {e}")
        exit()

def create_overtime_chart(df):
    try:
        plt.figure(figsize=(10, 6))
        plt.bar(df['氏名'], df['残業時間'])
        plt.title('残業時間の比較')
        plt.xlabel('氏名')
        plt.ylabel('残業時間 (時間)')
        plt.xticks(rotation=45, ha='right')  # x軸のラベルを右寄せに設定
        plt.yticks(rotation=45, ha='right')  # y軸のラベルを右寄せに設定
        plt.tight_layout()
        chart_filename = 'overtime_chart.png'
        plt.savefig(chart_filename)
        logging.info('Overtime chart created successfully.')
        return chart_filename
    except Exception as e:
        logging.error(f"グラフのプロットエラー: {e}")
        print(f"グラフのプロットエラー: {e}")
        return None

#既存のExcelファイルに新しいデータを追加します
def save_to_excel(df, filename):
    try:
        workbook = load_workbook(filename)
        sheet = workbook.active
        for row in df.itertuples(index=True, name='Pandas'):
            sheet.append(row[1:])
        workbook.save(filename)
        logging.info(f"Excel file saved successfully: {filename}")
    except Exception as e:
        logging.error(f"Excel save error: {e}")
        print(f"Excel save error: {e}")

def calculate_statistics(df):
    try:
        mean_overtime = df['残業時間'].mean()
        std_overtime = df['残業時間'].std()
        median_overtime = df['残業時間'].median()
        logging.info('Statistics calculated successfully.')
        return pd.DataFrame({
            '平均残業時間': [mean_overtime],
            '標準偏差': [std_overtime],
            '中央値': [median_overtime]
        })
    except Exception as e:
        logging.error(f"統計量の計算エラー: {e}")
        exit()

def plot_overtime(df):
    try:
        plt.figure(figsize=(10, 6))
        sns.barplot(x='氏名', y='残業時間', data=df, ci=None)
        plt.title('残業時間の比較')
        plt.xlabel('氏名')
        plt.ylabel('残業時間 (時間)')
        plt.xticks(rotation=45)
        plt.tight_layout()
        plt.savefig('overtime_chart.png')
        plt.show()
        logging.info('Overtime chart plotted successfully.')
    except Exception as e:
        logging.error(f"グラフのプロットエラー: {e}")
        print(f"グラフのプロットエラー: {e}")
        
# 新しいExcelファイルを作成し、データと統計情報を保存します
def save_to_excel(df, statistics_df, working_hours, filename):
    try:
        df['基本勤務時間'] = working_hours
        today_date = datetime.now().strftime('%Y%m')
        output_filename = f"{filename}_{today_date}.xlsx"
        with pd.ExcelWriter(output_filename) as writer:
            df.to_excel(writer, sheet_name='Overtime Summary', index=False)
            statistics_df.to_excel(writer, sheet_name='Statistics', index=False)
        logging.info(f'Data saved to Excel file: {output_filename}')
        print(f'Data saved to Excel file: {output_filename}')
    except Exception as e:
        logging.error(f"データのExcelへの保存エラー: {e}")
        print(f"データのExcelへの保存エラー: {e}")

def save_to_excel_with_chart(df, statistics_df, working_hours, filename):
    try:
        df['基本勤務時間'] = working_hours
        today_date = datetime.now().strftime('%Y%m')
        output_filename = f"{filename}_{today_date}.xlsx"
        with pd.ExcelWriter(output_filename, engine='openpyxl') as writer:
            df.to_excel(writer, sheet_name='Overtime Summary', index=False)
            statistics_df.to_excel(writer, sheet_name='Statistics', index=False)
        workbook = load_workbook(output_filename)
        sheet = workbook['Overtime Summary']
        chart_filename = create_overtime_chart(df)
        if chart_filename:
            img = Image(chart_filename)
            img.anchor = 'E2'  # グラフをE2セルに配置
            sheet.add_image(img)          
        workbook.save(output_filename)
        logging.info(f'Data and chart saved to Excel file: {output_filename}')
        print(f'Data and chart saved to Excel file: {output_filename}')
    except Exception as e:
        logging.error(f"データとグラフのExcelへの保存エラー: {e}")
        print(f"データとグラフのExcelへの保存エラー: {e}")

def main():
    # メイン処理
    files = load_files(path, pattern)
    combined_df = read_data(files)
    combined_df = combined_df.drop_duplicates(subset=['氏名'])
    working_days_df = calculate_working_days(combined_df)
    current_month_number = datetime.now().month
    current_month_name = months_dict[current_month_number]
    working_hours = config.getint('WorkingHours', current_month_name)
    working_days = config.getint('WorkingDays', current_month_name)
    print(f"基本勤務時間 (hours) for {current_month_name}: {working_hours}")
    print(f"勤務日数 (days) for {current_month_name}: {working_days}")
    total_hours_df = calculate_overtime(combined_df, working_days_df, working_hours, working_days)
    statistics_df = calculate_statistics(total_hours_df)
    save_to_excel_with_chart(total_hours_df, statistics_df, working_hours, 'overtime_summary')

if __name__ == "__main__":
    main()

6. テスト一覧

№	形式	テスト内容	目的
1	正常系	正しいパスとパターンのファイルを読み込む	ファイル読み込み処理が正常に動作することを確認する
2	異常系	存在しないパスやパターンのファイルを読み込む	ファイル読み込みエラーが適切に処理されることを確認する
3	正常系	正しいフォーマットのExcelファイルからデータを抽出	データ抽出処理が正常に動作することを確認する
4	異常系	不正なフォーマットのExcelファイルからデータを抽出	データ抽出エラーが適切に処理されることを確認する
5	正常系	勤務日数を計算する	勤務日数計算処理が正常に動作することを確認する
6	正常系	残業時間を計算する	残業時間計算処理が正常に動作することを確認する
7	異常系	不正なデータを入力として与え、残業時間を計算する	残業時間計算エラーが適切に処理されることを確認する
8	正常系	グラフを作成し、Excelファイルに保存する	グラフ作成および保存処理が正常に動作することを確認する
9	正常系	統計情報を計算し、Excelファイルに保存する	統計情報計算および保存処理が正常に動作することを確認する
10	正常系	全体の処理を関数としてまとめ、実行ファイルとして実行する	ツール全体が正常に動作することを確認する

7. 実際の画面

見栄えの設定を全くしていません

8. 修正・変更履歴

You get articles that match your needs
You can efficiently read back useful information
You can use dark theme

What you can do with signing up