@frswataru (本石渉)posted at 2021-12-03

python 複数のexcelファイルを結合　一つのファイルに集約

Q&A

Closed

解決したいこと

pythonを使って複数のExcelファイルを結合して一つのファイルに集約したいです。
解決方法を教えて下さい。

発生している問題・エラー

--------------------------------------------------------------------------
XLRDError                                 Traceback (most recent call last)
<ipython-input-119-9994ca4c9bb7> in <module>
      2 
      3 for filepath in filepaths:
----> 4     _df = extract(filepath)
      5     df = pd.concat([df, _df])

<ipython-input-117-3019531b18b6> in extract(filepath)
      1 def extract(filepath):
----> 2     _df = pd.read_excel(filepath,sheet_name= 'チェックシート')
      3     wb = openpyxl.load_workbook(filepath)
      4     ws = wb['チェックシート']
      5     #最大行

~\AppData\Roaming\Python\Python37\site-packages\pandas\util\_decorators.py in wrapper(*args, **kwargs)
    294                 )
    295                 warnings.warn(msg, FutureWarning, stacklevel=stacklevel)
--> 296             return func(*args, **kwargs)
    297 
    298         return wrapper

~\AppData\Roaming\Python\Python37\site-packages\pandas\io\excel\_base.py in read_excel(io, sheet_name, header, names, index_col, usecols, squeeze, dtype, engine, converters, true_values, false_values, skiprows, nrows, na_values, keep_default_na, na_filter, verbose, parse_dates, date_parser, thousands, comment, skipfooter, convert_float, mangle_dupe_cols)
    302 
    303     if not isinstance(io, ExcelFile):
--> 304         io = ExcelFile(io, engine=engine)
    305     elif engine and engine != io.engine:
    306         raise ValueError(

~\AppData\Roaming\Python\Python37\site-packages\pandas\io\excel\_base.py in __init__(self, path_or_buffer, engine)
    865         self._io = stringify_path(path_or_buffer)
    866 
--> 867         self._reader = self._engines[engine](self._io)
    868 
    869     def __fspath__(self):

~\AppData\Roaming\Python\Python37\site-packages\pandas\io\excel\_xlrd.py in __init__(self, filepath_or_buffer)
     20         err_msg = "Install xlrd >= 1.0.0 for Excel support"
     21         import_optional_dependency("xlrd", extra=err_msg)
---> 22         super().__init__(filepath_or_buffer)
     23 
     24     @property

~\AppData\Roaming\Python\Python37\site-packages\pandas\io\excel\_base.py in __init__(self, filepath_or_buffer)
    351             self.book = self.load_workbook(filepath_or_buffer)
    352         elif isinstance(filepath_or_buffer, str):
--> 353             self.book = self.load_workbook(filepath_or_buffer)
    354         elif isinstance(filepath_or_buffer, bytes):
    355             self.book = self.load_workbook(BytesIO(filepath_or_buffer))

~\AppData\Roaming\Python\Python37\site-packages\pandas\io\excel\_xlrd.py in load_workbook(self, filepath_or_buffer)
     35             return open_workbook(file_contents=data)
     36         else:
---> 37             return open_workbook(filepath_or_buffer)
     38 
     39     @property

~\Anaconda3\lib\site-packages\xlrd\__init__.py in open_workbook(filename, logfile, verbosity, use_mmap, file_contents, encoding_override, formatting_info, on_demand, ragged_rows)
    155         formatting_info=formatting_info,
    156         on_demand=on_demand,
--> 157         ragged_rows=ragged_rows,
    158     )
    159     return bk

~\Anaconda3\lib\site-packages\xlrd\book.py in open_workbook_xls(filename, logfile, verbosity, use_mmap, file_contents, encoding_override, formatting_info, on_demand, ragged_rows)
     90         t1 = perf_counter()
     91         bk.load_time_stage_1 = t1 - t0
---> 92         biff_version = bk.getbof(XL_WORKBOOK_GLOBALS)
     93         if not biff_version:
     94             raise XLRDError("Can't determine file's BIFF version")

~\Anaconda3\lib\site-packages\xlrd\book.py in getbof(self, rqd_stream)
   1276             bof_error('Expected BOF record; met end of file')
   1277         if opcode not in bofcodes:
-> 1278             bof_error('Expected BOF record; found %r' % self.mem[savpos:savpos+8])
   1279         length = self.get2bytes()
   1280         if length == MY_EOF:

~\Anaconda3\lib\site-packages\xlrd\book.py in bof_error(msg)
   1270 
   1271         def bof_error(msg):
-> 1272             raise XLRDError('Unsupported format, or corrupt file: ' + msg)
   1273         savpos = self._position
   1274         opcode = self.get2bytes()

XLRDError: Unsupported format, or corrupt file: Expected BOF record; found b'\x07\x96{\x90\xce \x8f\xc2'

該当するソースコード

import pandas as pd
from glob import glob
import openpyxl
filepaths= glob('保存データ/*メイン*.xlsx')
filepath = filepaths[0]
wb = openpyxl.load_workbook(filepath)
ws = wb['チェックシート']
_df = pd.read_excel(filepath,sheet_name= 'チェックシート')
#最大行
maxRow = ws.max_row + 1

#最大列
maxClm = ws.max_column + 1

#列ループ
for j in range(1,maxClm):

    #列を指定
    if j == 3:

        #行を逆ループ
        for i in reversed(range(1,maxRow)):

            #セルがNoneだったら
            if ws.cell(row=i, column=j).value != None:

                break
columns = _df.iloc[40,[1,2,5,6,7,10]]
df =_df.iloc[41:i-1,[1,2,5,6,7,10]]
df.columns = columns
df.columns=['コード', 
            '名称',
            '数量',
            '製造チェック項目',
            '指導情報',
            '色分け'
           ]


df['仕様'] = _df.iloc[0,1]
df['号車番号'] = _df.iloc[1,1][:1]
df['WO'] = _df.iloc[3,1][8:]
df['PJ'] = _df.iloc[4,1][8:]
df['ユーザー名'] = _df.iloc[5,1]
df['備考'] = _df.iloc[38,4]

def extract(filepath):
    _df = pd.read_excel(filepath,sheet_name= 'チェックシート')
    wb = openpyxl.load_workbook(filepath)
    ws = wb['チェックシート']
    #最大行
    maxRow = ws.max_row + 1

    #最大列
    maxClm = ws.max_column + 1

    #列ループ
    for j in range(1,maxClm):

        #列を指定
        if j == 3:

            #行を逆ループ
            for i in reversed(range(1,maxRow)):

                #セルがNoneだったら
                if ws.cell(row=i, column=j).value != None:

                    break
    df =_df.iloc[41:i-1,[1,2,5,6,7,10]]
    df.columns = columns
    df.columns=['コード', 
                '名称',
                '数量',
                '製造チェック項目',
                '指導情報',
                '色分け'
                ]
    df['仕様'] = _df.iloc[0,1]
    df['号車番号'] = _df.iloc[1,1][:1]
    df['WO'] = _df.iloc[3,1][8:]
    df['PJ'] = _df.iloc[4,1][8:]
    df['ユーザー名'] = _df.iloc[5,1]
    df['備考'] = _df.iloc[38,4]
    return df
df = extract(filepath)
df = pd.DataFrame()

for filepath in filepaths:
    _df = extract(filepath)
    df = pd.concat([df, _df])

df.to_excel('output/All_data.xlsx',index=False)

0 likes

Are you sure you want to delete the question?

python 複数のexcelファイルを結合　一つのファイルに集約

解決したいこと

発生している問題・エラー

該当するソースコード

1Answer

Comments

Your answer might help someone💌

python 複数のexcelファイルを結合 一つのファイルに集約

解決したいこと

発生している問題・エラー

該当するソースコード

1Answer

Comments