python 複数のexcelファイルを結合 一つのファイルに集約
Q&A
Closed
解決したいこと
pythonを使って複数のExcelファイルを結合して一つのファイルに集約したいです。
解決方法を教えて下さい。
発生している問題・エラー
--------------------------------------------------------------------------
XLRDError Traceback (most recent call last)
<ipython-input-119-9994ca4c9bb7> in <module>
2
3 for filepath in filepaths:
----> 4 _df = extract(filepath)
5 df = pd.concat([df, _df])
<ipython-input-117-3019531b18b6> in extract(filepath)
1 def extract(filepath):
----> 2 _df = pd.read_excel(filepath,sheet_name= 'チェックシート')
3 wb = openpyxl.load_workbook(filepath)
4 ws = wb['チェックシート']
5 #最大行
~\AppData\Roaming\Python\Python37\site-packages\pandas\util\_decorators.py in wrapper(*args, **kwargs)
294 )
295 warnings.warn(msg, FutureWarning, stacklevel=stacklevel)
--> 296 return func(*args, **kwargs)
297
298 return wrapper
~\AppData\Roaming\Python\Python37\site-packages\pandas\io\excel\_base.py in read_excel(io, sheet_name, header, names, index_col, usecols, squeeze, dtype, engine, converters, true_values, false_values, skiprows, nrows, na_values, keep_default_na, na_filter, verbose, parse_dates, date_parser, thousands, comment, skipfooter, convert_float, mangle_dupe_cols)
302
303 if not isinstance(io, ExcelFile):
--> 304 io = ExcelFile(io, engine=engine)
305 elif engine and engine != io.engine:
306 raise ValueError(
~\AppData\Roaming\Python\Python37\site-packages\pandas\io\excel\_base.py in __init__(self, path_or_buffer, engine)
865 self._io = stringify_path(path_or_buffer)
866
--> 867 self._reader = self._engines[engine](self._io)
868
869 def __fspath__(self):
~\AppData\Roaming\Python\Python37\site-packages\pandas\io\excel\_xlrd.py in __init__(self, filepath_or_buffer)
20 err_msg = "Install xlrd >= 1.0.0 for Excel support"
21 import_optional_dependency("xlrd", extra=err_msg)
---> 22 super().__init__(filepath_or_buffer)
23
24 @property
~\AppData\Roaming\Python\Python37\site-packages\pandas\io\excel\_base.py in __init__(self, filepath_or_buffer)
351 self.book = self.load_workbook(filepath_or_buffer)
352 elif isinstance(filepath_or_buffer, str):
--> 353 self.book = self.load_workbook(filepath_or_buffer)
354 elif isinstance(filepath_or_buffer, bytes):
355 self.book = self.load_workbook(BytesIO(filepath_or_buffer))
~\AppData\Roaming\Python\Python37\site-packages\pandas\io\excel\_xlrd.py in load_workbook(self, filepath_or_buffer)
35 return open_workbook(file_contents=data)
36 else:
---> 37 return open_workbook(filepath_or_buffer)
38
39 @property
~\Anaconda3\lib\site-packages\xlrd\__init__.py in open_workbook(filename, logfile, verbosity, use_mmap, file_contents, encoding_override, formatting_info, on_demand, ragged_rows)
155 formatting_info=formatting_info,
156 on_demand=on_demand,
--> 157 ragged_rows=ragged_rows,
158 )
159 return bk
~\Anaconda3\lib\site-packages\xlrd\book.py in open_workbook_xls(filename, logfile, verbosity, use_mmap, file_contents, encoding_override, formatting_info, on_demand, ragged_rows)
90 t1 = perf_counter()
91 bk.load_time_stage_1 = t1 - t0
---> 92 biff_version = bk.getbof(XL_WORKBOOK_GLOBALS)
93 if not biff_version:
94 raise XLRDError("Can't determine file's BIFF version")
~\Anaconda3\lib\site-packages\xlrd\book.py in getbof(self, rqd_stream)
1276 bof_error('Expected BOF record; met end of file')
1277 if opcode not in bofcodes:
-> 1278 bof_error('Expected BOF record; found %r' % self.mem[savpos:savpos+8])
1279 length = self.get2bytes()
1280 if length == MY_EOF:
~\Anaconda3\lib\site-packages\xlrd\book.py in bof_error(msg)
1270
1271 def bof_error(msg):
-> 1272 raise XLRDError('Unsupported format, or corrupt file: ' + msg)
1273 savpos = self._position
1274 opcode = self.get2bytes()
XLRDError: Unsupported format, or corrupt file: Expected BOF record; found b'\x07\x96{\x90\xce \x8f\xc2'
該当するソースコード
import pandas as pd
from glob import glob
import openpyxl
filepaths= glob('保存データ/*メイン*.xlsx')
filepath = filepaths[0]
wb = openpyxl.load_workbook(filepath)
ws = wb['チェックシート']
_df = pd.read_excel(filepath,sheet_name= 'チェックシート')
#最大行
maxRow = ws.max_row + 1
#最大列
maxClm = ws.max_column + 1
#列ループ
for j in range(1,maxClm):
#列を指定
if j == 3:
#行を逆ループ
for i in reversed(range(1,maxRow)):
#セルがNoneだったら
if ws.cell(row=i, column=j).value != None:
break
columns = _df.iloc[40,[1,2,5,6,7,10]]
df =_df.iloc[41:i-1,[1,2,5,6,7,10]]
df.columns = columns
df.columns=['コード',
'名称',
'数量',
'製造チェック項目',
'指導情報',
'色分け'
]
df['仕様'] = _df.iloc[0,1]
df['号車番号'] = _df.iloc[1,1][:1]
df['WO'] = _df.iloc[3,1][8:]
df['PJ'] = _df.iloc[4,1][8:]
df['ユーザー名'] = _df.iloc[5,1]
df['備考'] = _df.iloc[38,4]
def extract(filepath):
_df = pd.read_excel(filepath,sheet_name= 'チェックシート')
wb = openpyxl.load_workbook(filepath)
ws = wb['チェックシート']
#最大行
maxRow = ws.max_row + 1
#最大列
maxClm = ws.max_column + 1
#列ループ
for j in range(1,maxClm):
#列を指定
if j == 3:
#行を逆ループ
for i in reversed(range(1,maxRow)):
#セルがNoneだったら
if ws.cell(row=i, column=j).value != None:
break
df =_df.iloc[41:i-1,[1,2,5,6,7,10]]
df.columns = columns
df.columns=['コード',
'名称',
'数量',
'製造チェック項目',
'指導情報',
'色分け'
]
df['仕様'] = _df.iloc[0,1]
df['号車番号'] = _df.iloc[1,1][:1]
df['WO'] = _df.iloc[3,1][8:]
df['PJ'] = _df.iloc[4,1][8:]
df['ユーザー名'] = _df.iloc[5,1]
df['備考'] = _df.iloc[38,4]
return df
df = extract(filepath)
df = pd.DataFrame()
for filepath in filepaths:
_df = extract(filepath)
df = pd.concat([df, _df])
df.to_excel('output/All_data.xlsx',index=False)
0