csv定義 推測関数
def sniffer(dat: str,
            delimiters=('\t', ';', ',', ':', '|', ' '),
            quotechars=('"', '\'', '`'),
            escapechars=('\\', '^'),
            ):
    """Guess csv format parameters from a sample of delimited text.

    A lightweight alternative to ``csv.Sniffer`` based purely on character
    counts in *dat*.

    Args:
        dat: Sample text to inspect.
        delimiters: Candidate field delimiters.
        quotechars: Candidate quote characters.
        escapechars: Candidate escape characters; a candidate is chosen when
            it appears immediately before the detected quote character.

    Returns:
        A dict with the keys ``delimiter``, ``doublequote``, ``escapechar``,
        ``lineterminator``, ``quotechar``, ``quoting`` and
        ``skipinitialspace``, suitable for passing as keyword arguments to
        ``csv.reader``. ``delimiter`` is ``None`` when no candidate occurs
        at least once per line on average.
    """
    # Guess the line terminator and count the lines. When both CR and LF
    # are present the LF count wins, so CRLF data is counted once per line.
    lineterminator = ""
    lines = 1
    if '\r' in dat:
        lineterminator += '\r'
        lines = dat.count('\r')
    if '\n' in dat:
        lineterminator += '\n'
        lines = dat.count('\n')
    # Fall back to NUL-separated records. This must be the NUL character,
    # not the three-character text "0x0", which can occur in ordinary data.
    if not lineterminator and '\x00' in dat:
        lineterminator = '\x00'
        lines = dat.count('\x00')

    # Guess the delimiter: the most frequent candidate that appears at
    # least once per line on average. Ties are broken by character order.
    delim_counts = ((dat.count(ch), ch) for ch in delimiters if ch in dat)
    mxdlm, delimiter = max(
        ((cnt, ch) for cnt, ch in delim_counts if cnt >= lines),
        default=(0, None),
    )

    # Guess the quote character: the most frequent candidate with an even
    # number of occurrences (quotes come in pairs). Defaults to '"'.
    quote_counts = ((dat.count(ch), ch) for ch in quotechars if ch in dat)
    mxqt, quotechar = max(
        ((cnt, ch) for cnt, ch in quote_counts if cnt % 2 == 0),
        default=(0, '"'),
    )

    # Guess the escape character. Bound unconditionally so the return
    # below can never hit an undefined name.
    escapechar, doublequote = None, True
    if quotechar:
        for esc in escapechars:
            if (esc + quotechar) in dat:
                escapechar, doublequote = esc, False
                break

    return dict(
        delimiter=delimiter,
        doublequote=doublequote,
        escapechar=escapechar,
        lineterminator=lineterminator,
        quotechar=quotechar,
        # 1 / 0 are the values of csv.QUOTE_ALL / csv.QUOTE_MINIMAL.
        quoting=1 if quotechar and mxqt > mxdlm * 2 else 0,
        skipinitialspace=bool(delimiter) and (delimiter + ' ') in dat,
    )
使い方
>>> csvdat = """n,aa
1,1
2,あ
"""
>>> print(sniffer(csvdat))
{'delimiter': ',', 'doublequote': True, 'escapechar': None, 'lineterminator': '\n', 'quotechar': '"', 'quoting': 0, 'skipinitialspace': False}
>>> import csv
>>> from io import StringIO
>>> r = csv.reader(StringIO(csvdat), **sniffer(csvdat))
>>> print(list(r))
[['n', 'aa'], ['1', '1'], ['2', 'あ']]
以上、csv.Sniffer の車輪の再発明でした。