指定したディレクトリを再帰的に走査し、各テキストファイルを読み込んで、正規表現に一致する行を検索します。
Jupyter で試してみました。
ソース
search_txt.py
# -*- cording: cp932 -*-
from pathlib import Path
import re
def main():
    """Run one sample search using hard-coded settings.

    Looks for any case variant of "abc" in files under the sample
    directory, skipping image suffixes, and writes the hits to a TSV file.
    """
    pattern = r'[aA][bB][cC]'
    root_dir = "D:\\__work\\test\\"
    result_tsv = "D:\\__work\\search_result.tsv"
    skipped_suffixes = ["gif", "jpeg"]
    search_txt(
        search_pattern=pattern,
        target_path=root_dir,
        out_file=result_tsv,
        ignore_suffix_list=skipped_suffixes,
    )
def search_txt(search_pattern, target_path, out_file, ignore_suffix_list,
               encoding=None):
    """Recursively search files under a directory and write matches as TSV.

    Every regular file below ``target_path`` is read as text and scanned
    line by line. Each line matching ``search_pattern`` produces one row in
    ``out_file`` with the columns: file stem, suffix (without the dot),
    folder relative to ``target_path``, 1-based line number, and the line
    itself with tabs replaced by spaces.

    Args:
        search_pattern: Regular expression (string or compiled pattern)
            applied to each line with ``re.search`` semantics.
        target_path: Root directory to walk recursively.
        out_file: Path of the TSV file to create or overwrite.
        ignore_suffix_list: Suffix fragments to skip. A file is skipped when
            any entry is a *substring* of its suffix (so ``"htm"`` also
            skips ``.html``). Matching is case-sensitive.
        encoding: Text encoding used both for reading the searched files and
            for writing ``out_file``. ``None`` uses the platform default.

    Notes:
        Files that cannot be decoded with ``encoding`` (typically binary
        files) are skipped instead of aborting the whole search.
    """
    pattern = re.compile(search_pattern)  # compile once, not per line
    root = Path(target_path)
    header = "\t".join(["主ファイル名", "拡張子", "フォルダ", "Line No", "Line"])

    # The context manager guarantees the output is flushed and closed even
    # if reading or matching raises part-way through.
    with open(out_file, "w", encoding=encoding) as fwrite:
        fwrite.write(header + "\n")

        for each_path in root.glob("**/*"):
            if not each_path.is_file():
                continue

            suffix = each_path.suffix.replace(".", "")
            if any(ignored in suffix for ignored in ignore_suffix_list):
                continue

            try:
                all_content = each_path.read_text(encoding=encoding)
            except UnicodeDecodeError:
                # Not decodable as text in this encoding; skip the file.
                continue

            # Folder relative to the search root, computed from path parts
            # so it works with any separator; files directly under the root
            # get an empty folder rather than their own file name.
            rel_parent = each_path.parent.relative_to(root)
            folder = str(rel_parent) if rel_parent.parts else ""
            stem = each_path.stem

            for line_no, content in enumerate(all_content.split("\n"), start=1):
                if not pattern.search(content):
                    continue
                # Tabs inside the line would break the TSV column layout.
                edited_line = content.replace("\t", " ")
                row = "\t".join(
                    [stem, suffix, folder, str(line_no), edited_line]
                )
                fwrite.write(row + "\n")
# Run the sample search only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
結果例
search_result.tsv
主ファイル名 拡張子 フォルダ Line No Line
test1 txt test1.txt 2 abc
test2 txt test2.txt 3 ABc
test3 txt sub 4 fffAbCppp