ソース内の日本語をすべて英語に変更する、といった作業はよくあると思いますが、
下記の2つのツールを使えば作業をスムーズに進められると思います。
1) 日本語抽出ツールを使って、システム内の日本語を洗い出す(結果はCSV形式で出力される)。
2) CSVをローカライズしてもらう。
3) 置換ツールを使って日本語を英語に置き換える。
日本語抽出ツール
search.py
# -*- coding: utf-8 -*-
import os,sys
import re
import codecs
import unicodedata
import csv
import operator
# Python 2 only: sys.setdefaultencoding is removed from the sys namespace at
# interpreter start-up, so the module is reloaded to get it back and the
# implicit str/unicode conversion codec is then switched to UTF-8.
reload(sys)
sys.setdefaultencoding("utf8")
# Language Translator
# NOTE(review): googletrans is a third-party package and `translator` is not
# referenced anywhere else in the visible source - confirm before removing.
from googletrans import Translator
translator = Translator()
# Character-class patterns used to recognise Japanese text.
kanjiPattern = re.compile(u"[\u4E00-\u9FFF]+")
katakanaPattern = re.compile(u"[\u30a0-\u30ff]+")
hiraganaPattern = re.compile(u"[\u3040-\u309f]+")

# Extensions that are never scanned.  Only consulted by checkDIR when
# choice_tpye_list is empty.  The former '.xls,' entry (stray comma) could
# never equal a real extension, so it is now '.xls'; the duplicated '.json'
# entry was dropped.
except_file_type = ['.asta', '.csv', '.xlsx', '.json', '.h', '.sql', '.class',
                    '.sh', '.txt', '.jpg', '.png', '.xls', '.svn-base']
# Files whose path matches this regex (case-insensitively) are skipped.
except_file_like = r'test|debug|target|cocos2d|tools|Libraries'
# Extensions to scan; when non-empty this whitelist takes precedence over
# except_file_type.
#choice_tpye_list = ['.html','.js','.css','.java','.cpp','.mm','.xml']
choice_tpye_list = ['.js']
quationFiles_list = ['.js', '.css', '.java', '.cpp', '.mm']
# Output CSV path; filled in by start().
save_path = ''
import time
# Timestamp embedded in the output file name.
timestr = time.strftime("%Y%m%d-%H%M%S")
def matchJapanese(str):
    """Return a regex match for the first Japanese character run in *str*.

    The kanji, katakana and hiragana patterns are tried in that order.  The
    result is the match object of the first pattern that hits, or ``None``
    when none of them does.
    """
    found = None
    for pattern in (kanjiPattern, katakanaPattern, hiraganaPattern):
        found = pattern.search(str)
        if found:
            break
    return found
def start():
    """Scan the paths given on the command line and sort the result CSV.

    ``sys.argv[1]`` is the file or directory to search; several paths may be
    given separated by commas.  Every path is handed to ``checkDIR`` and the
    collected rows end up in ``check_jp_<timestamp>.csv`` in the current
    working directory.  That file is then rewritten with its rows ordered by
    text length (third column), longest first.
    """
    global save_path
    # Without an argument the scan degrades to one empty path, which checkDIR
    # reports as a wrong file.
    paths = sys.argv[1] if len(sys.argv) > 1 else ''
    save_path = os.path.join(os.getcwd(), 'check_jp_' + timestr + '.csv')
    print("".join(choice_tpye_list))
    for path in paths.split(','):
        checkDIR(path)

    # Nothing was scanned, so no CSV exists and there is nothing to sort.
    if not os.path.isfile(save_path):
        return

    # Sort by character count, longest first.  The read handle is closed
    # before the same file is reopened for writing.
    with open(save_path) as csv_in:
        rows = sorted(csv.reader(csv_in, delimiter='#'),
                      key=lambda row: int(row[2]), reverse=True)
    with open(save_path, 'wt') as csv_out:
        writer = csv.writer(csv_out, delimiter='#', lineterminator='\n')
        writer.writerows(rows)
def sortcsvfiles(inputfilename, outputfilename):
    """Copy a CSV file with its data rows sorted numerically.

    The first row is treated as a header and written back unchanged when it
    is non-empty.  Rows with fewer than two fields are dropped; the rest are
    ordered by the integer values of their first and second columns.
    """
    def numeric_key(record):
        return (int(record[0]), int(record[1]))

    with open(inputfilename, 'rt') as source:
        records = csv.reader(source)
        header = next(records, None)
        body = [record for record in records if len(record) > 1]
        body.sort(key=numeric_key)

    with open(outputfilename, 'wt') as target:
        out = csv.writer(target, lineterminator='\n')
        if header:
            out.writerow(header)
        out.writerows(body)
def checkDIR(path):
    """Walk *path* recursively and scan every file that passes the filter.

    Directories are descended into entry by entry.  A file is passed to
    ``replaceFile`` when its extension is listed in ``choice_tpye_list``; if
    that list is empty, it is passed when its extension is not listed in
    ``except_file_type``.  Anything that is neither a file nor a directory is
    reported on stdout.
    """
    if os.path.isdir(path):
        for entry in os.listdir(path):
            checkDIR(os.path.join(path, entry))
        return

    if not os.path.isfile(path):
        print('---Wrong File---' + path)
        return

    extension = os.path.splitext(path)[1]
    if choice_tpye_list:
        wanted = extension in choice_tpye_list
    else:
        wanted = extension not in except_file_type
    if wanted:
        replaceFile(path)
# Statement prefixes that mark a line as code without translatable text.
# They are compared against lower-cased input, so they are lower-cased here
# as well; the mixed-case "CCAssert" entry could never match before.
_SKIP_PREFIXES = tuple(prefix.lower() for prefix in (
    "new", "CCAssert", "qblogsv", "log", "cclog", "public", "cocos2d::log",
    "return", "void", "@", "*", "throw", ".", "console.log"))

# Substrings that disqualify a line wherever they occur in the lower-cased
# text left after comment removal.
# NOTE(review): "LOG" can never occur in lower-cased text, so that entry is
# inert.  It is left untouched because matching "log" anywhere would also
# drop lines containing e.g. "dialog" or "login" - confirm the intent.
_SKIP_MARKERS = ("テスト", "<%", "実装", "クラス", "LOG", "★", "//", "<<")

_BLOCK_COMMENT = re.compile(r"/\*.*?\*/", re.DOTALL)   # /* COMMENT */
_LINE_COMMENT = re.compile("//.*?\n")                  # // COMMENT
_HTML_COMMENT = re.compile("(<!--.*?-->)", re.DOTALL)  # <!-- COMMENT -->


def removeComments(string):
    """Strip comments from one source line and blank out uninteresting lines.

    Args:
        string: a single line of source text.

    Returns:
        ``""`` when the line starts with one of the skip prefixes or still
        contains one of the skip markers after comment removal; otherwise the
        line with ``/* */``, ``//`` and ``<!-- -->`` comments removed and
        leading whitespace stripped.
    """
    if string.lstrip().lower().startswith(_SKIP_PREFIXES):
        return ""

    string = _BLOCK_COMMENT.sub("", string)
    # Only matches when the line still ends with a newline.  It also cuts
    # string literals that contain "//" (URLs, for instance).
    string = _LINE_COMMENT.sub("", string)
    string = _HTML_COMMENT.sub("", string)

    # Membership test instead of ``find(...) > 0``, which missed a marker
    # sitting at the very start of the line.
    lowered = string.lower()
    if any(marker in lowered for marker in _SKIP_MARKERS):
        return ""
    return string.lstrip()
def getStringFromQua(string):
    """Return the first double-quoted literal in *string* holding Japanese.

    Every ``"..."`` literal on the line is inspected in order.  The literal
    is returned with its surrounding quotes; ``""`` is returned when no
    literal contains Japanese text or the line has no literal at all.
    """
    for literal in re.findall('"[^"]*"', string):
        if matchJapanese(literal):
            return literal
    return ""
def getStringFromHtml(raw_html):
    """Return the text enclosed by the first ``>`` and the last ``<`` on a line.

    Args:
        raw_html: one line of markup or source text.

    Returns:
        The (possibly empty) text between the first ``>`` and the last ``<``
        that follows it on the same line, or *raw_html* unchanged when the
        line contains no such span.
    """
    found = re.search(r'>(.*)<', raw_html)
    if found is None:
        # No markup to strip.  Nothing is printed here any more: the old
        # placeholder print emitted a blank line for every such input.
        return raw_html
    return found.group(1)
def ifHasQuationGetString(strOrgin):
    """Return the last quoted fragment found in *strOrgin*.

    Single-quoted fragments take precedence and are returned without their
    quotes.  If there are none, the last double-quoted literal is returned
    including its quotes.  ``""`` is returned when the text has no quoted
    fragment.
    """
    single_quoted = re.findall("(?<=')[^']+(?=')", strOrgin)
    if single_quoted:
        return single_quoted[-1]

    double_quoted = re.findall('"[^"]*"', strOrgin)
    if double_quoted:
        return double_quoted[-1]

    return ""
def checkIfKeysInString(keys, string):
    """Return True when at least one item of *keys* occurs in *string*."""
    for key in keys:
        if key in string:
            return True
    return False
def replaceFile(file):
    """Collect the Japanese text found in *file* and append it to the CSV.

    Each line is decoded as UTF-8 when possible, passed through
    ``removeComments`` and cut at the first ``'#'`` (presumably because
    ``'#'`` is the output delimiter).  If the remaining text contains
    Japanese, it is narrowed to the markup content and then to the last
    quoted fragment, and recorded if Japanese is still present.

    One ``path#line#length#text`` row per hit is appended to ``save_path``.
    The output file is created even when there are no hits.

    Returns:
        ``False`` when the path matches ``except_file_like`` and the file is
        skipped; ``None`` otherwise.
    """
    if re.search(except_file_like, file, re.IGNORECASE):
        return False

    hits = []
    with open(file, 'r') as f:
        for num, line in enumerate(f, 1):
            if isinstance(line, bytes):
                try:
                    line = line.decode('utf-8')
                except UnicodeDecodeError:
                    # Not valid UTF-8: keep scanning the raw bytes, as before.
                    pass
            line = removeComments(line)

            candidate = line.split('#')[0]
            if not matchJapanese(candidate):
                continue

            text = getStringFromHtml(candidate)
            quoted = ifHasQuationGetString(text)
            if quoted != "":
                text = quoted
            if matchJapanese(text):
                text = text.replace('\n', '').replace('\r', '').strip()
                hits.append((file, num, len(text), text))

    with codecs.open(save_path, 'a', "utf-8") as out:
        for path, line_no, length, text in hits:
            out.write('%s#%s#%s#%s\n' % (path, line_no, length, text))
if __name__ == '__main__':
    # Run the extraction, then report where the CSV was written.
    start()
    print('Output:%s' % save_path)
置換ツール
search.py
# -*- coding: utf-8 -*-
import csv
import fileinput
import os,sys
import re
import codecs
# Python 2 only: sys.setdefaultencoding is removed from the sys namespace at
# interpreter start-up, so the module is reloaded to get it back and the
# implicit str/unicode conversion codec is then switched to UTF-8.
reload(sys)
sys.setdefaultencoding("utf8")
# Localisation table: a '#'-delimited CSV whose first column is the text to
# find and whose second column is its replacement.
csvPath = 'C:/Users/r.cho/.m2/repository/git/magica/etc/tools/2019CPP.csv'
# Root of the source tree to rewrite (earlier example: D:\00_Docs\witch\sourcet).
#source_dir = 'C:/Users/r.cho/.m2/repository/git/magica'
source_dir = 'C:/Users/r.cho/.m2/repository/git/client'
#source_dir = 'D:/00_Docs/witch/sourcet'
# Exclusion filters.  Their only uses in checkDIR/replaceFile are currently
# commented out.  The dot in '.git' is now escaped so it no longer matches
# any character; the '.xls,' typo is now '.xls' and the duplicated '.plist'
# entry was dropped.
except_file_like = r'\.git|test|target'
except_file_type = ['.java', '.db', '.md', '.xml', '.asta', '.vfxb', '.mtn',
                    '.plist', '.ExportJson', '.vfxj', '.moc', '.css', '.csv',
                    '.xlsx', '.json', '.h', '.sql', '.class', '.sh', '.txt',
                    '.jpg', '.png', '.xls', '.svn-base']
# Extensions that are rewritten.
#file_type = ['.html','.js','.css','.java','.cpp','.mm','.xml']
file_type = ['.cpp']
def start():
    """Run the replacement pass over everything below ``source_dir``."""
    root = source_dir
    checkDIR(root)
def checkDIR(path):
    """Walk *path* recursively and rewrite every file of a selected type.

    Directories are descended into entry by entry.  A file is handed to
    ``replaceFile`` when its extension is listed in ``file_type``.  Anything
    that is neither a file nor a directory is reported on stdout.
    """
    if os.path.isdir(path):
        for entry in os.listdir(path):
            checkDIR(os.path.join(path, entry))
    elif os.path.isfile(path):
        extension = os.path.splitext(path)[1]
        if extension in file_type:
            replaceFile(path)
    else:
        print('---Wrong File---' + path)
def replaceFile(fileName, csv_path=None):
    """Apply every replacement from the localisation CSV to one source file.

    Args:
        fileName: path of the file to rewrite in place.
        csv_path: optional path of the '#'-delimited replacement table
            (column 0 = text to find, column 1 = replacement).  Defaults to
            the module-level ``csvPath``.

    Rows with fewer than two columns or an empty first column are ignored.
    The file is rewritten, and its name printed, only when at least one
    replacement changed its content.
    """
    print("###### START ##########")
    table_path = csvPath if csv_path is None else csv_path

    with open(fileName, 'r') as source_file:
        original = source_file.read()

    updated = original
    with open(table_path, 'r') as csv_file:
        for row in csv.reader(csv_file, delimiter='#'):
            # An empty search string would make str.replace insert the
            # replacement between every character of the file.
            if len(row) > 1 and row[0]:
                updated = updated.replace(row[0], row[1])

    # Leave untouched files alone so their timestamps do not change.
    if updated == original:
        return

    with open(fileName, 'w') as target:
        print("####" + fileName)
        target.write(updated)
# Script entry point: run the replacement pass only when executed directly.
if __name__ == '__main__':
    start()