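# NOTE: the two comment lines below are forum page chrome that was captured
# when this script was copied from a web page; they are not part of the program.
# 37,721 community members | Post | Related to me | My tasks | Share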
#filename Seek.py
import unicodedata
import sys
import os
class Seek():
    """Find CJK (Chinese) characters in text files and write them to a report.

    The class only locates and reports characters; it does not replace them.

    Usage: run the file as a Python script.  Options are read from
    ``sys.argv``; each option is followed by its value:

    -d        : directory to scan, absolute or relative (default ``'.'``)
    -t        : file-name suffix filter; several suffixes may be given
                separated by ``;`` (e.g. ``.jsp;.txt``).  Default: all files.
    -sf       : also scan sub-directories, ``Y`` or ``N`` (default ``N``)
    -r        : report file name (default ``'ChineseCharacter.txt'``);
                a relative name is resolved against the working directory
    -encoding : encoding used for both the scanned files and the report
                (default ``utf-8``)
    """

    def __init__(self):
        """Set the defaults and override them from the command line."""
        self.d = '.'
        self.sf = 'N'
        self.t = 'ALL'
        self.r = 'ChineseCharacter.txt'
        self.encoding = 'utf-8'
        # Report stream; only open while seeking() is running.
        self.rfile = None

        argv = sys.argv
        # argv[0] is the script name, so option parsing starts at 1.
        index = 1
        while index < len(argv):
            flag = argv[index]
            if flag not in ('-d', '-sf', '-r', '-t', '-encoding'):
                index += 1
                continue
            if index + 1 >= len(argv):
                # A trailing flag has no value; keep the default instead of
                # failing with an IndexError.
                print('input error: missing value for %s parameter' % flag)
                break
            value = argv[index + 1]
            if flag == '-d':
                self.d = value
            elif flag == '-sf':
                answer = value.upper()
                if answer in ('Y', 'N'):
                    self.sf = answer
                else:
                    print('input error with sf parameter')
            elif flag == '-r':
                self.r = value
            elif flag == '-t':
                self.t = value
            else:
                self.encoding = value
            # Skip the flag and the value it consumed.
            index += 2

    def seeking(self):
        """Open the report file, scan ``self.d`` and close the report.

        Any error is printed with the ``seek error`` prefix rather than
        raised, so the script always terminates normally.
        """
        self.rfile = None
        try:
            # output file
            self.rfile = open(self.r, 'w', encoding=self.encoding)
            # start seek
            self.start(self.d)
        except Exception as error:
            print('seek error %s' % error)
        finally:
            self.__close()

    def start(self, path):
        """Scan every matching file in directory ``path``.

        Sub-directories are scanned recursively when ``self.sf`` is ``'Y'``.
        The report stream must already be open, i.e. this is meant to be
        called through :meth:`seeking`.
        """
        suffixes = None
        if self.t != 'ALL':
            # '-t' may carry several suffixes separated by ';'.
            suffixes = tuple(part for part in self.t.split(';') if part)
        reportPath = os.path.abspath(self.r)
        # Sorted so the report order does not depend on the file system.
        for name in sorted(os.listdir(path)):
            full = os.path.join(path, name)
            if self.__isFile(full):
                if os.path.abspath(full) == reportPath:
                    # Never scan the report that is currently being written.
                    continue
                if suffixes is not None and not name.endswith(suffixes):
                    continue
                try:
                    self.__seek(full)
                except (OSError, UnicodeError) as error:
                    # One unreadable file must not abort the whole scan.
                    print('seek error %s' % error)
            elif self.__isDir(full) and self.sf == 'Y':
                # seek the sub folder when the self.sf equals 'Y'
                self.start(full)

    def __close(self):
        """Close the report stream if it was opened."""
        if getattr(self, 'rfile', None) is not None:
            self.rfile.close()
            self.rfile = None

    def __isFile(self, file):
        """Return True when ``file`` is an existing regular file."""
        return os.path.isfile(file)

    def __isDir(self, path):
        """Return True when ``path`` is an existing directory."""
        return os.path.isdir(path)

    def __openFile(self, file):
        """Placeholder kept from the original code; does nothing."""
        pass

    def __closeFile(self, file):
        """Close the given file object."""
        file.close()

    def __stripComments(self, line, inBlock):
        """Drop leading ``//`` and ``/* ... */`` comment text from ``line``.

        ``inBlock`` tells whether the line starts inside a block comment.
        Returns ``(text, inBlock)``: the part of the line that still has to
        be scanned and the block-comment state for the next line.  Only
        comments at the start of the (whitespace-stripped) text are
        recognised; comments that follow code are left in place.
        """
        while True:
            if inBlock:
                end = line.find('*/')
                if end == -1:
                    # Still inside the block comment: nothing to scan.
                    return '', True
                line = line[end + 2:]
                inBlock = False
            stripped = line.lstrip()
            if stripped.startswith('//'):
                return '', False
            if not stripped.startswith('/*'):
                return line, False
            # Block comment opens here; loop to look for its end on the
            # same line.
            line = stripped[2:]
            inBlock = True

    def __writeChinese(self, line):
        """Write the CJK characters of ``line`` to the report.

        A character is prefixed with a tab when it is more than one position
        away from the previously written one (the reference position starts
        at 0).  Returns True when at least one character was written.
        """
        found = False
        previous = 0
        for position, char in enumerate(line):
            # name() has no entry for control characters such as '\n';
            # the '' default avoids an exception for those.
            if not unicodedata.name(char, '').startswith('CJK'):
                continue
            found = True
            if position - previous > 1:
                # Not adjacent to the previous hit: start a new group.
                self.__writeFile('\t' + char)
            else:
                self.__writeFile(char)
            previous = position
        return found

    def __seek(self, file):
        """Scan one file and append its CJK characters to the report.

        For every line that contains CJK characters outside of leading
        comments, the characters are written followed by eight tabs and
        ``line:<number>``.  A ``------------<file>`` footer is always
        written once the file has been read.

        Raises OSError / UnicodeError when the file cannot be read or
        decoded with ``self.encoding``.
        """
        with open(file, 'r', encoding=self.encoding) as fileObj:
            lineList = fileObj.readlines()
        # True while the scan is inside a '/* ... */' block comment.
        inBlock = False
        try:
            for lineNo, line in enumerate(lineList, start=1):
                text, inBlock = self.__stripComments(line, inBlock)
                if self.__writeChinese(text):
                    self.__writeFile('\t' * 8)
                    self.__writeFile('line:')
                    self.__writeFile('%d' % lineNo)
                    self.__writeFile('\n')
        finally:
            self.__writeFile('\n')
            self.__writeFile('------------' + file)
            self.__writeFile('\n')

    def __writeFile(self, content):
        """Append ``content`` to the report stream."""
        self.rfile.write(content)
if __name__ == '__main__':
    # Script entry point: build the scanner from the command-line options
    # and run a single scan.
    Seek().seeking()