37,719
社区成员
发帖
与我相关
我的任务
分享
#!/usr/bin/python
#-*- coding: utf-8 -*-
import re
from collections import defaultdict
from operator import itemgetter
import sys
reload (sys)
sys.setdefaultencoding('utf-8')
syns_file = open('./test.txt','r')
#syns_file = open('./all_wiki_baidu_syns.txt','r')
sort_syns_file = open('./all_wiki_baidu_syns_sort.txt','w+')
pattern = re.compile(r'(?:[\s|]+)')
word_dict = defaultdict(set)
for each_line in syns_file.readlines():
line = each_line.strip('\n')
words = pattern.split(line.strip('|'))
if len(words) > 1:
word_dict[words[0]].update(words[1:])
for key,words in sorted(word_dict.items(),key = itemgetter(0)):
sort_syns_file.write('{0} {1}\n'.format(key,'|'.join(words)))
#print ('{0} {1}\n'.format(key,'|'.join(words)))
syns_file.close()
#sort_syns_file.close()
我想把结果直接输出到一个文件中,结果文件中是乱码,请问格式化输出的结果可以这样输出到文件吗?# -*- coding: utf-8 -*-
import re
txt="""
阿姨 大姨|姨妈|姨娘|
阿姨 姨妈|姨母
阿谀 讨好|奉承
阿谀 奉承|巴结|恭维|献媚|讨好|谀媚|谄媚|谄谀|趋奉|迎阿|逢迎|
阿谀奉承 趋炎附势|
"""
txtList=txt.split('\n')
result=[]
resTxt=''
p = re.compile(r'|$')
for line in txtList:
if line=='':
continue
tmp=re.split('\s+',line)
first=tmp[0]
second=p.sub('', tmp[1])
lenr=len(result)
if lenr==0:
result.append([first,second])
continue
count=0
for item in result:
if first==item[0]:
tmpSecond=re.split('|',second)
for sec in tmpSecond:
if sec not in item[1]:
item[1]+='|'+sec
break
count+=1
if count==lenr:
result.append([first,second])
for item in result:
resTxt+=item[0]+' '+item[1]+'\n'
print(resTxt)
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import re
from collections import defaultdict
from operator import itemgetter
word_text = """
阿姨 大姨|姨妈|姨娘|
阿姨 姨妈|姨母
阿諛 讨好|奉承
阿谀 奉承|巴结|恭维|献媚|讨好|谀媚|谄媚|谄谀|趋奉|迎阿|逢迎|
阿谀奉承 趋炎附势|
"""
ptn = re.compile(r'(?:[\s|]+)')
word_dict = defaultdict(set)
for line in word_text.splitlines():
words = ptn.split(line.strip('|'))
if len(words) > 1:
word_dict[words[0]].update(words[1:])
for key, words in sorted(word_dict.items(), key=itemgetter(0)):
print('{0} {1}'.format(key, '|'.join(words)))
C:\Python36\python.exe D:/OneDrive/PyProjects/Python3_Study/combine_words.py
阿姨 姨母|大姨|姨娘|姨妈
阿諛 奉承|讨好
阿谀 迎阿|奉承|逢迎|献媚|谄媚|巴结|谄谀|谀媚|趋奉|恭维|讨好
阿谀奉承 趋炎附势
Process finished with exit code 0
import pandas as pd
df = pd.read_table("test.txt", sep='\s+', encoding='utf-8', index_col=0, header=None)
df
Out[229]:
1
0
阿姨 大姨|姨妈|姨娘|
阿姨 姨妈|姨母
阿諛 讨好|奉承
阿谀 奉承|巴结|恭维|献媚|讨好|谀媚|谄媚|谄谀|趋奉|迎阿|逢迎|
阿谀奉承 趋炎附势
groupd = df.groupby(level=0)
df2 = groupd.sum()
df2
Out[232]:
1
0
阿姨 大姨|姨妈|姨娘|姨妈|姨母
阿諛 讨好|奉承
阿谀 奉承|巴结|恭维|献媚|讨好|谀媚|谄媚|谄谀|趋奉|迎阿|逢迎|
阿谀奉承 趋炎附势