34
社区成员




# NPU 程序设计实训 HW12 词频统计
# 函数 word_freq() 用于统计词频
import re
import string
from collections import Counter
def word_freq(path, sight_word_path='./sight word.txt', top_n=10):
    """Return the most frequent words in a text file, ignoring sight words.

    Both files are read in full and lower-cased, so matching is
    case-insensitive. The sight-word file is split on whitespace; every
    resulting entry is excluded from the count.

    Args:
        path: Path of the text file to analyse.
        sight_word_path: Path of the whitespace-separated list of words to
            ignore. Defaults to ``'./sight word.txt'`` (relative to the
            current working directory).
        top_n: Maximum number of entries to return. Defaults to 10.

    Returns:
        A list of ``(word, count)`` tuples sorted by count, highest first.
        Words with equal counts appear in descending string order, because
        the ``(count, word)`` sort key is reversed as a whole.

    Raises:
        OSError: If either file cannot be opened (e.g. FileNotFoundError).
    """
    # Load the sight words; a set makes each membership test O(1).
    with open(sight_word_path, 'r') as sight_file:
        sight_words = set(sight_file.read().lower().split())

    # Load the text; `with` guarantees the handle is closed.
    with open(path, 'r') as text_file:
        text = text_file.read().lower()

    # A token is a run of lowercase letters, digits, apostrophes, em dashes
    # and curly double quotes.
    # NOTE(review): because '—', '“' and '”' are part of the character class,
    # a token such as '“the' is distinct from 'the' -- confirm this is intended.
    tokens = re.findall(r'[a-z\'0-9—“”]+', text)

    # Count every token that is not a sight word.
    counts = Counter(token for token in tokens if token not in sight_words)

    ranked = sorted(counts.items(), key=lambda x: (x[1], x[0]), reverse=True)
    return ranked[:top_n]
# Try the function out on the sample text and show the ranked result.
path = './text.txt'
top_words = word_freq(path)
print(top_words)
结果如下