37,721
社区成员
发帖
与我相关
我的任务
分享
from random import randint
from urllib.request import urlopen
def wordListSum(word_list):
sum_int = 0
for word, value in word_list:
sum_int += value
return sum_int
def retrieveRandomWord(wordList):
rand_index = randint(1, wordListSum(wordList))
for word, value in wordList:
rand_index -= value
if rand_index <= 0:
return word
def buildWordDict(text_str):
# 剔除换行号和引号
text_str = text_str.replace("\n", " ")
text_str = text_str.replace("\"", "")
punctuation = [',', '.', ';', ':']
for symbol in punctuation:
text_str = text_str.replace(symbol, " " + symbol + " ")
words = text_str.split(" ")
# 过滤空单词
words = [word for word in words if word != ""]
word_dicts = {}
for no in range(1, len(words)):
if words[no - 1] not in word_dicts[words[no - 1]]:
word_dicts[words[no - 1]][words[no]] = 0
word_dicts[words[no - 1]][words[no]] += 1
return word_dicts
text = str(urlopen("http://pythonscraping.com/files/inaugurationSpeech.txt"
"").read(), 'utf-8')
word_dict = buildWordDict(text)
# 生成链长为100的马尔科夫链
length = 100
chain = ""
currentWord = "I"
for i in range(0, length):
chain += currentWord + " "
currentWord = retrieveRandomWord(word_dict[currentWord])
print(chain)
Traceback (most recent call last):
File "D:/gitDocument/document/WebScrapingWithPython/markovChain.py", line 42, in <module>
word_dict = buildWordDict(text)
File "D:/gitDocument/document/WebScrapingWithPython/markovChain.py", line 34, in buildWordDict
if words[no - 1] not in word_dicts[words[no - 1]]:
KeyError: 'Called'