1. 程式人生 > >第八章 馬爾科夫鏈 -將演講內容生成鏈長為100的markov組成的句子

第八章 馬爾科夫鏈 - 用演講內容生成鏈長為 100 的馬爾科夫鏈句子

#!/usr/bin/env python
# _*_ coding:utf-8 _*_
from random import randint
from urllib.request import urlopen


def wordListSum(wordList):
    """Return the total of all counts stored in ``wordList``.

    Args:
        wordList: Mapping of word -> occurrence count.

    Returns:
        The sum of every value in the mapping; ``0`` for an empty mapping.
    """
    # Use the builtin directly instead of shadowing it with a local named
    # ``sum``; only the values are needed, so the keys are not iterated.
    return sum(wordList.values())
def retrieveRandomWord(wordList):
    """Pick a random word from ``wordList``, weighted by its count.

    Args:
        wordList: Mapping of word -> occurrence count.

    Returns:
        One key of ``wordList``; a key with a larger count is proportionally
        more likely to be chosen.

    Raises:
        ValueError: If the counts do not add up to a positive total (for
            example when ``wordList`` is empty), since there is nothing to
            draw from.
    """
    total = wordListSum(wordList)
    if total <= 0:
        raise ValueError("wordList must contain at least one positive count")
    # Draw a position in 1..total, then walk the words subtracting each
    # count; the word whose count brings the remainder to zero or below is
    # the one that "owns" that position.
    randomIndex = randint(1, total)
    for word, value in wordList.items():
        randomIndex -= value
        if randomIndex <= 0:
            return word
def buildWordDict(text):
    """Build a table of word-to-next-word counts from ``text``.

    Newlines are turned into spaces, double quotes are removed, and the
    punctuation marks ``, . ; :`` are treated as separate tokens.

    Args:
        text: The source text as a string.

    Returns:
        A two-level dict ``{word: {next_word: count}}`` where ``count`` is
        how many times ``next_word`` directly followed ``word``. A token
        that is never followed by another token has no top-level entry.
    """
    # Strip newlines and double quotes.
    text = text.replace("\n", " ")
    text = text.replace("\"", "")
    # Surround punctuation with spaces so each mark becomes its own token.
    punctuation = [',', '.', ';', ':']
    for symbol in punctuation:
        text = text.replace(symbol, " " + symbol + " ")
    # Splitting on a single space yields empty strings for runs of spaces;
    # drop them.
    words = [word for word in text.split(" ") if word != ""]
    wordDict = {}
    # Pair every token with its successor. Starting from the first real
    # pair avoids linking the last token back to the first one.
    for previous, current in zip(words, words[1:]):
        # Create the inner dict on first sight of ``previous``.
        followers = wordDict.setdefault(previous, {})
        followers[current] = followers.get(current, 0) + 1
    return wordDict
# Download the speech; the context manager closes the HTTP response.
with urlopen("https://pythonscraping.com/files/inaugurationSpeech.txt") as response:
    text = str(response.read(), 'utf-8')
wordDict = buildWordDict(text)
# Generate a Markov chain that is 100 words long.
length = 100
currentWord = "I"
chainWords = []
for i in range(0, length):
    chainWords.append(currentWord)
    # A word with no recorded successor is a dead end; stop early rather
    # than fail on the missing key.
    if currentWord not in wordDict:
        break
    currentWord = retrieveRandomWord(wordDict[currentWord])
# Separate the words with spaces so the output is readable.
chain = " ".join(chainWords)
print(chain)