1. 程式人生 > 用python分析英語母音及子音音素在單詞中的出現頻率資料

用python分析英語母音及子音音素在單詞中的出現頻率資料

import sqlite3
import matplotlib.pyplot as plt


# IPA vowel symbols to look for, in the order they are queried.
vowels = [
    # monophthongs
    "iː", "i", "ɪ", "e", "æ", "ɑː", "ɒ", "ɔː", "ʊ", "u", "uː", "ʌ", "ɜː", "ə",
    # diphthongs
    "eɪ", "əʊ", "aɪ", "aʊ", "ɔɪ", "ɪə", "eə", "ʊə",
]

# IPA consonant symbols to look for, in the order they are queried.
consonants = [
    "p", "b", "t", "d", "k", "ɡ",
    "tʃ", "dʒ",
    "f", "v", "θ", "ð", "s", "z", "ʃ", "ʒ", "h",
    "m", "n", "ŋ",
    "l", "r", "j", "w",
]



# Open the word database; `cursor` is shared by every query in this script.
conn = sqlite3.connect(r"d:\englishwords.db")
cursor = conn.cursor()

# Sanity check: print how many rows have a non-empty pronunciation
# containing "iː".
sql = (
    r"SELECT * FROM englishwords "
    r"WHERE pronunciation!='' AND pronunciation LIKE '%iː%'"
)
result = cursor.execute(sql).fetchall()
print(len(result))

def countPhoneme(phoneme):
    """Return how many ``englishwords`` rows contain *phoneme* in their pronunciation.

    Rows whose pronunciation is empty are not counted. Matching is a plain
    substring ``LIKE`` test, so a short symbol such as ``"i"`` also matches
    pronunciations that contain a longer symbol including it (``"iː"``).
    ``%`` and ``_`` inside *phoneme* are still treated as ``LIKE`` wildcards.

    The query runs on the module-level ``cursor``.

    Args:
        phoneme: Text to search for inside the ``pronunciation`` column.

    Returns:
        The number of matching rows, as an int.
    """
    # Bind the pattern as a parameter instead of formatting it into the SQL
    # text, so quotes in the phoneme cannot break or alter the statement.
    # COUNT(*) lets SQLite do the counting rather than fetching every row.
    sql = (
        "SELECT COUNT(*) FROM englishwords "
        "WHERE pronunciation != '' AND pronunciation LIKE ?"
    )
    cursor.execute(sql, ("%{}%".format(phoneme),))
    return cursor.fetchone()[0]



# One figure with two stacked axes: ax0 on top, ax1 below.
figure, (ax0, ax1) = plt.subplots(2, 1)
figure.set_figwidth(8)
figure.set_figheight(10)
# Only the spacing between subplots is overridden; margins keep their defaults.
plt.subplots_adjust(wspace=0.1, hspace=0.3)

# Query the count for every vowel symbol, echoing each result as it arrives.
vowelsdata = {}
for i in vowels:
    count = countPhoneme(i)
    print(i, " -> ", count)
    vowelsdata[i] = count

# Turn the mapping into (symbol, count) pairs ordered from most to least common.
vowelsdata = list(vowelsdata.items())
vowelsdata.sort(key=lambda pair: pair[1], reverse=True)

# Split the pairs into parallel label / height lists for the bar chart.
sortedX = [pair[0] for pair in vowelsdata]
sortedY = [pair[1] for pair in vowelsdata]

ax0.set_title("Vowel frequency in English words")
ax0.bar(sortedX, sortedY, color="#ff6666")





# Query the count for every consonant symbol, echoing each result as it arrives.
consonantsdata = {}
for i in consonants:
    count = countPhoneme(i)
    print(i, " -> ", count)
    consonantsdata[i] = count

# Turn the mapping into (symbol, count) pairs ordered from most to least common.
consonantsdata = list(consonantsdata.items())
consonantsdata.sort(key=lambda pair: pair[1], reverse=True)

# Split the pairs into parallel label / height lists for the bar chart.
sortedX = [pair[0] for pair in consonantsdata]
sortedY = [pair[1] for pair in consonantsdata]

# The leading newlines in the title are kept exactly as in the original text.
ax1.set_title("\n\nConsonant frequency in English words")
ax1.bar(sortedX, sortedY, color="#668866")


# Display the figure.
plt.show()