我的第一个 Python 程序,纪念一下。程序按首字母统计字典文件中的单词数量。字典文件选的是 Ubuntu 系统中美式英语和英式英语的常用单词列表(/usr/share/dict/american-english 和 /usr/share/dict/british-english)。结果显示,s 开头的单词最多,有 11000 多个;c 开头的也不少,接近 10000 个。
#!/usr/bin/env python
# -*- coding: utf-8 -*-
def getCountList(path):
    """Count the words in a word-list file by their first letter.

    The file at ``path`` is read one line at a time and every line is
    treated as one word.  A word is counted under the lowercase form of its
    first character when that character is an ASCII letter (a-z).  Any other
    line is reported on standard output and left out of every count.

    Args:
        path: Path of the word-list file to read.

    Returns:
        A list of ``(key, count)`` tuples sorted by count, largest first.
        ``key`` is a lowercase letter, or ``'*'`` for the total number of
        counted words.
    """
    counts = {}
    total = 0
    # The with-block closes the file even if processing a line raises.
    with open(path, 'r') as wordFile:
        # Iterating the file object streams the lines instead of loading
        # the whole word list into memory first.
        for word in wordFile:
            letter = word[0].lower()
            # lower() may return more than one character for some non-ASCII
            # input; the length check keeps such results out of the counts.
            if len(letter) == 1 and 'a' <= letter <= 'z':
                counts[letter] = counts.get(letter, 0) + 1
                total += 1
            else:
                # word still carries its line ending, exactly as read.
                print('该单词不能识别 %s' % word)
    # The grand total is stored under '*' so it is sorted with the letters.
    counts['*'] = total
    return sorted(counts.items(), key=lambda item: item[1], reverse=True)
def main():
    """Print the first-letter counts of both word lists side by side.

    Each word list is counted with getCountList, which returns its entries
    sorted by count.  Rows are paired by rank, not by letter, so the two
    halves of a row may show different letters.
    """
    americanList = getCountList('/usr/share/dict/american-english')
    britishList = getCountList('/usr/share/dict/british-english')
    print(' American British ')
    print('=====================')
    # zip stops at the shorter list, so a word list with no words for some
    # letter yields fewer rows rather than an IndexError from a fixed count.
    for american, british in zip(americanList, britishList):
        print(' %s %6d | %s %6d' % (american[0], american[1],
                                    british[0], british[1]))
# Run the comparison only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
输出结果
American British
=====================
* 98553 | * 98310
s 11072 | s 11035
c 9507 | c 9478
p 7539 | p 7513
b 6095 | b 6087
m 5952 | m 5958
a 5902 | a 5884
d 5811 | d 5807
r 5285 | r 5270
t 5002 | t 4996
f 4075 | f 4057
h 3856 | h 3855
e 3813 | e 3796
i 3613 | i 3598
g 3452 | g 3436
l 3437 | l 3425
w 2711 | w 2706
o 2240 | o 2238
n 2015 | n 2011
u 1899 | u 1895
v 1584 | v 1582
j 1259 | j 1253
k 1247 | k 1250
q 464 | q 461
y 380 | y 376
z 287 | z 287
x 56 | x 56