# Word count: print the 20 most frequent multi-character tokens of a text file.
import jieba
# Script: tokenize a text file with jieba, count how often each token occurs
# (skipping single-character tokens), and print the most frequent ones as
# "<token> <count>" lines, highest count first.
fname = '/mnt/ssd/share/books/Alice_Wonderland.txt'
TOP_N = 20  # maximum number of entries to print

# Read the whole file; the context manager closes the handle even if
# reading fails.
with open(fname, 'r', encoding='utf-8') as f:
    txt = f.read()

# tokenizer
words = jieba.lcut(txt)

# statistics: token -> number of occurrences
counts = {}
for word in words:
    if len(word) == 1:  # ignore single-character tokens
        continue
    counts[word] = counts.get(word, 0) + 1

# sort by count, descending; the sort is stable, so tokens with equal counts
# keep their first-seen order
items = sorted(counts.items(), key=lambda x: x[1], reverse=True)

# output the top TOP_N entries; slicing avoids an IndexError when the text
# contains fewer than TOP_N distinct tokens
for word, count in items[:TOP_N]:
    print(word, count)