📅  最后修改于: 2022-03-11 14:45:51.897000             🧑  作者: Mango
from nltk import word_tokenize
from nltk.util import ngrams
text = ['cant railway station', 'citadel hotel', 'police stn']
for line in text:
token = nltk.word_tokenize(line)
bigram = list(ngrams(token, 2))
# the '2' represents bigram...you can change it to get ngrams with different size