📅  最后修改于: 2022-03-11 14:56:14.005000             🧑  作者: Mango
# Collect the grammar productions of every parsed sentence in the first two
# treebank files; this list is the raw material for inducing the PCFG.
print("Induce PCFG grammar from treebank data:")
productions = []
for item in treebank.fileids()[:2]:
    for tree in treebank.parsed_sents(item):
        # Optional tree transformations applied before reading productions.
        # Collapse unary chains A-B-C into A-B+C; collapsePOS=False asks the
        # call not to collapse at the part-of-speech level.
        tree.collapse_unary(collapsePOS=False)
        # Binarize: A->(B,C,D) becomes A->B,C+D->D, with horizontal
        # Markov order 2 controlling the context kept in the new labels.
        tree.chomsky_normal_form(horzMarkov=2)
        # Accumulate this tree's productions; duplicates are kept on
        # purpose, since nothing here de-duplicates the list.
        productions.extend(tree.productions())