📅  最后修改于: 2022-03-11 14:45:39.582000             🧑  作者: Mango
from arabert.preprocess import ArabertPreprocessor
# Checkpoint identifier handed to the preprocessor constructor below.
model_name = "aubmindlab/bert-base-arabertv2"
arabert_prep = ArabertPreprocessor(model_name=model_name)

# Sample Arabic input sentence.
# NOTE(review): this literal was reconstructed from a mis-decoded (mojibake)
# copy in which UTF-8 bytes had been read as a single-byte encoding and the
# string had been split across several lines -- confirm against the upstream
# AraBERT usage example.
text = "ولن نبالغ إذا قلنا: إن 'هاتف' أو 'كمبيوتر المكتب' في زمننا هذا ضروري"
arabert_prep.preprocess(text)
# Result as displayed in the original snippet (reconstructed the same way):
# >>> "و+ لن نبالغ إذا قل +نا : إن ' هاتف ' أو ' كمبيوتر ال+ مكتب ' في زمن +نا هذا ضروري"