1200 lines
40 KiB
Python
1200 lines
40 KiB
Python
# Generated by Snowball 2.2.0 - https://snowballstem.org/
|
|
|
|
from .basestemmer import BaseStemmer
|
|
from .among import Among
|
|
|
|
|
|
class ArabicStemmer(BaseStemmer):
    '''
    This class implements the stemming algorithm defined by a snowball script.
    Generated by Snowball 2.2.0 - https://snowballstem.org/

    The class-level ``a_N`` lists are Among tables searched by the ``__r_*``
    routines through ``find_among`` / ``find_among_b``; the ``B_*`` attributes
    are boolean flags written by ``_stem`` and some of the routines.
    '''

    # NOTE(review): Among is defined in .among. From its use here the first
    # argument is the string to match and the third is the result code that
    # find_among / find_among_b hands back; the second looks like the index of
    # a shorter entry of the same table (or -1) - confirm against Among.

    # Searched by __r_Normalize_pre.
    a_0 = [
        # Result 1: U+0640 and U+064B..U+0652; the match is deleted.
        Among(u"\u0640", -1, 1),
        Among(u"\u064B", -1, 1),
        Among(u"\u064C", -1, 1),
        Among(u"\u064D", -1, 1),
        Among(u"\u064E", -1, 1),
        Among(u"\u064F", -1, 1),
        Among(u"\u0650", -1, 1),
        Among(u"\u0651", -1, 1),
        Among(u"\u0652", -1, 1),
        # Results 2..11: U+0660..U+0669, replaced by the ASCII digits 0..9.
        Among(u"\u0660", -1, 2),
        Among(u"\u0661", -1, 3),
        Among(u"\u0662", -1, 4),
        Among(u"\u0663", -1, 5),
        Among(u"\u0664", -1, 6),
        Among(u"\u0665", -1, 7),
        Among(u"\u0666", -1, 8),
        Among(u"\u0667", -1, 9),
        Among(u"\u0668", -1, 10),
        Among(u"\u0669", -1, 11),
        # Results 12..51: U+FE80..U+FEFC, replaced by one or two characters
        # from the U+0621..U+064A range.
        Among(u"\uFE80", -1, 12),
        Among(u"\uFE81", -1, 16),
        Among(u"\uFE82", -1, 16),
        Among(u"\uFE83", -1, 13),
        Among(u"\uFE84", -1, 13),
        Among(u"\uFE85", -1, 17),
        Among(u"\uFE86", -1, 17),
        Among(u"\uFE87", -1, 14),
        Among(u"\uFE88", -1, 14),
        Among(u"\uFE89", -1, 15),
        Among(u"\uFE8A", -1, 15),
        Among(u"\uFE8B", -1, 15),
        Among(u"\uFE8C", -1, 15),
        Among(u"\uFE8D", -1, 18),
        Among(u"\uFE8E", -1, 18),
        Among(u"\uFE8F", -1, 19),
        Among(u"\uFE90", -1, 19),
        Among(u"\uFE91", -1, 19),
        Among(u"\uFE92", -1, 19),
        Among(u"\uFE93", -1, 20),
        Among(u"\uFE94", -1, 20),
        Among(u"\uFE95", -1, 21),
        Among(u"\uFE96", -1, 21),
        Among(u"\uFE97", -1, 21),
        Among(u"\uFE98", -1, 21),
        Among(u"\uFE99", -1, 22),
        Among(u"\uFE9A", -1, 22),
        Among(u"\uFE9B", -1, 22),
        Among(u"\uFE9C", -1, 22),
        Among(u"\uFE9D", -1, 23),
        Among(u"\uFE9E", -1, 23),
        Among(u"\uFE9F", -1, 23),
        Among(u"\uFEA0", -1, 23),
        Among(u"\uFEA1", -1, 24),
        Among(u"\uFEA2", -1, 24),
        Among(u"\uFEA3", -1, 24),
        Among(u"\uFEA4", -1, 24),
        Among(u"\uFEA5", -1, 25),
        Among(u"\uFEA6", -1, 25),
        Among(u"\uFEA7", -1, 25),
        Among(u"\uFEA8", -1, 25),
        Among(u"\uFEA9", -1, 26),
        Among(u"\uFEAA", -1, 26),
        Among(u"\uFEAB", -1, 27),
        Among(u"\uFEAC", -1, 27),
        Among(u"\uFEAD", -1, 28),
        Among(u"\uFEAE", -1, 28),
        Among(u"\uFEAF", -1, 29),
        Among(u"\uFEB0", -1, 29),
        Among(u"\uFEB1", -1, 30),
        Among(u"\uFEB2", -1, 30),
        Among(u"\uFEB3", -1, 30),
        Among(u"\uFEB4", -1, 30),
        Among(u"\uFEB5", -1, 31),
        Among(u"\uFEB6", -1, 31),
        Among(u"\uFEB7", -1, 31),
        Among(u"\uFEB8", -1, 31),
        Among(u"\uFEB9", -1, 32),
        Among(u"\uFEBA", -1, 32),
        Among(u"\uFEBB", -1, 32),
        Among(u"\uFEBC", -1, 32),
        Among(u"\uFEBD", -1, 33),
        Among(u"\uFEBE", -1, 33),
        Among(u"\uFEBF", -1, 33),
        Among(u"\uFEC0", -1, 33),
        Among(u"\uFEC1", -1, 34),
        Among(u"\uFEC2", -1, 34),
        Among(u"\uFEC3", -1, 34),
        Among(u"\uFEC4", -1, 34),
        Among(u"\uFEC5", -1, 35),
        Among(u"\uFEC6", -1, 35),
        Among(u"\uFEC7", -1, 35),
        Among(u"\uFEC8", -1, 35),
        Among(u"\uFEC9", -1, 36),
        Among(u"\uFECA", -1, 36),
        Among(u"\uFECB", -1, 36),
        Among(u"\uFECC", -1, 36),
        Among(u"\uFECD", -1, 37),
        Among(u"\uFECE", -1, 37),
        Among(u"\uFECF", -1, 37),
        Among(u"\uFED0", -1, 37),
        Among(u"\uFED1", -1, 38),
        Among(u"\uFED2", -1, 38),
        Among(u"\uFED3", -1, 38),
        Among(u"\uFED4", -1, 38),
        Among(u"\uFED5", -1, 39),
        Among(u"\uFED6", -1, 39),
        Among(u"\uFED7", -1, 39),
        Among(u"\uFED8", -1, 39),
        Among(u"\uFED9", -1, 40),
        Among(u"\uFEDA", -1, 40),
        Among(u"\uFEDB", -1, 40),
        Among(u"\uFEDC", -1, 40),
        Among(u"\uFEDD", -1, 41),
        Among(u"\uFEDE", -1, 41),
        Among(u"\uFEDF", -1, 41),
        Among(u"\uFEE0", -1, 41),
        Among(u"\uFEE1", -1, 42),
        Among(u"\uFEE2", -1, 42),
        Among(u"\uFEE3", -1, 42),
        Among(u"\uFEE4", -1, 42),
        Among(u"\uFEE5", -1, 43),
        Among(u"\uFEE6", -1, 43),
        Among(u"\uFEE7", -1, 43),
        Among(u"\uFEE8", -1, 43),
        Among(u"\uFEE9", -1, 44),
        Among(u"\uFEEA", -1, 44),
        Among(u"\uFEEB", -1, 44),
        Among(u"\uFEEC", -1, 44),
        Among(u"\uFEED", -1, 45),
        Among(u"\uFEEE", -1, 45),
        Among(u"\uFEEF", -1, 46),
        Among(u"\uFEF0", -1, 46),
        Among(u"\uFEF1", -1, 47),
        Among(u"\uFEF2", -1, 47),
        Among(u"\uFEF3", -1, 47),
        Among(u"\uFEF4", -1, 47),
        Among(u"\uFEF5", -1, 51),
        Among(u"\uFEF6", -1, 51),
        Among(u"\uFEF7", -1, 49),
        Among(u"\uFEF8", -1, 49),
        Among(u"\uFEF9", -1, 50),
        Among(u"\uFEFA", -1, 50),
        Among(u"\uFEFB", -1, 48),
        Among(u"\uFEFC", -1, 48)
    ]

    # Searched backwards from the end of the word by __r_Normalize_post;
    # any match is replaced by U+0621.
    a_1 = [
        Among(u"\u0622", -1, 1),
        Among(u"\u0623", -1, 1),
        Among(u"\u0624", -1, 1),
        Among(u"\u0625", -1, 1),
        Among(u"\u0626", -1, 1)
    ]

    # Searched forwards by __r_Normalize_post: result 1 -> U+0627,
    # result 2 -> U+0648, result 3 -> U+064A.
    a_2 = [
        Among(u"\u0622", -1, 1),
        Among(u"\u0623", -1, 1),
        Among(u"\u0624", -1, 2),
        Among(u"\u0625", -1, 1),
        Among(u"\u0626", -1, 3)
    ]

    # Searched by __r_Checks1 (same entries as a_6).
    a_3 = [
        Among(u"\u0627\u0644", -1, 2),
        Among(u"\u0628\u0627\u0644", -1, 1),
        Among(u"\u0643\u0627\u0644", -1, 1),
        Among(u"\u0644\u0644", -1, 2)
    ]

    # Searched by __r_Prefix_Step1.
    a_4 = [
        Among(u"\u0623\u0622", -1, 2),
        Among(u"\u0623\u0623", -1, 1),
        Among(u"\u0623\u0624", -1, 1),
        Among(u"\u0623\u0625", -1, 4),
        Among(u"\u0623\u0627", -1, 3)
    ]

    # Searched by __r_Prefix_Step2.
    a_5 = [
        Among(u"\u0641", -1, 1),
        Among(u"\u0648", -1, 1)
    ]

    # Searched by __r_Prefix_Step3a_Noun (same entries as a_3).
    a_6 = [
        Among(u"\u0627\u0644", -1, 2),
        Among(u"\u0628\u0627\u0644", -1, 1),
        Among(u"\u0643\u0627\u0644", -1, 1),
        Among(u"\u0644\u0644", -1, 2)
    ]

    # Searched by __r_Prefix_Step3b_Noun. The entry with result -1 matches
    # none of that routine's branches, so the word is left unchanged for it.
    a_7 = [
        Among(u"\u0628", -1, 1),
        Among(u"\u0628\u0627", 0, -1),
        Among(u"\u0628\u0628", 0, 2),
        Among(u"\u0643\u0643", -1, 3)
    ]

    # Searched by __r_Prefix_Step3_Verb.
    a_8 = [
        Among(u"\u0633\u0623", -1, 4),
        Among(u"\u0633\u062A", -1, 2),
        Among(u"\u0633\u0646", -1, 3),
        Among(u"\u0633\u064A", -1, 1)
    ]

    # Searched by __r_Prefix_Step4_Verb.
    a_9 = [
        Among(u"\u062A\u0633\u062A", -1, 1),
        Among(u"\u0646\u0633\u062A", -1, 1),
        Among(u"\u064A\u0633\u062A", -1, 1)
    ]

    # Searched backwards by __r_Suffix_Noun_Step1a.
    a_10 = [
        Among(u"\u0643\u0645\u0627", -1, 3),
        Among(u"\u0647\u0645\u0627", -1, 3),
        Among(u"\u0646\u0627", -1, 2),
        Among(u"\u0647\u0627", -1, 2),
        Among(u"\u0643", -1, 1),
        Among(u"\u0643\u0645", -1, 2),
        Among(u"\u0647\u0645", -1, 2),
        Among(u"\u0647\u0646", -1, 2),
        Among(u"\u0647", -1, 1),
        Among(u"\u064A", -1, 1)
    ]

    # Searched backwards by __r_Suffix_Noun_Step1b.
    a_11 = [
        Among(u"\u0646", -1, 1)
    ]

    # Searched backwards by __r_Suffix_Noun_Step2a.
    a_12 = [
        Among(u"\u0627", -1, 1),
        Among(u"\u0648", -1, 1),
        Among(u"\u064A", -1, 1)
    ]

    # Searched backwards by __r_Suffix_Noun_Step2b.
    a_13 = [
        Among(u"\u0627\u062A", -1, 1)
    ]

    # Searched backwards by __r_Suffix_Noun_Step2c1.
    a_14 = [
        Among(u"\u062A", -1, 1)
    ]

    # Searched backwards by __r_Suffix_Noun_Step2c2.
    a_15 = [
        Among(u"\u0629", -1, 1)
    ]

    # Searched backwards by __r_Suffix_Noun_Step3.
    a_16 = [
        Among(u"\u064A", -1, 1)
    ]

    # Searched backwards by __r_Suffix_Verb_Step1.
    a_17 = [
        Among(u"\u0643\u0645\u0627", -1, 3),
        Among(u"\u0647\u0645\u0627", -1, 3),
        Among(u"\u0646\u0627", -1, 2),
        Among(u"\u0647\u0627", -1, 2),
        Among(u"\u0643", -1, 1),
        Among(u"\u0643\u0645", -1, 2),
        Among(u"\u0647\u0645", -1, 2),
        Among(u"\u0643\u0646", -1, 2),
        Among(u"\u0647\u0646", -1, 2),
        Among(u"\u0647", -1, 1),
        Among(u"\u0643\u0645\u0648", -1, 3),
        Among(u"\u0646\u064A", -1, 2)
    ]

    # Searched backwards by __r_Suffix_Verb_Step2a.
    a_18 = [
        Among(u"\u0627", -1, 1),
        Among(u"\u062A\u0627", 0, 2),
        Among(u"\u062A\u0645\u0627", 0, 4),
        Among(u"\u0646\u0627", 0, 2),
        Among(u"\u062A", -1, 1),
        Among(u"\u0646", -1, 1),
        Among(u"\u0627\u0646", 5, 3),
        Among(u"\u062A\u0646", 5, 2),
        Among(u"\u0648\u0646", 5, 3),
        Among(u"\u064A\u0646", 5, 3),
        Among(u"\u064A", -1, 1)
    ]

    # Searched backwards by __r_Suffix_Verb_Step2b.
    a_19 = [
        Among(u"\u0648\u0627", -1, 1),
        Among(u"\u062A\u0645", -1, 1)
    ]

    # Searched backwards by __r_Suffix_Verb_Step2c.
    a_20 = [
        Among(u"\u0648", -1, 1),
        Among(u"\u062A\u0645\u0648", 0, 2)
    ]

    # Searched backwards by __r_Suffix_All_alef_maqsura.
    a_21 = [
        Among(u"\u0649", -1, 1)
    ]

    # Flags: _stem resets them at the start of every call; __r_Checks1 sets
    # all three and __r_Prefix_Step4_Verb sets B_is_verb / B_is_noun.
    B_is_defined = False
    B_is_verb = False
    B_is_noun = False
|
|
|
|
def __r_Normalize_pre(self):
    '''
    Rewrite every a_0 match found from the cursor up to the limit.

    At each position table a_0 is tried: result 1 deletes the match, any
    other result replaces it with a fixed string. Where nothing matches the
    cursor advances by one character. The cursor is restored to its entry
    value before returning.

    Returns False if a slice operation fails (the cursor is then left where
    it was), True otherwise.
    '''
    # Replacement text for a_0 result codes 2..50, indexed by (code - 2).
    replacements = (
        u"0", u"1", u"2", u"3", u"4", u"5", u"6", u"7", u"8", u"9",
        u"\u0621", u"\u0623", u"\u0625", u"\u0626", u"\u0622", u"\u0624",
        u"\u0627", u"\u0628", u"\u0629", u"\u062A", u"\u062B", u"\u062C",
        u"\u062D", u"\u062E", u"\u062F", u"\u0630", u"\u0631", u"\u0632",
        u"\u0633", u"\u0634", u"\u0635", u"\u0636", u"\u0637", u"\u0638",
        u"\u0639", u"\u063A", u"\u0641", u"\u0642", u"\u0643", u"\u0644",
        u"\u0645", u"\u0646", u"\u0647", u"\u0648", u"\u0649", u"\u064A",
        u"\u0644\u0627", u"\u0644\u0623", u"\u0644\u0625",
    )
    entry_cursor = self.cursor
    while True:
        position = self.cursor
        self.bra = self.cursor
        among_var = self.find_among(ArabicStemmer.a_0)
        if among_var == 0:
            # Nothing to rewrite here: step over one character, or stop
            # once the limit has been reached.
            self.cursor = position
            if self.cursor >= self.limit:
                break
            self.cursor += 1
            continue
        self.ket = self.cursor
        if among_var == 1:
            if not self.slice_del():
                return False
            continue
        if 2 <= among_var <= 50:
            text = replacements[among_var - 2]
        else:
            # Remaining result code (51 in a_0).
            text = u"\u0644\u0622"
        if not self.slice_from(text):
            return False
    self.cursor = entry_cursor
    return True
|
|
|
|
def __r_Normalize_post(self):
    '''
    Normalise the a_1 / a_2 characters once affixes have been handled.

    First pass: limit_backward is set to the cursor and table a_1 is searched
    backwards from the limit; a match is replaced by U+0621. Second pass:
    scanning forward from the entry cursor, each a_2 match is replaced by
    U+0627, U+0648 or U+064A according to its result code. The cursor is
    restored to its entry value before returning.

    Returns False if a slice operation fails, True otherwise.
    '''
    entry_cursor = self.cursor

    # Pass 1: inspect the end of the word only.
    self.limit_backward = self.cursor
    self.cursor = self.limit
    self.ket = self.cursor
    if self.find_among_b(ArabicStemmer.a_1) != 0:
        self.bra = self.cursor
        if not self.slice_from(u"\u0621"):
            return False
        self.cursor = self.limit_backward
    self.cursor = entry_cursor

    # Pass 2: rewrite every a_2 match from the cursor up to the limit.
    while True:
        position = self.cursor
        self.bra = self.cursor
        among_var = self.find_among(ArabicStemmer.a_2)
        if among_var == 0:
            self.cursor = position
            if self.cursor >= self.limit:
                break
            self.cursor += 1
            continue
        self.ket = self.cursor
        if among_var == 1:
            text = u"\u0627"
        elif among_var == 2:
            text = u"\u0648"
        else:
            text = u"\u064A"
        if not self.slice_from(text):
            return False
    self.cursor = entry_cursor
    return True
|
|
|
|
def __r_Checks1(self):
    '''
    Flag the word as a defined noun when it starts with an a_3 prefix.

    The word must be longer than 4 characters for result 1 and longer than
    3 for any other result. On success B_is_noun and B_is_defined are set
    and B_is_verb is cleared; the text itself is not modified.

    Returns True when the flags were set, False otherwise.
    '''
    self.bra = self.cursor
    among_var = self.find_among(ArabicStemmer.a_3)
    if among_var == 0:
        return False
    self.ket = self.cursor
    length_to_exceed = 4 if among_var == 1 else 3
    if len(self.current) <= length_to_exceed:
        return False
    self.B_is_noun = True
    self.B_is_verb = False
    self.B_is_defined = True
    return True
|
|
|
|
def __r_Prefix_Step1(self):
    '''
    Collapse a two-character a_4 prefix into a single character.

    Applies only when the word is longer than 3 characters. Returns True
    when the replacement was made, False otherwise.
    '''
    self.bra = self.cursor
    among_var = self.find_among(ArabicStemmer.a_4)
    if among_var == 0:
        return False
    self.ket = self.cursor
    # Every result code shares the same length requirement.
    if len(self.current) <= 3:
        return False
    if among_var == 1:
        replacement = u"\u0623"
    elif among_var == 2:
        replacement = u"\u0622"
    elif among_var == 3:
        replacement = u"\u0627"
    else:
        replacement = u"\u0625"
    if not self.slice_from(replacement):
        return False
    return True
|
|
|
|
def __r_Prefix_Step2(self):
    '''
    Delete a one-character a_5 prefix.

    Applies only when the word is longer than 3 characters and the prefix
    is not immediately followed by U+0627. Returns True when the prefix was
    deleted, False otherwise.
    '''
    self.bra = self.cursor
    if self.find_among(ArabicStemmer.a_5) == 0:
        return False
    self.ket = self.cursor
    if len(self.current) <= 3:
        return False
    after_prefix = self.cursor
    # Negative lookahead: give up if U+0627 comes next.
    if self.eq_s(u"\u0627"):
        return False
    self.cursor = after_prefix
    if not self.slice_del():
        return False
    return True
|
|
|
|
def __r_Prefix_Step3a_Noun(self):
    '''
    Delete an a_6 prefix from the word.

    The word must be longer than 5 characters for result 1 and longer than
    4 for any other result. Returns True when the prefix was deleted,
    False otherwise.
    '''
    self.bra = self.cursor
    among_var = self.find_among(ArabicStemmer.a_6)
    if among_var == 0:
        return False
    self.ket = self.cursor
    length_to_exceed = 5 if among_var == 1 else 4
    if len(self.current) <= length_to_exceed:
        return False
    if not self.slice_del():
        return False
    return True
|
|
|
|
def __r_Prefix_Step3b_Noun(self):
    '''
    Delete or shorten an a_7 prefix.

    Result 1 deletes the match; results 2 and 3 replace it with U+0628 and
    U+0643 respectively. Each of these needs a word longer than 3
    characters. Any other non-zero result (a_7 holds one entry with result
    -1) leaves the text unchanged and still reports success.

    Returns False when nothing matches, the word is too short, or a slice
    operation fails; True otherwise.
    '''
    self.bra = self.cursor
    among_var = self.find_among(ArabicStemmer.a_7)
    if among_var == 0:
        return False
    self.ket = self.cursor
    if among_var in (1, 2, 3):
        if len(self.current) <= 3:
            return False
        if among_var == 1:
            changed = self.slice_del()
        elif among_var == 2:
            changed = self.slice_from(u"\u0628")
        else:
            changed = self.slice_from(u"\u0643")
        if not changed:
            return False
    return True
|
|
|
|
def __r_Prefix_Step3_Verb(self):
    '''
    Replace a two-character a_8 prefix by its second character.

    Applies only when the word is longer than 4 characters. Returns True
    when the replacement was made, False otherwise.
    '''
    self.bra = self.cursor
    among_var = self.find_among(ArabicStemmer.a_8)
    if among_var == 0:
        return False
    self.ket = self.cursor
    # Every result code shares the same length requirement.
    if len(self.current) <= 4:
        return False
    if among_var == 1:
        replacement = u"\u064A"
    elif among_var == 2:
        replacement = u"\u062A"
    elif among_var == 3:
        replacement = u"\u0646"
    else:
        replacement = u"\u0623"
    if not self.slice_from(replacement):
        return False
    return True
|
|
|
|
def __r_Prefix_Step4_Verb(self):
    '''
    Replace an a_9 prefix by U+0627 U+0633 U+062A and mark the word as a verb.

    Applies only when the word is longer than 4 characters. B_is_verb is set
    and B_is_noun cleared before the replacement is attempted.

    Returns True when the replacement was made, False otherwise.
    '''
    self.bra = self.cursor
    matched = self.find_among(ArabicStemmer.a_9) != 0
    if not matched:
        return False
    self.ket = self.cursor
    if len(self.current) <= 4:
        return False
    self.B_is_verb = True
    self.B_is_noun = False
    if self.slice_from(u"\u0627\u0633\u062A"):
        return True
    return False
|
|
|
|
def __r_Suffix_Noun_Step1a(self):
    '''
    Delete an a_10 suffix (searched backwards from the cursor).

    Minimum word length is 4 for result 1, 5 for result 2 and 6 for any
    other result. Returns True when the suffix was deleted, False otherwise.
    '''
    self.ket = self.cursor
    among_var = self.find_among_b(ArabicStemmer.a_10)
    if among_var == 0:
        return False
    self.bra = self.cursor
    if among_var == 1:
        minimum_length = 4
    elif among_var == 2:
        minimum_length = 5
    else:
        minimum_length = 6
    if len(self.current) < minimum_length:
        return False
    if not self.slice_del():
        return False
    return True
|
|
|
|
def __r_Suffix_Noun_Step1b(self):
    '''
    Delete the a_11 suffix when the word is longer than 5 characters.

    Returns True when the suffix was deleted, False otherwise.
    '''
    self.ket = self.cursor
    if self.find_among_b(ArabicStemmer.a_11) != 0:
        self.bra = self.cursor
        if len(self.current) > 5 and self.slice_del():
            return True
    return False
|
|
|
|
def __r_Suffix_Noun_Step2a(self):
    '''
    Delete an a_12 suffix when the word is longer than 4 characters.

    Returns True when the suffix was deleted, False otherwise.
    '''
    self.ket = self.cursor
    if self.find_among_b(ArabicStemmer.a_12) != 0:
        self.bra = self.cursor
        if len(self.current) > 4 and self.slice_del():
            return True
    return False
|
|
|
|
def __r_Suffix_Noun_Step2b(self):
    '''
    Delete the a_13 suffix when the word has at least 5 characters.

    Returns True when the suffix was deleted, False otherwise.
    '''
    self.ket = self.cursor
    if self.find_among_b(ArabicStemmer.a_13) != 0:
        self.bra = self.cursor
        if len(self.current) >= 5 and self.slice_del():
            return True
    return False
|
|
|
|
def __r_Suffix_Noun_Step2c1(self):
    '''
    Delete the a_14 suffix when the word has at least 4 characters.

    Returns True when the suffix was deleted, False otherwise.
    '''
    self.ket = self.cursor
    if self.find_among_b(ArabicStemmer.a_14) != 0:
        self.bra = self.cursor
        if len(self.current) >= 4 and self.slice_del():
            return True
    return False
|
|
|
|
def __r_Suffix_Noun_Step2c2(self):
    '''
    Delete the a_15 suffix when the word has at least 4 characters.

    Returns True when the suffix was deleted, False otherwise.
    '''
    self.ket = self.cursor
    if self.find_among_b(ArabicStemmer.a_15) != 0:
        self.bra = self.cursor
        if len(self.current) >= 4 and self.slice_del():
            return True
    return False
|
|
|
|
def __r_Suffix_Noun_Step3(self):
    '''
    Delete the a_16 suffix when the word has at least 3 characters.

    Returns True when the suffix was deleted, False otherwise.
    '''
    self.ket = self.cursor
    if self.find_among_b(ArabicStemmer.a_16) != 0:
        self.bra = self.cursor
        if len(self.current) >= 3 and self.slice_del():
            return True
    return False
|
|
|
|
def __r_Suffix_Verb_Step1(self):
    '''
    Delete an a_17 suffix (searched backwards from the cursor).

    Minimum word length is 4 for result 1, 5 for result 2 and 6 for any
    other result. Returns True when the suffix was deleted, False otherwise.
    '''
    self.ket = self.cursor
    among_var = self.find_among_b(ArabicStemmer.a_17)
    if among_var == 0:
        return False
    self.bra = self.cursor
    if among_var == 1:
        minimum_length = 4
    elif among_var == 2:
        minimum_length = 5
    else:
        minimum_length = 6
    if len(self.current) < minimum_length:
        return False
    if not self.slice_del():
        return False
    return True
|
|
|
|
def __r_Suffix_Verb_Step2a(self):
    '''
    Delete an a_18 suffix (searched backwards from the cursor).

    Minimum word length is 4 for result 1, 5 for result 2 and 6 for any
    other result. Returns True when the suffix was deleted, False otherwise.
    '''
    self.ket = self.cursor
    among_var = self.find_among_b(ArabicStemmer.a_18)
    if among_var == 0:
        return False
    self.bra = self.cursor
    if among_var == 1:
        minimum_length = 4
    elif among_var == 2:
        minimum_length = 5
    else:
        # Result 3 requires "more than 5" and the remaining result
        # "at least 6"; for an integer length these are the same test.
        minimum_length = 6
    if len(self.current) < minimum_length:
        return False
    if not self.slice_del():
        return False
    return True
|
|
|
|
def __r_Suffix_Verb_Step2b(self):
    '''
    Delete an a_19 suffix when the word has at least 5 characters.

    Returns True when the suffix was deleted, False otherwise.
    '''
    self.ket = self.cursor
    if self.find_among_b(ArabicStemmer.a_19) != 0:
        self.bra = self.cursor
        if len(self.current) >= 5 and self.slice_del():
            return True
    return False
|
|
|
|
def __r_Suffix_Verb_Step2c(self):
    '''
    Delete an a_20 suffix (searched backwards from the cursor).

    Minimum word length is 4 for result 1 and 6 for any other result.
    Returns True when the suffix was deleted, False otherwise.
    '''
    self.ket = self.cursor
    among_var = self.find_among_b(ArabicStemmer.a_20)
    if among_var == 0:
        return False
    self.bra = self.cursor
    minimum_length = 4 if among_var == 1 else 6
    if len(self.current) < minimum_length:
        return False
    if not self.slice_del():
        return False
    return True
|
|
|
|
def __r_Suffix_All_alef_maqsura(self):
    '''
    Replace a trailing U+0649 (table a_21) by U+064A.

    No length condition applies. Returns True when the replacement was
    made, False otherwise.
    '''
    self.ket = self.cursor
    if self.find_among_b(ArabicStemmer.a_21) != 0:
        self.bra = self.cursor
        if self.slice_from(u"\u064A"):
            return True
    return False
|
|
|
|
def _stem(self):
    '''
    Run the full stemming pipeline on the current word.

    Order of work: reset the B_* flags, run __r_Checks1, run
    __r_Normalize_pre, strip suffixes working backwards from the end of the
    word, strip prefixes working forwards from the start, then run
    __r_Normalize_post. Always returns True.

    The labN exceptions are used purely as jump targets: ``raise labN()``
    leaves the enclosing ``try`` whose handler is ``except labN: pass``.
    '''
    # Start with the word allowed to be either a noun or a verb.
    self.B_is_noun = True
    self.B_is_verb = True
    self.B_is_defined = False
    v_1 = self.cursor
    # Result ignored: only the flag side effects of Checks1 matter here.
    self.__r_Checks1()
    self.cursor = v_1
    self.__r_Normalize_pre()
    # Suffix phase: switch to backward processing from the end of the word.
    # The v_N values below store positions as offsets from self.limit.
    self.limit_backward = self.cursor
    self.cursor = self.limit
    v_3 = self.limit - self.cursor
    try:
        try:
            v_4 = self.limit - self.cursor
            # Alternative 1: verb suffixes, only while B_is_verb is set.
            try:
                if not self.B_is_verb:
                    raise lab2()
                try:
                    v_5 = self.limit - self.cursor
                    try:
                        # Apply Suffix_Verb_Step1 repeatedly; v_6 stays > 0
                        # unless it succeeded at least once.
                        v_6 = 1
                        while True:
                            v_7 = self.limit - self.cursor
                            try:
                                if not self.__r_Suffix_Verb_Step1():
                                    raise lab5()
                                v_6 -= 1
                                continue
                            except lab5: pass
                            self.cursor = self.limit - v_7
                            break
                        if v_6 > 0:
                            raise lab4()
                        # Then Step2a, else Step2c, else step back one
                        # character (fails at limit_backward).
                        try:
                            v_8 = self.limit - self.cursor
                            try:
                                if not self.__r_Suffix_Verb_Step2a():
                                    raise lab7()
                                raise lab6()
                            except lab7: pass
                            self.cursor = self.limit - v_8
                            try:
                                if not self.__r_Suffix_Verb_Step2c():
                                    raise lab8()
                                raise lab6()
                            except lab8: pass
                            self.cursor = self.limit - v_8
                            if self.cursor <= self.limit_backward:
                                raise lab4()
                            self.cursor -= 1
                        except lab6: pass
                        raise lab3()
                    except lab4: pass
                    # Step1 path failed: try Step2b, else Step2a.
                    self.cursor = self.limit - v_5
                    try:
                        if not self.__r_Suffix_Verb_Step2b():
                            raise lab9()
                        raise lab3()
                    except lab9: pass
                    self.cursor = self.limit - v_5
                    if not self.__r_Suffix_Verb_Step2a():
                        raise lab2()
                except lab3: pass
                # A verb suffix rule applied: skip the other alternatives.
                raise lab1()
            except lab2: pass
            self.cursor = self.limit - v_4
            # Alternative 2: noun suffixes, only while B_is_noun is set.
            try:
                if not self.B_is_noun:
                    raise lab10()
                v_9 = self.limit - self.cursor
                # Optional group (lab11): if every option inside fails, the
                # cursor is put back to v_9 and Suffix_Noun_Step3 is still tried.
                try:
                    try:
                        v_10 = self.limit - self.cursor
                        # Option a: Step2c2.
                        try:
                            if not self.__r_Suffix_Noun_Step2c2():
                                raise lab13()
                            raise lab12()
                        except lab13: pass
                        self.cursor = self.limit - v_10
                        # Option b: only when B_is_defined is not set -
                        # Step1a followed by Step2a / Step2b / Step2c1 /
                        # stepping back one character.
                        try:
                            try:
                                if not self.B_is_defined:
                                    raise lab15()
                                raise lab14()
                            except lab15: pass
                            if not self.__r_Suffix_Noun_Step1a():
                                raise lab14()
                            try:
                                v_12 = self.limit - self.cursor
                                try:
                                    if not self.__r_Suffix_Noun_Step2a():
                                        raise lab17()
                                    raise lab16()
                                except lab17: pass
                                self.cursor = self.limit - v_12
                                try:
                                    if not self.__r_Suffix_Noun_Step2b():
                                        raise lab18()
                                    raise lab16()
                                except lab18: pass
                                self.cursor = self.limit - v_12
                                try:
                                    if not self.__r_Suffix_Noun_Step2c1():
                                        raise lab19()
                                    raise lab16()
                                except lab19: pass
                                self.cursor = self.limit - v_12
                                if self.cursor <= self.limit_backward:
                                    raise lab14()
                                self.cursor -= 1
                            except lab16: pass
                            raise lab12()
                        except lab14: pass
                        self.cursor = self.limit - v_10
                        # Option c: Step1b followed by Step2a / Step2b / Step2c1.
                        try:
                            if not self.__r_Suffix_Noun_Step1b():
                                raise lab20()
                            try:
                                v_13 = self.limit - self.cursor
                                try:
                                    if not self.__r_Suffix_Noun_Step2a():
                                        raise lab22()
                                    raise lab21()
                                except lab22: pass
                                self.cursor = self.limit - v_13
                                try:
                                    if not self.__r_Suffix_Noun_Step2b():
                                        raise lab23()
                                    raise lab21()
                                except lab23: pass
                                self.cursor = self.limit - v_13
                                if not self.__r_Suffix_Noun_Step2c1():
                                    raise lab20()
                            except lab21: pass
                            raise lab12()
                        except lab20: pass
                        self.cursor = self.limit - v_10
                        # Option d: only when B_is_defined is not set - Step2a.
                        try:
                            try:
                                if not self.B_is_defined:
                                    raise lab25()
                                raise lab24()
                            except lab25: pass
                            if not self.__r_Suffix_Noun_Step2a():
                                raise lab24()
                            raise lab12()
                        except lab24: pass
                        self.cursor = self.limit - v_10
                        # Option e: Step2b; on failure undo the whole group.
                        if not self.__r_Suffix_Noun_Step2b():
                            self.cursor = self.limit - v_9
                            raise lab11()
                    except lab12: pass
                except lab11: pass
                if not self.__r_Suffix_Noun_Step3():
                    raise lab10()
                raise lab1()
            except lab10: pass
            self.cursor = self.limit - v_4
            # Alternative 3: the alef-maqsura rule.
            if not self.__r_Suffix_All_alef_maqsura():
                raise lab0()
        except lab1: pass
    except lab0: pass
    self.cursor = self.limit - v_3
    # Prefix phase: back to the start of the word, processing forwards.
    self.cursor = self.limit_backward
    v_15 = self.cursor
    try:
        # Prefix_Step1 and Prefix_Step2 are each optional: on failure only
        # the cursor is restored.
        v_16 = self.cursor
        try:
            if not self.__r_Prefix_Step1():
                self.cursor = v_16
                raise lab27()
        except lab27: pass
        v_17 = self.cursor
        try:
            if not self.__r_Prefix_Step2():
                self.cursor = v_17
                raise lab28()
        except lab28: pass
        # Then the first that succeeds of: Step3a_Noun; Step3b_Noun (needs
        # B_is_noun); optional Step3_Verb followed by Step4_Verb (needs
        # B_is_verb).
        try:
            v_18 = self.cursor
            try:
                if not self.__r_Prefix_Step3a_Noun():
                    raise lab30()
                raise lab29()
            except lab30: pass
            self.cursor = v_18
            try:
                if not self.B_is_noun:
                    raise lab31()
                if not self.__r_Prefix_Step3b_Noun():
                    raise lab31()
                raise lab29()
            except lab31: pass
            self.cursor = v_18
            if not self.B_is_verb:
                raise lab26()
            v_19 = self.cursor
            try:
                if not self.__r_Prefix_Step3_Verb():
                    self.cursor = v_19
                    raise lab32()
            except lab32: pass
            if not self.__r_Prefix_Step4_Verb():
                raise lab26()
        except lab29: pass
    except lab26: pass
    self.cursor = v_15
    self.__r_Normalize_post()
    return True
|
|
|
|
|
|
# Jump labels for the generated stemmer code: ``raise labN()`` leaves the
# enclosing ``try`` block whose handler is ``except labN: pass``. They derive
# from BaseException, so an ``except Exception`` handler does not catch them.


class lab0(BaseException):
    pass


class lab1(BaseException):
    pass


class lab2(BaseException):
    pass


class lab3(BaseException):
    pass


class lab4(BaseException):
    pass


class lab5(BaseException):
    pass


class lab6(BaseException):
    pass


class lab7(BaseException):
    pass


class lab8(BaseException):
    pass


class lab9(BaseException):
    pass


class lab10(BaseException):
    pass


class lab11(BaseException):
    pass


class lab12(BaseException):
    pass


class lab13(BaseException):
    pass


class lab14(BaseException):
    pass


class lab15(BaseException):
    pass


class lab16(BaseException):
    pass


class lab17(BaseException):
    pass


class lab18(BaseException):
    pass


class lab19(BaseException):
    pass


class lab20(BaseException):
    pass


class lab21(BaseException):
    pass


class lab22(BaseException):
    pass


class lab23(BaseException):
    pass


class lab24(BaseException):
    pass


class lab25(BaseException):
    pass


class lab26(BaseException):
    pass


class lab27(BaseException):
    pass


class lab28(BaseException):
    pass


class lab29(BaseException):
    pass


class lab30(BaseException):
    pass


class lab31(BaseException):
    pass


class lab32(BaseException):
    pass
|