# Generated by Snowball 2.2.0 - https://snowballstem.org/

from .basestemmer import BaseStemmer
|
|
from .among import Among
|
|
|
|
|
|
class RussianStemmer(BaseStemmer):
    """
    This class implements the stemming algorithm defined by a snowball script.
    Generated by Snowball 2.2.0 - https://snowballstem.org/

    Reading notes:

    * ``_stem`` is the only non-private method defined here; the
      double-underscore ``__r_*`` methods are the individual rules it calls.
    * The rules work on the ``cursor``, ``limit``, ``limit_backward``,
      ``bra`` and ``ket`` attributes and on helpers such as
      ``find_among_b``, ``eq_s_b``, ``slice_del`` and ``slice_from``.  None
      of those are defined in this file (presumably they come from
      BaseStemmer -- confirm there); comments below only describe what this
      file does with their results.
    * ``lab0`` .. ``lab11`` are module-level marker exceptions defined after
      this class.  They are raised and immediately caught to emulate
      labelled jumps and never carry data.
    * Each ``a_N`` table is a list of ``Among(text, index, result)`` entries.
      NOTE(review): in every table the second argument is either -1 or the
      index of an earlier entry whose text is a suffix of this entry's text;
      the exact contract is defined in ``.among`` -- confirm there.
    """

    # Consulted by __r_perfective_gerund.  Result 1 endings additionally
    # require a preceding U+0430 or U+044F; result 2 endings do not.
    a_0 = [
        Among(u"\u0432", -1, 1),
        Among(u"\u0438\u0432", 0, 2),
        Among(u"\u044B\u0432", 0, 2),
        Among(u"\u0432\u0448\u0438", -1, 1),
        Among(u"\u0438\u0432\u0448\u0438", 3, 2),
        Among(u"\u044B\u0432\u0448\u0438", 3, 2),
        Among(u"\u0432\u0448\u0438\u0441\u044C", -1, 1),
        Among(u"\u0438\u0432\u0448\u0438\u0441\u044C", 6, 2),
        Among(u"\u044B\u0432\u0448\u0438\u0441\u044C", 6, 2)
    ]

    # Consulted by __r_adjective; every entry has the same result value.
    a_1 = [
        Among(u"\u0435\u0435", -1, 1),
        Among(u"\u0438\u0435", -1, 1),
        Among(u"\u043E\u0435", -1, 1),
        Among(u"\u044B\u0435", -1, 1),
        Among(u"\u0438\u043C\u0438", -1, 1),
        Among(u"\u044B\u043C\u0438", -1, 1),
        Among(u"\u0435\u0439", -1, 1),
        Among(u"\u0438\u0439", -1, 1),
        Among(u"\u043E\u0439", -1, 1),
        Among(u"\u044B\u0439", -1, 1),
        Among(u"\u0435\u043C", -1, 1),
        Among(u"\u0438\u043C", -1, 1),
        Among(u"\u043E\u043C", -1, 1),
        Among(u"\u044B\u043C", -1, 1),
        Among(u"\u0435\u0433\u043E", -1, 1),
        Among(u"\u043E\u0433\u043E", -1, 1),
        Among(u"\u0435\u043C\u0443", -1, 1),
        Among(u"\u043E\u043C\u0443", -1, 1),
        Among(u"\u0438\u0445", -1, 1),
        Among(u"\u044B\u0445", -1, 1),
        Among(u"\u0435\u044E", -1, 1),
        Among(u"\u043E\u044E", -1, 1),
        Among(u"\u0443\u044E", -1, 1),
        Among(u"\u044E\u044E", -1, 1),
        Among(u"\u0430\u044F", -1, 1),
        Among(u"\u044F\u044F", -1, 1)
    ]

    # Consulted by __r_adjectival after an adjective ending was removed.
    a_2 = [
        Among(u"\u0435\u043C", -1, 1),
        Among(u"\u043D\u043D", -1, 1),
        Among(u"\u0432\u0448", -1, 1),
        Among(u"\u0438\u0432\u0448", 2, 2),
        Among(u"\u044B\u0432\u0448", 2, 2),
        Among(u"\u0449", -1, 1),
        Among(u"\u044E\u0449", 5, 1),
        Among(u"\u0443\u044E\u0449", 6, 2)
    ]

    # Consulted by __r_reflexive.
    a_3 = [
        Among(u"\u0441\u044C", -1, 1),
        Among(u"\u0441\u044F", -1, 1)
    ]

    # Consulted by __r_verb.  Result 1 endings additionally require a
    # preceding U+0430 or U+044F; result 2 endings do not.
    a_4 = [
        Among(u"\u043B\u0430", -1, 1),
        Among(u"\u0438\u043B\u0430", 0, 2),
        Among(u"\u044B\u043B\u0430", 0, 2),
        Among(u"\u043D\u0430", -1, 1),
        Among(u"\u0435\u043D\u0430", 3, 2),
        Among(u"\u0435\u0442\u0435", -1, 1),
        Among(u"\u0438\u0442\u0435", -1, 2),
        Among(u"\u0439\u0442\u0435", -1, 1),
        Among(u"\u0435\u0439\u0442\u0435", 7, 2),
        Among(u"\u0443\u0439\u0442\u0435", 7, 2),
        Among(u"\u043B\u0438", -1, 1),
        Among(u"\u0438\u043B\u0438", 10, 2),
        Among(u"\u044B\u043B\u0438", 10, 2),
        Among(u"\u0439", -1, 1),
        Among(u"\u0435\u0439", 13, 2),
        Among(u"\u0443\u0439", 13, 2),
        Among(u"\u043B", -1, 1),
        Among(u"\u0438\u043B", 16, 2),
        Among(u"\u044B\u043B", 16, 2),
        Among(u"\u0435\u043C", -1, 1),
        Among(u"\u0438\u043C", -1, 2),
        Among(u"\u044B\u043C", -1, 2),
        Among(u"\u043D", -1, 1),
        Among(u"\u0435\u043D", 22, 2),
        Among(u"\u043B\u043E", -1, 1),
        Among(u"\u0438\u043B\u043E", 24, 2),
        Among(u"\u044B\u043B\u043E", 24, 2),
        Among(u"\u043D\u043E", -1, 1),
        Among(u"\u0435\u043D\u043E", 27, 2),
        Among(u"\u043D\u043D\u043E", 27, 1),
        Among(u"\u0435\u0442", -1, 1),
        Among(u"\u0443\u0435\u0442", 30, 2),
        Among(u"\u0438\u0442", -1, 2),
        Among(u"\u044B\u0442", -1, 2),
        Among(u"\u044E\u0442", -1, 1),
        Among(u"\u0443\u044E\u0442", 34, 2),
        Among(u"\u044F\u0442", -1, 2),
        Among(u"\u043D\u044B", -1, 1),
        Among(u"\u0435\u043D\u044B", 37, 2),
        Among(u"\u0442\u044C", -1, 1),
        Among(u"\u0438\u0442\u044C", 39, 2),
        Among(u"\u044B\u0442\u044C", 39, 2),
        Among(u"\u0435\u0448\u044C", -1, 1),
        Among(u"\u0438\u0448\u044C", -1, 2),
        Among(u"\u044E", -1, 2),
        Among(u"\u0443\u044E", 44, 2)
    ]

    # Consulted by __r_noun; every entry has the same result value.
    a_5 = [
        Among(u"\u0430", -1, 1),
        Among(u"\u0435\u0432", -1, 1),
        Among(u"\u043E\u0432", -1, 1),
        Among(u"\u0435", -1, 1),
        Among(u"\u0438\u0435", 3, 1),
        Among(u"\u044C\u0435", 3, 1),
        Among(u"\u0438", -1, 1),
        Among(u"\u0435\u0438", 6, 1),
        Among(u"\u0438\u0438", 6, 1),
        Among(u"\u0430\u043C\u0438", 6, 1),
        Among(u"\u044F\u043C\u0438", 6, 1),
        Among(u"\u0438\u044F\u043C\u0438", 10, 1),
        Among(u"\u0439", -1, 1),
        Among(u"\u0435\u0439", 12, 1),
        Among(u"\u0438\u0435\u0439", 13, 1),
        Among(u"\u0438\u0439", 12, 1),
        Among(u"\u043E\u0439", 12, 1),
        Among(u"\u0430\u043C", -1, 1),
        Among(u"\u0435\u043C", -1, 1),
        Among(u"\u0438\u0435\u043C", 18, 1),
        Among(u"\u043E\u043C", -1, 1),
        Among(u"\u044F\u043C", -1, 1),
        Among(u"\u0438\u044F\u043C", 21, 1),
        Among(u"\u043E", -1, 1),
        Among(u"\u0443", -1, 1),
        Among(u"\u0430\u0445", -1, 1),
        Among(u"\u044F\u0445", -1, 1),
        Among(u"\u0438\u044F\u0445", 26, 1),
        Among(u"\u044B", -1, 1),
        Among(u"\u044C", -1, 1),
        Among(u"\u044E", -1, 1),
        Among(u"\u0438\u044E", 30, 1),
        Among(u"\u044C\u044E", 30, 1),
        Among(u"\u044F", -1, 1),
        Among(u"\u0438\u044F", 33, 1),
        Among(u"\u044C\u044F", 33, 1)
    ]

    # Consulted by __r_derivational.
    a_6 = [
        Among(u"\u043E\u0441\u0442", -1, 1),
        Among(u"\u043E\u0441\u0442\u044C", -1, 1)
    ]

    # Consulted by __r_tidy_up; the three result values select its branches.
    a_7 = [
        Among(u"\u0435\u0439\u0448\u0435", -1, 1),
        Among(u"\u043D", -1, 2),
        Among(u"\u0435\u0439\u0448", -1, 1),
        Among(u"\u044C", -1, 3)
    ]

    # Grouping table passed to go_out_grouping / go_in_grouping together with
    # the code-point bounds 1072 (U+0430) and 1103 (U+044F).
    # NOTE(review): presumably a bitmap of the vowels inside that range; the
    # encoding is defined by the helpers, not here -- confirm.
    g_v = [33, 65, 8, 232]

    # Region markers; class-level defaults that __r_mark_regions overwrites
    # on the instance for every word.
    I_p2 = 0
    I_pV = 0

    def __r_mark_regions(self):
        """Set ``I_pV`` and ``I_p2`` for the current word.

        Both markers start at ``self.limit``.  ``I_pV`` becomes the cursor
        position reached after the first successful ``go_out_grouping`` scan
        plus one step; ``I_p2`` is set only if the following in / out / in
        scans all succeed as well.  The first failing scan aborts the rest,
        leaving the markers not yet assigned at ``self.limit``.

        The cursor is restored before returning.  Always returns True.
        """
        self.I_pV = self.limit
        self.I_p2 = self.limit
        v_1 = self.cursor
        try:
            if not self.go_out_grouping(RussianStemmer.g_v, 1072, 1103):
                raise lab0()
            self.cursor += 1
            self.I_pV = self.cursor
            if not self.go_in_grouping(RussianStemmer.g_v, 1072, 1103):
                raise lab0()
            self.cursor += 1
            if not self.go_out_grouping(RussianStemmer.g_v, 1072, 1103):
                raise lab0()
            self.cursor += 1
            if not self.go_in_grouping(RussianStemmer.g_v, 1072, 1103):
                raise lab0()
            self.cursor += 1
            self.I_p2 = self.cursor
        except lab0:
            pass
        self.cursor = v_1
        return True

    def __r_R2(self):
        """Return True when the cursor is at or beyond ``I_p2``."""
        if not self.I_p2 <= self.cursor:
            return False
        return True

    def __r_perfective_gerund(self):
        """Delete an ending listed in ``a_0``.

        Returns False when ``find_among_b`` reports no match (0), when a
        result-1 ending is preceded by neither U+0430 nor U+044F, or when
        ``slice_del`` reports failure; True otherwise.
        """
        self.ket = self.cursor
        among_var = self.find_among_b(RussianStemmer.a_0)
        if among_var == 0:
            return False
        self.bra = self.cursor
        if among_var == 1:
            try:
                v_1 = self.limit - self.cursor
                try:
                    if not self.eq_s_b(u"\u0430"):
                        raise lab1()
                    # First alternative matched: skip the second one.
                    raise lab0()
                except lab1:
                    pass
                # Retry from the same position with the second alternative.
                self.cursor = self.limit - v_1
                if not self.eq_s_b(u"\u044F"):
                    return False
            except lab0:
                pass
            if not self.slice_del():
                return False
        else:
            if not self.slice_del():
                return False
        return True

    def __r_adjective(self):
        """Delete an ending listed in ``a_1``.

        Returns False when nothing in the table matches or ``slice_del``
        reports failure; True otherwise.
        """
        self.ket = self.cursor
        if self.find_among_b(RussianStemmer.a_1) == 0:
            return False
        self.bra = self.cursor
        if not self.slice_del():
            return False
        return True

    def __r_adjectival(self):
        """Delete an ``a_1`` ending and, optionally, an ``a_2`` ending.

        Fails (returns False) only when ``__r_adjective`` fails or a
        ``slice_del`` call reports failure.  The ``a_2`` step is optional:
        if no entry matches, or a result-1 entry is preceded by neither
        U+0430 nor U+044F, the cursor is put back to where it was after the
        adjective step and the method still returns True.
        """
        if not self.__r_adjective():
            return False
        v_1 = self.limit - self.cursor
        try:
            self.ket = self.cursor
            among_var = self.find_among_b(RussianStemmer.a_2)
            if among_var == 0:
                self.cursor = self.limit - v_1
                raise lab0()
            self.bra = self.cursor
            if among_var == 1:
                try:
                    v_2 = self.limit - self.cursor
                    try:
                        if not self.eq_s_b(u"\u0430"):
                            raise lab2()
                        # First alternative matched: skip the second one.
                        raise lab1()
                    except lab2:
                        pass
                    self.cursor = self.limit - v_2
                    if not self.eq_s_b(u"\u044F"):
                        # Neither alternative: give up on the optional step.
                        self.cursor = self.limit - v_1
                        raise lab0()
                except lab1:
                    pass
                if not self.slice_del():
                    return False
            else:
                if not self.slice_del():
                    return False
        except lab0:
            pass
        return True

    def __r_reflexive(self):
        """Delete an ending listed in ``a_3``.

        Returns False when nothing in the table matches or ``slice_del``
        reports failure; True otherwise.
        """
        self.ket = self.cursor
        if self.find_among_b(RussianStemmer.a_3) == 0:
            return False
        self.bra = self.cursor
        if not self.slice_del():
            return False
        return True

    def __r_verb(self):
        """Delete an ending listed in ``a_4``.

        Same shape as ``__r_perfective_gerund``: result-1 endings must be
        preceded by U+0430 or U+044F, any other matched ending is deleted
        unconditionally.  Returns False on no match, on a failed
        precondition, or when ``slice_del`` reports failure.
        """
        self.ket = self.cursor
        among_var = self.find_among_b(RussianStemmer.a_4)
        if among_var == 0:
            return False
        self.bra = self.cursor
        if among_var == 1:
            try:
                v_1 = self.limit - self.cursor
                try:
                    if not self.eq_s_b(u"\u0430"):
                        raise lab1()
                    # First alternative matched: skip the second one.
                    raise lab0()
                except lab1:
                    pass
                self.cursor = self.limit - v_1
                if not self.eq_s_b(u"\u044F"):
                    return False
            except lab0:
                pass
            if not self.slice_del():
                return False
        else:
            if not self.slice_del():
                return False
        return True

    def __r_noun(self):
        """Delete an ending listed in ``a_5``.

        Returns False when nothing in the table matches or ``slice_del``
        reports failure; True otherwise.
        """
        self.ket = self.cursor
        if self.find_among_b(RussianStemmer.a_5) == 0:
            return False
        self.bra = self.cursor
        if not self.slice_del():
            return False
        return True

    def __r_derivational(self):
        """Delete an ending listed in ``a_6`` if ``__r_R2`` holds after it.

        Returns False when nothing matches, when the ``__r_R2`` test fails
        at the position left by the match, or when ``slice_del`` reports
        failure; True otherwise.
        """
        self.ket = self.cursor
        if self.find_among_b(RussianStemmer.a_6) == 0:
            return False
        self.bra = self.cursor
        if not self.__r_R2():
            return False
        if not self.slice_del():
            return False
        return True

    def __r_tidy_up(self):
        """Apply the final clean-up selected by a match in ``a_7``.

        * result 1: delete the match, then require two consecutive U+043D
          and delete the slice marked around the first of them;
        * result 2: require a further U+043D before the match, then delete
          the match;
        * any other non-zero result: delete the match.

        Returns False on no match, on a failed requirement, or when
        ``slice_del`` reports failure; True otherwise.
        """
        self.ket = self.cursor
        among_var = self.find_among_b(RussianStemmer.a_7)
        if among_var == 0:
            return False
        self.bra = self.cursor
        if among_var == 1:
            if not self.slice_del():
                return False
            self.ket = self.cursor
            if not self.eq_s_b(u"\u043D"):
                return False
            self.bra = self.cursor
            if not self.eq_s_b(u"\u043D"):
                return False
            if not self.slice_del():
                return False
        elif among_var == 2:
            if not self.eq_s_b(u"\u043D"):
                return False
            if not self.slice_del():
                return False
        else:
            if not self.slice_del():
                return False
        return True

    def _stem(self):
        """Run the complete rule sequence over the current word.

        Steps, in order:

        1. Scan forward and replace every U+0451 with U+0435.
        2. Compute the region markers with ``__r_mark_regions``.
        3. Switch to the end of the word, with ``limit_backward`` raised to
           ``I_pV`` for the remaining steps.
        4. Try ``__r_perfective_gerund``; if it fails, optionally apply
           ``__r_reflexive`` and then the first of ``__r_adjectival``,
           ``__r_verb``, ``__r_noun`` that succeeds.
        5. Optionally delete a trailing U+0438.
        6. Call ``__r_derivational`` and ``__r_tidy_up``.

        Returns False if ``slice_from`` or the ``slice_del`` in step 5
        reports failure, or if the cursor is before ``I_pV`` at step 3;
        True otherwise.  The return values of ``__r_mark_regions``,
        ``__r_derivational`` and ``__r_tidy_up`` are ignored.
        """
        v_1 = self.cursor
        # No statement visible in this try body raises lab0; the handler is
        # kept exactly as the generator emitted it.
        try:
            while True:
                v_2 = self.cursor
                try:
                    try:
                        # Advance one position at a time until U+0451 is
                        # found (lab2) or the limit is reached (lab1).
                        while True:
                            v_3 = self.cursor
                            try:
                                self.bra = self.cursor
                                if not self.eq_s(u"\u0451"):
                                    raise lab3()
                                self.ket = self.cursor
                                self.cursor = v_3
                                raise lab2()
                            except lab3:
                                pass
                            self.cursor = v_3
                            if self.cursor >= self.limit:
                                raise lab1()
                            self.cursor += 1
                    except lab2:
                        pass
                    if not self.slice_from(u"\u0435"):
                        return False
                    continue
                except lab1:
                    pass
                self.cursor = v_2
                break
        except lab0:
            pass
        self.cursor = v_1
        self.__r_mark_regions()
        self.limit_backward = self.cursor
        self.cursor = self.limit
        if self.cursor < self.I_pV:
            return False
        # Remember the real backward limit; it is restored before returning.
        v_6 = self.limit_backward
        self.limit_backward = self.I_pV
        v_7 = self.limit - self.cursor
        try:
            try:
                v_8 = self.limit - self.cursor
                try:
                    if not self.__r_perfective_gerund():
                        raise lab6()
                    # Gerund ending removed: skip the alternatives below.
                    raise lab5()
                except lab6:
                    pass
                self.cursor = self.limit - v_8
                v_9 = self.limit - self.cursor
                try:
                    if not self.__r_reflexive():
                        self.cursor = self.limit - v_9
                        raise lab7()
                except lab7:
                    pass
                try:
                    v_10 = self.limit - self.cursor
                    try:
                        if not self.__r_adjectival():
                            raise lab9()
                        raise lab8()
                    except lab9:
                        pass
                    self.cursor = self.limit - v_10
                    try:
                        if not self.__r_verb():
                            raise lab10()
                        raise lab8()
                    except lab10:
                        pass
                    self.cursor = self.limit - v_10
                    if not self.__r_noun():
                        raise lab4()
                except lab8:
                    pass
            except lab5:
                pass
        except lab4:
            pass
        self.cursor = self.limit - v_7
        v_11 = self.limit - self.cursor
        try:
            self.ket = self.cursor
            if not self.eq_s_b(u"\u0438"):
                self.cursor = self.limit - v_11
                raise lab11()
            self.bra = self.cursor
            if not self.slice_del():
                return False
        except lab11:
            pass
        v_12 = self.limit - self.cursor
        self.__r_derivational()
        self.cursor = self.limit - v_12
        v_13 = self.limit - self.cursor
        self.__r_tidy_up()
        self.cursor = self.limit - v_13
        self.limit_backward = v_6
        self.cursor = self.limit_backward
        return True
|
|
|
|
|
|
class lab0(BaseException):
    """Data-free marker exception, raised and caught as a jump label."""
|
|
|
|
|
|
class lab1(BaseException):
    """Data-free marker exception, raised and caught as a jump label."""
|
|
|
|
|
|
class lab2(BaseException):
    """Data-free marker exception, raised and caught as a jump label."""
|
|
|
|
|
|
class lab3(BaseException):
    """Data-free marker exception, raised and caught as a jump label."""
|
|
|
|
|
|
class lab4(BaseException):
    """Data-free marker exception, raised and caught as a jump label."""
|
|
|
|
|
|
class lab5(BaseException):
    """Data-free marker exception, raised and caught as a jump label."""
|
|
|
|
|
|
class lab6(BaseException):
    """Data-free marker exception, raised and caught as a jump label."""
|
|
|
|
|
|
class lab7(BaseException):
    """Data-free marker exception, raised and caught as a jump label."""
|
|
|
|
|
|
class lab8(BaseException):
    """Data-free marker exception, raised and caught as a jump label."""
|
|
|
|
|
|
class lab9(BaseException):
    """Data-free marker exception, raised and caught as a jump label."""
|
|
|
|
|
|
class lab10(BaseException):
    """Data-free marker exception, raised and caught as a jump label."""
|
|
|
|
|
|
class lab11(BaseException):
    """Data-free marker exception, raised and caught as a jump label."""
|