416 lines
13 KiB
Python
416 lines
13 KiB
Python
# Generated by Snowball 2.2.0 - https://snowballstem.org/
|
|
|
|
from .basestemmer import BaseStemmer
|
|
from .among import Among
|
|
|
|
|
|
class GermanStemmer(BaseStemmer):
    '''
    This class implements the stemming algorithm defined by a snowball script.
    Generated by Snowball 2.2.0 - https://snowballstem.org/

    The code is machine generated: control flow is expressed by raising and
    catching the module-level ``labN`` exception classes, and the cursor is
    saved/restored around each attempt. Statement order therefore matters.

    The matching and slicing helpers used below (``eq_s``, ``find_among``,
    ``in_grouping``, ``slice_from``, ``slice_del`` and their ``_b`` variants,
    plus ``cursor``/``limit``/``bra``/``ket``) are inherited from BaseStemmer
    and are not defined in this file; comments about their effect are
    assumptions based on how their results are used here.
    '''

    # Table searched by __r_postlude. The third argument is the value the
    # caller branches on (1 -> "y", 2 -> "u", 3 -> "a", 4 -> "o", 5 -> skip).
    # NOTE(review): the second argument looks like the index of a shorter
    # entry of the same table (-1 for none) - confirm against among.py.
    a_0 = [
        Among(u"", -1, 5),
        Among(u"U", 0, 2),
        Among(u"Y", 0, 1),
        Among(u"\u00E4", 0, 3),
        Among(u"\u00F6", 0, 4),
        Among(u"\u00FC", 0, 2)
    ]

    # Endings searched (backwards) by step 1 of __r_standard_suffix.
    a_1 = [
        Among(u"e", -1, 2),
        Among(u"em", -1, 1),
        Among(u"en", -1, 2),
        Among(u"ern", -1, 1),
        Among(u"er", -1, 1),
        Among(u"s", -1, 3),
        Among(u"es", 5, 2)
    ]

    # Endings searched (backwards) by step 2 of __r_standard_suffix.
    a_2 = [
        Among(u"en", -1, 1),
        Among(u"er", -1, 1),
        Among(u"st", -1, 2),
        Among(u"est", 2, 1)
    ]

    # Secondary endings tried after a "keit" ending was removed in step 3.
    a_3 = [
        Among(u"ig", -1, 1),
        Among(u"lich", -1, 1)
    ]

    # Endings searched (backwards) by step 3 of __r_standard_suffix.
    a_4 = [
        Among(u"end", -1, 1),
        Among(u"ig", -1, 2),
        Among(u"ung", -1, 1),
        Among(u"lich", -1, 3),
        Among(u"isch", -1, 2),
        Among(u"ik", -1, 2),
        Among(u"heit", -1, 3),
        Among(u"keit", -1, 4)
    ]

    # Character groupings passed to the (go_)in/out_grouping helpers.
    # NOTE(review): presumably bitmaps over the code point range given at the
    # call site (97..252 for g_v, 98..116 for the ending sets) - confirm
    # against basestemmer.py.
    g_v = [17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 32, 8]

    g_s_ending = [117, 30, 5]

    g_st_ending = [117, 30, 4]

    # Positions computed by __r_mark_regions: I_x is the start cursor + 3,
    # I_p1 is tested by __r_R1 and I_p2 by __r_R2.
    I_x = 0
    I_p2 = 0
    I_p1 = 0

    def __r_prelude(self):
        '''
        Normalise the word before suffix removal, in two forward passes.

        Pass 1 rewrites every u"\\u00DF" to "ss". Pass 2 rewrites "u" to "U"
        and "y" to "Y" where the letter sits between two characters accepted
        by in_grouping(g_v, ...).

        Returns False only when a slice_from call reports failure, otherwise
        True.
        '''
        v_1 = self.cursor
        # Pass 1: walk the word one position at a time.
        while True:
            v_2 = self.cursor
            try:
                try:
                    v_3 = self.cursor
                    try:
                        self.bra = self.cursor
                        if not self.eq_s(u"\u00DF"):
                            raise lab2()
                        self.ket = self.cursor
                        if not self.slice_from(u"ss"):
                            return False
                        # Replacement done; skip the "advance one char" path.
                        raise lab1()
                    except lab2: pass
                    # No match here: restore and step forward, or stop the
                    # pass (lab0) once the limit is reached.
                    self.cursor = v_3
                    if self.cursor >= self.limit:
                        raise lab0()
                    self.cursor += 1
                except lab1: pass
                continue
            except lab0: pass
            self.cursor = v_2
            break
        # Pass 2 restarts from the original cursor position.
        self.cursor = v_1
        while True:
            v_4 = self.cursor
            try:
                try:
                    # Inner scan: advance until a rewrite succeeds (lab4) or
                    # the limit is reached (lab3).
                    while True:
                        v_5 = self.cursor
                        try:
                            if not self.in_grouping(GermanStemmer.g_v, 97, 252):
                                raise lab5()
                            self.bra = self.cursor
                            try:
                                v_6 = self.cursor
                                try:
                                    if not self.eq_s(u"u"):
                                        raise lab7()
                                    self.ket = self.cursor
                                    if not self.in_grouping(GermanStemmer.g_v, 97, 252):
                                        raise lab7()
                                    if not self.slice_from(u"U"):
                                        return False
                                    raise lab6()
                                except lab7: pass
                                # "u" alternative failed; try "y" from the
                                # same position.
                                self.cursor = v_6
                                if not self.eq_s(u"y"):
                                    raise lab5()
                                self.ket = self.cursor
                                if not self.in_grouping(GermanStemmer.g_v, 97, 252):
                                    raise lab5()
                                if not self.slice_from(u"Y"):
                                    return False
                            except lab6: pass
                            # A rewrite happened: go back to where this
                            # attempt started and leave the inner scan.
                            self.cursor = v_5
                            raise lab4()
                        except lab5: pass
                        self.cursor = v_5
                        if self.cursor >= self.limit:
                            raise lab3()
                        self.cursor += 1
                except lab4: pass
                continue
            except lab3: pass
            self.cursor = v_4
            break
        return True

    def __r_mark_regions(self):
        '''
        Compute I_x, I_p1 and I_p2 for the current word.

        I_p1 and I_p2 start at self.limit. I_x is the starting cursor plus 3.
        I_p1 is the cursor after one go_out_grouping/go_in_grouping pair over
        g_v (each followed by a one-character advance), raised to I_x when it
        would be smaller. I_p2 is the cursor after a second such pair.

        Returns False as soon as a step cannot be completed; the positions
        not yet assigned keep the value self.limit.
        '''
        self.I_p1 = self.limit
        self.I_p2 = self.limit
        v_1 = self.cursor
        c = self.cursor + 3
        if c > self.limit:
            return False
        self.cursor = c
        self.I_x = self.cursor
        self.cursor = v_1
        if not self.go_out_grouping(GermanStemmer.g_v, 97, 252):
            return False
        self.cursor += 1
        if not self.go_in_grouping(GermanStemmer.g_v, 97, 252):
            return False
        self.cursor += 1
        self.I_p1 = self.cursor
        # Equivalent to: I_p1 = max(I_p1, I_x).
        try:
            if not self.I_p1 < self.I_x:
                raise lab0()
            self.I_p1 = self.I_x
        except lab0: pass
        if not self.go_out_grouping(GermanStemmer.g_v, 97, 252):
            return False
        self.cursor += 1
        if not self.go_in_grouping(GermanStemmer.g_v, 97, 252):
            return False
        self.cursor += 1
        self.I_p2 = self.cursor
        return True

    def __r_postlude(self):
        '''
        Undo the prelude markers and fold umlauts, scanning forwards.

        At each position the a_0 table is consulted: "Y" -> "y", "U" and
        u"\\u00FC" -> "u", u"\\u00E4" -> "a", u"\\u00F6" -> "o"; any other
        character is skipped.

        Returns False only when a slice_from call reports failure, otherwise
        True.
        '''
        while True:
            v_1 = self.cursor
            try:
                self.bra = self.cursor
                among_var = self.find_among(GermanStemmer.a_0)
                if among_var == 0:
                    raise lab0()
                self.ket = self.cursor
                if among_var == 1:
                    if not self.slice_from(u"y"):
                        return False
                elif among_var == 2:
                    if not self.slice_from(u"u"):
                        return False
                elif among_var == 3:
                    if not self.slice_from(u"a"):
                        return False
                elif among_var == 4:
                    if not self.slice_from(u"o"):
                        return False
                else:
                    # Empty-string entry matched: step over one character, or
                    # end the loop at the limit.
                    if self.cursor >= self.limit:
                        raise lab0()
                    self.cursor += 1
                continue
            except lab0: pass
            self.cursor = v_1
            break
        return True

    def __r_R1(self):
        '''Return True when the cursor is at or after I_p1.'''
        if not self.I_p1 <= self.cursor:
            return False
        return True

    def __r_R2(self):
        '''Return True when the cursor is at or after I_p2.'''
        if not self.I_p2 <= self.cursor:
            return False
        return True

    def __r_standard_suffix(self):
        '''
        Remove suffixes in three independent steps, working backwards.

        Each step remembers the cursor as an offset from self.limit, tries
        its table (a_1, a_2, then a_4) and restores the cursor afterwards,
        whether or not anything was removed.

        Returns False only when a slice_del call reports failure, otherwise
        True.
        '''
        # Step 1: endings from a_1, which must lie in R1.
        v_1 = self.limit - self.cursor
        try:
            self.ket = self.cursor
            among_var = self.find_among_b(GermanStemmer.a_1)
            if among_var == 0:
                raise lab0()
            self.bra = self.cursor
            if not self.__r_R1():
                raise lab0()
            if among_var == 1:
                if not self.slice_del():
                    return False

            elif among_var == 2:
                if not self.slice_del():
                    return False

                # Optionally also drop an "s" that is preceded by "nis".
                v_2 = self.limit - self.cursor
                try:
                    self.ket = self.cursor
                    if not self.eq_s_b(u"s"):
                        self.cursor = self.limit - v_2
                        raise lab1()
                    self.bra = self.cursor
                    if not self.eq_s_b(u"nis"):
                        self.cursor = self.limit - v_2
                        raise lab1()
                    if not self.slice_del():
                        return False

                except lab1: pass
            else:
                # "s": only removed after a character in g_s_ending.
                if not self.in_grouping_b(GermanStemmer.g_s_ending, 98, 116):
                    raise lab0()
                if not self.slice_del():
                    return False

        except lab0: pass
        self.cursor = self.limit - v_1
        # Step 2: endings from a_2, which must lie in R1.
        v_3 = self.limit - self.cursor
        try:
            self.ket = self.cursor
            among_var = self.find_among_b(GermanStemmer.a_2)
            if among_var == 0:
                raise lab2()
            self.bra = self.cursor
            if not self.__r_R1():
                raise lab2()
            if among_var == 1:
                if not self.slice_del():
                    return False

            else:
                # "st": needs a preceding g_st_ending character and at least
                # three more characters before that.
                if not self.in_grouping_b(GermanStemmer.g_st_ending, 98, 116):
                    raise lab2()
                c = self.cursor - 3
                if c < self.limit_backward:
                    raise lab2()
                self.cursor = c
                if not self.slice_del():
                    return False

        except lab2: pass
        self.cursor = self.limit - v_3
        # Step 3: endings from a_4, which must lie in R2.
        v_4 = self.limit - self.cursor
        try:
            self.ket = self.cursor
            among_var = self.find_among_b(GermanStemmer.a_4)
            if among_var == 0:
                raise lab3()
            self.bra = self.cursor
            if not self.__r_R2():
                raise lab3()
            if among_var == 1:
                # "end" / "ung": delete, then optionally delete a preceding
                # "ig" that is in R2 and not itself preceded by "e".
                if not self.slice_del():
                    return False

                v_5 = self.limit - self.cursor
                try:
                    self.ket = self.cursor
                    if not self.eq_s_b(u"ig"):
                        self.cursor = self.limit - v_5
                        raise lab4()
                    self.bra = self.cursor
                    v_6 = self.limit - self.cursor
                    try:
                        if not self.eq_s_b(u"e"):
                            raise lab5()
                        # Preceded by "e": give up on the optional part.
                        self.cursor = self.limit - v_5
                        raise lab4()
                    except lab5: pass
                    self.cursor = self.limit - v_6
                    if not self.__r_R2():
                        self.cursor = self.limit - v_5
                        raise lab4()
                    if not self.slice_del():
                        return False

                except lab4: pass
            elif among_var == 2:
                # "ig" / "isch" / "ik": abandon the step (lab3) when preceded
                # by "e", otherwise delete.
                v_7 = self.limit - self.cursor
                try:
                    if not self.eq_s_b(u"e"):
                        raise lab6()
                    raise lab3()
                except lab6: pass
                self.cursor = self.limit - v_7
                if not self.slice_del():
                    return False

            elif among_var == 3:
                # "lich" / "heit": delete, then optionally delete a preceding
                # "er" or "en" that lies in R1.
                if not self.slice_del():
                    return False

                v_8 = self.limit - self.cursor
                try:
                    self.ket = self.cursor
                    try:
                        v_9 = self.limit - self.cursor
                        try:
                            if not self.eq_s_b(u"er"):
                                raise lab9()
                            raise lab8()
                        except lab9: pass
                        self.cursor = self.limit - v_9
                        if not self.eq_s_b(u"en"):
                            self.cursor = self.limit - v_8
                            raise lab7()
                    except lab8: pass
                    self.bra = self.cursor
                    if not self.__r_R1():
                        self.cursor = self.limit - v_8
                        raise lab7()
                    if not self.slice_del():
                        return False

                except lab7: pass
            else:
                # "keit": delete, then optionally delete a preceding a_3
                # ending ("ig" / "lich") that lies in R2.
                if not self.slice_del():
                    return False

                v_10 = self.limit - self.cursor
                try:
                    self.ket = self.cursor
                    if self.find_among_b(GermanStemmer.a_3) == 0:
                        self.cursor = self.limit - v_10
                        raise lab10()
                    self.bra = self.cursor
                    if not self.__r_R2():
                        self.cursor = self.limit - v_10
                        raise lab10()
                    if not self.slice_del():
                        return False

                except lab10: pass
        except lab3: pass
        self.cursor = self.limit - v_4
        return True

    def _stem(self):
        '''
        Run the full pipeline on the current word and return True.

        Order: prelude and region marking (forwards, each from the starting
        cursor), suffix removal (backwards from self.limit), then postlude
        (forwards again). The boolean results of the sub-routines are
        ignored.
        '''
        v_1 = self.cursor
        self.__r_prelude()
        self.cursor = v_1
        v_2 = self.cursor
        self.__r_mark_regions()
        self.cursor = v_2
        # Switch to backward mode: the current position becomes the backward
        # limit and scanning starts from the end.
        self.limit_backward = self.cursor
        self.cursor = self.limit
        self.__r_standard_suffix()
        # Back to forward mode for the postlude.
        self.cursor = self.limit_backward
        v_4 = self.cursor
        self.__r_postlude()
        self.cursor = v_4
        return True
|
|
|
|
|
|
class lab0(BaseException):
    '''Control-flow label: raised and caught inside the generated stemmer routines.'''
|
|
|
|
|
|
class lab1(BaseException):
    '''Control-flow label: raised and caught inside the generated stemmer routines.'''
|
|
|
|
|
|
class lab2(BaseException):
    '''Control-flow label: raised and caught inside the generated stemmer routines.'''
|
|
|
|
|
|
class lab3(BaseException):
    '''Control-flow label: raised and caught inside the generated stemmer routines.'''
|
|
|
|
|
|
class lab4(BaseException):
    '''Control-flow label: raised and caught inside the generated stemmer routines.'''
|
|
|
|
|
|
class lab5(BaseException):
    '''Control-flow label: raised and caught inside the generated stemmer routines.'''
|
|
|
|
|
|
class lab6(BaseException):
    '''Control-flow label: raised and caught inside the generated stemmer routines.'''
|
|
|
|
|
|
class lab7(BaseException):
    '''Control-flow label: raised and caught inside the generated stemmer routines.'''
|
|
|
|
|
|
class lab8(BaseException):
    '''Control-flow label: raised and caught inside the generated stemmer routines.'''
|
|
|
|
|
|
class lab9(BaseException):
    '''Control-flow label: raised and caught inside the generated stemmer routines.'''
|
|
|
|
|
|
class lab10(BaseException):
    '''Control-flow label: raised and caught inside the generated stemmer routines.'''
|