203 lines
4.8 KiB
Python
203 lines
4.8 KiB
Python
|
"""Norwegian search language: includes the JS Norwegian stemmer."""
|
||
|
|
||
|
from __future__ import annotations
|
||
|
|
||
|
from typing import TYPE_CHECKING, Dict
|
||
|
|
||
|
import snowballstemmer
|
||
|
|
||
|
from sphinx.search import SearchLanguage, parse_stop_word
|
||
|
|
||
|
# Norwegian stop words, kept as raw text and handed to ``parse_stop_word``
# (imported from ``sphinx.search``) when this module is imported.
#
# Layout of the literal: one entry per line, written as ``word | English gloss``.
# The first entry line names the Snowball stop list the words were copied from.
# NOTE(review): assumes ``parse_stop_word`` drops everything from "|" to the
# end of a line, so the glosses and the lines consisting only of "|" or "||"
# contribute no words -- confirm against sphinx.search.
# NOTE(review): a trailing "*" in a gloss presumably marks a Nynorsk-only form,
# as in the upstream Snowball list -- the literal itself does not say; verify.
# NOTE(review): "som", "ikkje", "være" and "si" each appear twice below; this
# is harmless only if the parsed result de-duplicates -- verify.
norwegian_stopwords = parse_stop_word('''
|
||
|
| source: http://snowball.tartarus.org/algorithms/norwegian/stop.txt
|
||
|
og | and
|
||
|
i | in
|
||
|
jeg | I
|
||
|
det | it/this/that
|
||
|
at | to (w. inf.)
|
||
|
en | a/an
|
||
|
et | a/an
|
||
|
den | it/this/that
|
||
|
til | to
|
||
|
er | is/am/are
|
||
|
som | who/that
|
||
|
på | on
|
||
|
de | they / you(formal)
|
||
|
med | with
|
||
|
han | he
|
||
|
av | of
|
||
|
ikke | not
|
||
|
ikkje | not *
|
||
|
der | there
|
||
|
så | so
|
||
|
var | was/were
|
||
|
meg | me
|
||
|
seg | you
|
||
|
men | but
|
||
|
ett | one
|
||
|
har | have
|
||
|
om | about
|
||
|
vi | we
|
||
|
min | my
|
||
|
mitt | my
|
||
|
ha | have
|
||
|
hadde | had
|
||
|
hun | she
|
||
|
nå | now
|
||
|
over | over
|
||
|
da | when/as
|
||
|
ved | by/know
|
||
|
fra | from
|
||
|
du | you
|
||
|
ut | out
|
||
|
sin | your
|
||
|
dem | them
|
||
|
oss | us
|
||
|
opp | up
|
||
|
man | you/one
|
||
|
kan | can
|
||
|
hans | his
|
||
|
hvor | where
|
||
|
eller | or
|
||
|
hva | what
|
||
|
skal | shall/must
|
||
|
selv | self (reflective)
|
||
|
sjøl | self (reflective)
|
||
|
her | here
|
||
|
alle | all
|
||
|
vil | will
|
||
|
bli | become
|
||
|
ble | became
|
||
|
blei | became *
|
||
|
blitt | have become
|
||
|
kunne | could
|
||
|
inn | in
|
||
|
når | when
|
||
|
være | be
|
||
|
kom | come
|
||
|
noen | some
|
||
|
noe | some
|
||
|
ville | would
|
||
|
dere | you
|
||
|
som | who/which/that
|
||
|
deres | their/theirs
|
||
|
kun | only/just
|
||
|
ja | yes
|
||
|
etter | after
|
||
|
ned | down
|
||
|
skulle | should
|
||
|
denne | this
|
||
|
for | for/because
|
||
|
deg | you
|
||
|
si | hers/his
|
||
|
sine | hers/his
|
||
|
sitt | hers/his
|
||
|
mot | against
|
||
|
å | to
|
||
|
meget | much
|
||
|
hvorfor | why
|
||
|
dette | this
|
||
|
disse | these/those
|
||
|
uten | without
|
||
|
hvordan | how
|
||
|
ingen | none
|
||
|
din | your
|
||
|
ditt | your
|
||
|
blir | become
|
||
|
samme | same
|
||
|
hvilken | which
|
||
|
hvilke | which (plural)
|
||
|
sånn | such a
|
||
|
inni | inside/within
|
||
|
mellom | between
|
||
|
vår | our
|
||
|
hver | each
|
||
|
hvem | who
|
||
|
vors | us/ours
|
||
|
hvis | whose
|
||
|
både | both
|
||
|
bare | only/just
|
||
|
enn | than
|
||
|
fordi | as/because
|
||
|
før | before
|
||
|
mange | many
|
||
|
også | also
|
||
|
slik | just
|
||
|
vært | been
|
||
|
være | to be
|
||
|
båe | both *
|
||
|
begge | both
|
||
|
siden | since
|
||
|
dykk | your *
|
||
|
dykkar | yours *
|
||
|
dei | they *
|
||
|
deira | them *
|
||
|
deires | theirs *
|
||
|
deim | them *
|
||
|
di | your (fem.) *
|
||
|
då | as/when *
|
||
|
eg | I *
|
||
|
ein | a/an *
|
||
|
eit | a/an *
|
||
|
eitt | a/an *
|
||
|
elles | or *
|
||
|
honom | he *
|
||
|
hjå | at *
|
||
|
ho | she *
|
||
|
hoe | she *
|
||
|
henne | her
|
||
|
hennar | her/hers
|
||
|
hennes | hers
|
||
|
hoss | how *
|
||
|
hossen | how *
|
||
|
ikkje | not *
|
||
|
ingi | noone *
|
||
|
inkje | noone *
|
||
|
korleis | how *
|
||
|
korso | how *
|
||
|
kva | what/which *
|
||
|
kvar | where *
|
||
|
kvarhelst | where *
|
||
|
kven | who/whom *
|
||
|
kvi | why *
|
||
|
kvifor | why *
|
||
|
me | we *
|
||
|
medan | while *
|
||
|
mi | my *
|
||
|
mine | my *
|
||
|
mykje | much *
|
||
|
no | now *
|
||
|
nokon | some (masc./neut.) *
|
||
|
noka | some (fem.) *
|
||
|
nokor | some *
|
||
|
noko | some *
|
||
|
nokre | some *
|
||
|
si | his/hers *
|
||
|
sia | since *
|
||
|
sidan | since *
|
||
|
so | so *
|
||
|
somt | some *
|
||
|
somme | some *
|
||
|
um | about*
|
||
|
upp | up *
|
||
|
vere | be *
|
||
|
vore | was *
|
||
|
verte | become *
|
||
|
vort | become *
|
||
|
varte | became *
|
||
|
vart | became *
|
||
|
''')
|
||
|
|
||
|
|
||
|
class SearchNorwegian(SearchLanguage):
    """Norwegian search language backed by the Snowball Norwegian stemmer."""

    # Language code and human-readable name for this search language.
    lang = 'no'
    language_name = 'Norwegian'
    # File name of the JavaScript stemmer source associated with this language.
    js_stemmer_rawcode = 'norwegian-stemmer.js'
    # Stop words parsed at module level from the Snowball Norwegian list.
    stopwords = norwegian_stopwords

    def init(self, options: dict) -> None:
        """Create the Norwegian Snowball stemmer and keep it on the instance.

        ``options`` is accepted but not read by this method.
        """
        norwegian_stemmer = snowballstemmer.stemmer('norwegian')
        self.stemmer = norwegian_stemmer

    def stem(self, word: str) -> str:
        """Return the stem of ``word``; the word is lower-cased before stemming."""
        lowered = word.lower()
        return self.stemmer.stemWord(lowered)
|