121 lines
3.5 KiB
Python
121 lines
3.5 KiB
Python
"""Danish search language: includes the JS Danish stemmer."""
|
|
|
|
from __future__ import annotations
|
|
|
|
from typing import TYPE_CHECKING, Dict
|
|
|
|
import snowballstemmer
|
|
|
|
from sphinx.search import SearchLanguage, parse_stop_word
|
|
|
|
# Danish stop words for the search index. The list's origin (the Snowball
# project's Danish stop list) is recorded on the first entry of the literal.
# Each entry line has the form "<Danish word> | <English gloss>".
# NOTE(review): presumably parse_stop_word() keeps only the text before each
# "|" and drops the gloss -- confirm against sphinx.search.parse_stop_word.
# NOTE(review): the lines holding only "|" carry no word and look like paste
# residue; confirm they are ignored by the parser before removing them.
danish_stopwords = parse_stop_word('''
|
|
| source: http://snowball.tartarus.org/algorithms/danish/stop.txt
|
|
og | and
|
|
i | in
|
|
jeg | I
|
|
det | that (dem. pronoun)/it (pers. pronoun)
|
|
at | that (in front of a sentence)/to (with infinitive)
|
|
en | a/an
|
|
den | it (pers. pronoun)/that (dem. pronoun)
|
|
til | to/at/for/until/against/by/of/into, more
|
|
er | present tense of "to be"
|
|
som | who, as
|
|
på | on/upon/in/on/at/to/after/of/with/for, on
|
|
de | they
|
|
med | with/by/in, along
|
|
han | he
|
|
af | of/by/from/off/for/in/with/on, off
|
|
for | at/for/to/from/by/of/ago, in front/before, because
|
|
ikke | not
|
|
der | who/which, there/those
|
|
var | past tense of "to be"
|
|
mig | me/myself
|
|
sig | oneself/himself/herself/itself/themselves
|
|
men | but
|
|
et | a/an/one, one (number), someone/somebody/one
|
|
har | present tense of "to have"
|
|
om | round/about/for/in/a, about/around/down, if
|
|
vi | we
|
|
min | my
|
|
havde | past tense of "to have"
|
|
ham | him
|
|
hun | she
|
|
nu | now
|
|
over | over/above/across/by/beyond/past/on/about, over/past
|
|
da | then, when/as/since
|
|
fra | from/off/since, off, since
|
|
du | you
|
|
ud | out
|
|
sin | his/her/its/one's
|
|
dem | them
|
|
os | us/ourselves
|
|
op | up
|
|
man | you/one
|
|
hans | his
|
|
hvor | where
|
|
eller | or
|
|
hvad | what
|
|
skal | must/shall etc.
|
|
selv | myself/yourself/herself/ourselves etc., even
|
|
her | here
|
|
alle | all/everyone/everybody etc.
|
|
vil | will (verb)
|
|
blev | past tense of "to stay/to remain/to get/to become"
|
|
kunne | could
|
|
ind | in
|
|
når | when
|
|
være | present tense of "to be"
|
|
dog | however/yet/after all
|
|
noget | something
|
|
ville | would
|
|
jo | you know/you see (adv), yes
|
|
deres | their/theirs
|
|
efter | after/behind/according to/for/by/from, later/afterwards
|
|
ned | down
|
|
skulle | should
|
|
denne | this
|
|
end | than
|
|
dette | this
|
|
mit | my/mine
|
|
også | also
|
|
under | under/beneath/below/during, below/underneath
|
|
have | have
|
|
dig | you
|
|
anden | other
|
|
hende | her
|
|
mine | my
|
|
alt | everything
|
|
meget | much/very, plenty of
|
|
sit | his, her, its, one's
|
|
sine | his, her, its, one's
|
|
vor | our
|
|
mod | against
|
|
disse | these
|
|
hvis | if
|
|
din | your/yours
|
|
nogle | some
|
|
hos | by/at
|
|
blive | be/become
|
|
mange | many
|
|
ad | by/through
|
|
bliver | present tense of "to be/to become"
|
|
hendes | her/hers
|
|
været | be
|
|
thi | for (conj)
|
|
jer | you
|
|
sådan | such, like this/like that
|
|
''')
|
|
|
|
|
|
class SearchDanish(SearchLanguage):
    """Danish ('da') search language that stems words with Snowball."""

    # Language code and human-readable language name.
    lang = 'da'
    language_name = 'Danish'
    # File name of the JavaScript Danish stemmer.
    # NOTE(review): presumably looked up by the SearchLanguage base class --
    # confirm where this file is resolved.
    js_stemmer_rawcode = 'danish-stemmer.js'
    # Stop words produced at module level by parse_stop_word().
    stopwords = danish_stopwords

    def init(self, options: dict) -> None:
        """Create the Danish Snowball stemmer and store it on the instance.

        *options* is accepted but not read by this method.
        """
        danish_stemmer = snowballstemmer.stemmer('danish')
        self.stemmer = danish_stemmer

    def stem(self, word: str) -> str:
        """Return the stem of *word*, which is lowercased before stemming.

        Relies on ``self.stemmer`` having been set by :meth:`init`.
        """
        lowered = word.lower()
        return self.stemmer.stemWord(lowered)
|