141 lines
3.4 KiB
Python
141 lines
3.4 KiB
Python
|
"""Swedish search language: includes the JS Swedish stemmer."""
|
||
|
|
||
|
from __future__ import annotations
|
||
|
|
||
|
from typing import TYPE_CHECKING, Dict
|
||
|
|
||
|
import snowballstemmer
|
||
|
|
||
|
from sphinx.search import SearchLanguage, parse_stop_word
|
||
|
|
||
|
# Swedish stop words, one entry per line: the word, then ``|`` and an English
# gloss.  Presumably ``parse_stop_word`` (imported from ``sphinx.search``)
# keeps only the text before the first ``|`` on each line, as in the Snowball
# stop-list format -- confirm against its definition.
#
# NOTE(review): ``sitta`` is glossed "his", which suggests the neuter
# possessive ``sitt`` was intended (``sitta`` is the verb "to sit").  The
# entry is kept unchanged so the stop-word set stays the same; check it
# against the upstream list before changing it.
swedish_stopwords = parse_stop_word('''
| source: http://snowball.tartarus.org/algorithms/swedish/stop.txt
och            | and
det            | it, this/that
att            | to (with infinitive)
i              | in, at
en             | a
jag            | I
hon            | she
som            | who, that
han            | he
på             | on
den            | it, this/that
med            | with
var            | where, each
sig            | him(self) etc
för            | for
så             | so (also: seed)
till           | to
är             | is
men            | but
ett            | a
om             | if; around, about
hade           | had
de             | they, these/those
av             | of
icke           | not, no
mig            | me
du             | you
henne          | her
då             | then, when
sin            | his
nu             | now
har            | have
inte           | inte någon = no one
hans           | his
honom          | him
skulle         | 'sake'
hennes         | her
där            | there
min            | my
man            | one (pronoun)
ej             | nor
vid            | at, by, on (also: vast)
kunde          | could
något          | some etc
från           | from, off
ut             | out
när            | when
efter          | after, behind
upp            | up
vi             | we
dem            | them
vara           | be
vad            | what
över           | over
än             | than
dig            | you
kan            | can
sina           | his
här            | here
ha             | have
mot            | towards
alla           | all
under          | under (also: wonder)
någon          | some etc
eller          | or (else)
allt           | all
mycket         | much
sedan          | since
ju             | why
denna          | this/that
själv          | myself, yourself etc
detta          | this/that
åt             | to
utan           | without
varit          | was
hur            | how
ingen          | no
mitt           | my
ni             | you
bli            | to be, become
blev           | from bli
oss            | us
din            | thy
dessa          | these/those
några          | some etc
deras          | their
blir           | from bli
mina           | my
samma          | (the) same
vilken         | who, that
er             | you, your
sådan          | such a
vår            | our
blivit         | from bli
dess           | its
inom           | within
mellan         | between
sådant         | such a
varför         | why
varje          | each
vilka          | who, that
ditt           | thy
vem            | who
vilket         | who, that
sitta          | his
sådana         | such a
vart           | each
dina           | thy
vars           | whose
vårt           | our
våra           | our
ert            | your
era            | your
vilkas         | whose
''')
|
||
|
|
||
|
|
||
|
class SearchSwedish(SearchLanguage):
    """Search-language definition for Swedish (language code ``sv``).

    Stemming is delegated to the ``snowballstemmer`` Swedish stemmer, which
    is created in :meth:`init`.
    """

    # Human-readable name and language code.
    language_name = 'Swedish'
    lang = 'sv'

    # Stop-word collection built at module level, and the file name of the
    # JavaScript stemmer.
    stopwords = swedish_stopwords
    js_stemmer_rawcode = 'swedish-stemmer.js'

    def init(self, options: dict) -> None:
        """Create the Swedish stemmer and store it on ``self.stemmer``.

        *options* is accepted but not consulted by this implementation.
        """
        swedish = snowballstemmer.stemmer('swedish')
        self.stemmer = swedish

    def stem(self, word: str) -> str:
        """Return the stem of *word*, lower-casing it before stemming."""
        lowered = word.lower()
        return self.stemmer.stemWord(lowered)
|