nas/config/elasticsearch.default-ja-sudachi.yml
KMY(雪あすか) a8fbcb3fb6
Change: #532 ElasticSearch設定の外出し (#650)
* Change: #532 ElasticSearch設定の外出し

* バージョンチェック

* 起動時にエラー
2024-03-12 12:11:13 +09:00

234 lines
4.8 KiB
YAML

# This is a configuration file for environments that use Japanese and Sudachi plug-ins.
# To use this file, copy it to the Mastodon root directory and rename the file to ".elasticsearch.yml".
# Version number of this configuration file.
# NOTE(review): presumably checked by the application when the file is loaded —
# confirm before changing it.
version: 1
# Analysis settings under `accounts`: token filters, analyzers and tokenizers.
accounts:
  # Token filters referenced by name from the analyzers below.
  filter:
    english_stop:
      type: stop
      stopwords: _english_
    english_stemmer:
      type: stemmer
      language: english
    english_possessive_stemmer:
      type: stemmer
      language: possessive_english
    # Sudachi part-of-speech filter: tokens tagged with any of these are dropped.
    my_posfilter:
      type: sudachi_part_of_speech
      stoptags:
        - 助詞  # particle
        - 助動詞  # auxiliary verb
        - 補助記号,句点  # supplementary symbol: period
        - 補助記号,読点  # supplementary symbol: comma
  analyzer:
    # English-oriented analysis: normalization plus stop words and stemming.
    natural:
      tokenizer: standard
      filter:
        - lowercase
        - asciifolding
        - cjk_width
        - elision
        - english_possessive_stemmer
        - english_stop
        - english_stemmer
    # Japanese analysis through the Sudachi tokenizer defined under `tokenizer`.
    sudachi_analyzer:
      type: custom
      tokenizer: sudachi_tokenizer
      filter:
        - my_posfilter
        - sudachi_normalizedform
    # Normalization only; no stemming or stop words.
    verbatim:
      tokenizer: standard
      filter:
        - lowercase
        - asciifolding
        - cjk_width
    # Same normalization as `verbatim`, on top of the edge_ngram tokenizer.
    edge_ngram:
      tokenizer: edge_ngram
      filter:
        - lowercase
        - asciifolding
        - cjk_width
  tokenizer:
    edge_ngram:
      type: edge_ngram
      min_gram: 1
      max_gram: 15
    sudachi_tokenizer:
      resources_path: '/etc/elasticsearch/sudachi'
      split_mode: A
      type: sudachi_tokenizer
      discard_punctuation: 'true'
# Analysis settings under `public_statuses`: token filters, analyzers and the
# Sudachi tokenizer.
public_statuses:
  # Token filters referenced by name from the analyzers below.
  filter:
    english_stop:
      type: stop
      stopwords: _english_
    english_stemmer:
      type: stemmer
      language: english
    english_possessive_stemmer:
      type: stemmer
      language: possessive_english
    # Sudachi part-of-speech filter: tokens tagged with any of these are dropped.
    my_posfilter:
      type: sudachi_part_of_speech
      stoptags:
        - 助詞  # particle
        - 助動詞  # auxiliary verb
        - 補助記号,句点  # supplementary symbol: period
        - 補助記号,読点  # supplementary symbol: comma
  analyzer:
    # English-oriented analysis; the tokenizer keeps URLs and e-mail addresses
    # as single tokens.
    content:
      tokenizer: uax_url_email
      filter:
        - english_possessive_stemmer
        - lowercase
        - asciifolding
        - cjk_width
        - english_stop
        - english_stemmer
    # The whole input is one token, then split by word_delimiter_graph.
    hashtag:
      tokenizer: keyword
      filter:
        - word_delimiter_graph
        - lowercase
        - asciifolding
        - cjk_width
    # Japanese analysis: the `content` filter chain followed by the Sudachi
    # part-of-speech and normalized-form filters.
    sudachi_analyzer:
      tokenizer: sudachi_tokenizer
      type: custom
      filter:
        - english_possessive_stemmer
        - lowercase
        - asciifolding
        - cjk_width
        - english_stop
        - english_stemmer
        - my_posfilter
        - sudachi_normalizedform
  tokenizer:
    sudachi_tokenizer:
      resources_path: '/etc/elasticsearch/sudachi'
      split_mode: A
      type: sudachi_tokenizer
      discard_punctuation: 'true'
# Analysis settings under `statuses`: token filters, analyzers and the Sudachi
# tokenizer.
statuses:
  # Token filters referenced by name from the analyzers below.
  filter:
    english_stop:
      type: stop
      stopwords: _english_
    english_stemmer:
      type: stemmer
      language: english
    english_possessive_stemmer:
      type: stemmer
      language: possessive_english
    # Sudachi part-of-speech filter: tokens tagged with any of these are dropped.
    my_posfilter:
      type: sudachi_part_of_speech
      stoptags:
        - 助詞  # particle
        - 助動詞  # auxiliary verb
        - 補助記号,句点  # supplementary symbol: period
        - 補助記号,読点  # supplementary symbol: comma
  analyzer:
    # Lowercasing only; no folding, stemming or stop words.
    verbatim:
      tokenizer: uax_url_email
      filter:
        - lowercase
    # English-oriented analysis; the tokenizer keeps URLs and e-mail addresses
    # as single tokens.
    content:
      tokenizer: uax_url_email
      filter:
        - english_possessive_stemmer
        - lowercase
        - asciifolding
        - cjk_width
        - english_stop
        - english_stemmer
    # The whole input is one token, then split by word_delimiter_graph.
    hashtag:
      tokenizer: keyword
      filter:
        - word_delimiter_graph
        - lowercase
        - asciifolding
        - cjk_width
    # Japanese analysis: the `content` filter chain followed by the Sudachi
    # part-of-speech and normalized-form filters.
    sudachi_analyzer:
      tokenizer: sudachi_tokenizer
      type: custom
      filter:
        - english_possessive_stemmer
        - lowercase
        - asciifolding
        - cjk_width
        - english_stop
        - english_stemmer
        - my_posfilter
        - sudachi_normalizedform
  # `sudachi_analyzer` above references `sudachi_tokenizer`, so define it here
  # with the same settings that `accounts` and `public_statuses` use. This keeps
  # all three sections on the same split mode and dictionary path.
  # NOTE(review): without this definition the analyzer would rely on whatever
  # the plugin provides by default (or fail to resolve) — confirm on your
  # cluster, and re-create the index after changing analysis settings.
  tokenizer:
    sudachi_tokenizer:
      resources_path: '/etc/elasticsearch/sudachi'
      split_mode: A
      type: sudachi_tokenizer
      discard_punctuation: 'true'
# Analysis settings under `tags`: analyzers and the edge_ngram tokenizer.
tags:
  analyzer:
    # The whole input is one token, then split by word_delimiter_graph.
    content:
      tokenizer: keyword
      filter:
        - word_delimiter_graph
        - lowercase
        - asciifolding
        - cjk_width
    # Normalized edge n-grams produced by the tokenizer defined below.
    edge_ngram:
      tokenizer: edge_ngram
      filter:
        - lowercase
        - asciifolding
        - cjk_width
  tokenizer:
    # Note: min_gram is 2 here, whereas `accounts` uses 1.
    edge_ngram:
      type: edge_ngram
      min_gram: 2
      max_gram: 15
# Per-field analyzer selection for `accounts`. The analyzer names refer to
# entries defined under `accounts.analyzer`.
accounts_analyzers:
  display_name:
    analyzer: verbatim
    # Sub-field: indexed with edge n-grams, searched with `verbatim`.
    edge_ngram:
      analyzer: edge_ngram
      search_analyzer: verbatim
  username:
    analyzer: verbatim
    # Sub-field: indexed with edge n-grams, searched with `verbatim`.
    edge_ngram:
      analyzer: edge_ngram
      search_analyzer: verbatim
  text:
    analyzer: sudachi_analyzer
    stemmed:
      analyzer: natural
# Per-field analyzer selection for `public_statuses`. The analyzer names refer
# to entries defined under `public_statuses.analyzer`.
public_statuses_analyzers:
  text:
    analyzer: sudachi_analyzer
    stemmed:
      analyzer: content
  tags:
    analyzer: hashtag
# Per-field analyzer selection for `statuses`. The analyzer names refer to
# entries defined under `statuses.analyzer`.
statuses_analyzers:
  text:
    analyzer: sudachi_analyzer
    stemmed:
      analyzer: content
  tags:
    analyzer: hashtag
# Per-field analyzer selection for `tags`. The analyzer names refer to entries
# defined under `tags.analyzer`.
tags_analyzers:
  name:
    analyzer: content
    # Sub-field: indexed with edge n-grams, searched with `content`.
    edge_ngram:
      analyzer: edge_ngram
      search_analyzer: content