# Configuration file for environments that use Japanese and the Sudachi
# plug-in.
# To use this file, copy it to the Mastodon root directory and rename it to
# ".elasticsearch.yml".

version: 1

# ---------------------------------------------------------------------------
# Per-index analysis definitions (filters, analyzers, tokenizers).
# Each index section is self-contained: names declared in one section are
# re-declared in every other section that uses them.
# ---------------------------------------------------------------------------

accounts:
  filter:
    english_stop:
      type: stop
      stopwords: _english_
    english_stemmer:
      type: stemmer
      language: english
    english_possessive_stemmer:
      type: stemmer
      language: possessive_english
    # Drops tokens by Sudachi part-of-speech tag: particles (助詞), auxiliary
    # verbs (助動詞), and the period / comma supplementary symbols.
    my_posfilter:
      type: sudachi_part_of_speech
      stoptags:
        - 助詞
        - 助動詞
        - 補助記号,句点
        - 補助記号,読点
  analyzer:
    natural:
      tokenizer: standard
      filter:
        - lowercase
        - asciifolding
        - cjk_width
        - elision
        - english_possessive_stemmer
        - english_stop
        - english_stemmer
    sudachi_analyzer:
      type: custom
      tokenizer: sudachi_tokenizer
      filter:
        - my_posfilter
        - sudachi_normalizedform
    verbatim:
      tokenizer: standard
      filter:
        - lowercase
        - asciifolding
        - cjk_width
    edge_ngram:
      tokenizer: edge_ngram
      filter:
        - lowercase
        - asciifolding
        - cjk_width
  tokenizer:
    edge_ngram:
      type: edge_ngram
      min_gram: 1
      max_gram: 15
    sudachi_tokenizer:
      type: sudachi_tokenizer
      resources_path: '/etc/elasticsearch/sudachi'
      split_mode: A
      discard_punctuation: 'true'

public_statuses:
  filter:
    english_stop:
      type: stop
      stopwords: _english_
    english_stemmer:
      type: stemmer
      language: english
    english_possessive_stemmer:
      type: stemmer
      language: possessive_english
    my_posfilter:
      type: sudachi_part_of_speech
      stoptags:
        - 助詞
        - 助動詞
        - 補助記号,句点
        - 補助記号,読点
  analyzer:
    content:
      tokenizer: uax_url_email
      filter:
        - english_possessive_stemmer
        - lowercase
        - asciifolding
        - cjk_width
        - english_stop
        - english_stemmer
    hashtag:
      tokenizer: keyword
      filter:
        - word_delimiter_graph
        - lowercase
        - asciifolding
        - cjk_width
    sudachi_analyzer:
      type: custom
      tokenizer: sudachi_tokenizer
      filter:
        - english_possessive_stemmer
        - lowercase
        - asciifolding
        - cjk_width
        - english_stop
        - english_stemmer
        - my_posfilter
        - sudachi_normalizedform
  tokenizer:
    sudachi_tokenizer:
      type: sudachi_tokenizer
      resources_path: '/etc/elasticsearch/sudachi'
      split_mode: A
      discard_punctuation: 'true'

statuses:
  filter:
    english_stop:
      type: stop
      stopwords: _english_
    english_stemmer:
      type: stemmer
      language: english
    english_possessive_stemmer:
      type: stemmer
      language: possessive_english
    my_posfilter:
      type: sudachi_part_of_speech
      stoptags:
        - 助詞
        - 助動詞
        - 補助記号,句点
        - 補助記号,読点
  analyzer:
    verbatim:
      tokenizer: uax_url_email
      filter:
        - lowercase
    content:
      tokenizer: uax_url_email
      filter:
        - english_possessive_stemmer
        - lowercase
        - asciifolding
        - cjk_width
        - english_stop
        - english_stemmer
    hashtag:
      tokenizer: keyword
      filter:
        - word_delimiter_graph
        - lowercase
        - asciifolding
        - cjk_width
    sudachi_analyzer:
      type: custom
      tokenizer: sudachi_tokenizer
      filter:
        - english_possessive_stemmer
        - lowercase
        - asciifolding
        - cjk_width
        - english_stop
        - english_stemmer
        - my_posfilter
        - sudachi_normalizedform
  # `sudachi_analyzer` above references `sudachi_tokenizer`, so it is declared
  # here with the same settings as in `accounts` and `public_statuses`; without
  # this section the dictionary path and split mode would not be configured
  # for this index.
  tokenizer:
    sudachi_tokenizer:
      type: sudachi_tokenizer
      resources_path: '/etc/elasticsearch/sudachi'
      split_mode: A
      discard_punctuation: 'true'

tags:
  analyzer:
    content:
      tokenizer: keyword
      filter:
        - word_delimiter_graph
        - lowercase
        - asciifolding
        - cjk_width
    edge_ngram:
      tokenizer: edge_ngram
      filter:
        - lowercase
        - asciifolding
        - cjk_width
  tokenizer:
    # Note: min_gram is 2 here, whereas the accounts index uses 1.
    edge_ngram:
      type: edge_ngram
      min_gram: 2
      max_gram: 15

# ---------------------------------------------------------------------------
# Field-to-analyzer assignments. Every analyzer named below is declared in the
# matching index section above (e.g. `accounts_analyzers` -> `accounts`).
# ---------------------------------------------------------------------------

accounts_analyzers:
  display_name:
    analyzer: verbatim
    edge_ngram:
      analyzer: edge_ngram
      search_analyzer: verbatim
  username:
    analyzer: verbatim
    edge_ngram:
      analyzer: edge_ngram
      search_analyzer: verbatim
  text:
    analyzer: sudachi_analyzer
    stemmed:
      analyzer: natural

public_statuses_analyzers:
  text:
    analyzer: sudachi_analyzer
    stemmed:
      analyzer: content
  tags:
    analyzer: hashtag

statuses_analyzers:
  text:
    analyzer: sudachi_analyzer
    stemmed:
      analyzer: content
  tags:
    analyzer: hashtag

tags_analyzers:
  name:
    analyzer: content
    edge_ngram:
      analyzer: edge_ngram
      search_analyzer: content