Improve sudachi settings

This commit is contained in:
KMY 2023-08-28 10:23:11 +09:00
parent eba4feddf8
commit 22427c4669
4 changed files with 57 additions and 5 deletions

View file

@ -17,6 +17,16 @@ class AccountsIndex < Chewy::Index
type: 'stemmer', type: 'stemmer',
language: 'possessive_english', language: 'possessive_english',
}, },
my_posfilter: {
type: 'sudachi_part_of_speech',
stoptags: [
'助詞',
'助動詞',
'補助記号,句点',
'補助記号,読点',
],
},
}, },
analyzer: { analyzer: {
@ -32,6 +42,15 @@ class AccountsIndex < Chewy::Index
), ),
}, },
sudachi_analyzer: {
filter: %w(
my_posfilter
sudachi_normalizedform
),
type: 'custom',
tokenizer: 'sudachi_tokenizer',
},
verbatim: { verbatim: {
tokenizer: 'standard', tokenizer: 'standard',
filter: %w(lowercase asciifolding cjk_width), filter: %w(lowercase asciifolding cjk_width),
@ -49,6 +68,13 @@ class AccountsIndex < Chewy::Index
min_gram: 1, min_gram: 1,
max_gram: 15, max_gram: 15,
}, },
sudachi_tokenizer: {
resources_path: '/etc/elasticsearch/sudachi',
split_mode: 'A',
type: 'sudachi_tokenizer',
discard_punctuation: 'true',
},
}, },
} }
@ -63,6 +89,6 @@ class AccountsIndex < Chewy::Index
field(:domain, type: 'keyword', value: ->(account) { account.domain || '' }) field(:domain, type: 'keyword', value: ->(account) { account.domain || '' })
field(:display_name, type: 'text', analyzer: 'verbatim') { field :edge_ngram, type: 'text', analyzer: 'edge_ngram', search_analyzer: 'verbatim' } field(:display_name, type: 'text', analyzer: 'verbatim') { field :edge_ngram, type: 'text', analyzer: 'edge_ngram', search_analyzer: 'verbatim' }
field(:username, type: 'text', analyzer: 'verbatim', value: ->(account) { [account.username, account.domain].compact.join('@') }) { field :edge_ngram, type: 'text', analyzer: 'edge_ngram', search_analyzer: 'verbatim' } field(:username, type: 'text', analyzer: 'verbatim', value: ->(account) { [account.username, account.domain].compact.join('@') }) { field :edge_ngram, type: 'text', analyzer: 'edge_ngram', search_analyzer: 'verbatim' }
field(:text, type: 'text', analyzer: 'whitespace', value: ->(account) { account.searchable_text }) { field :stemmed, type: 'text', analyzer: 'natural' } field(:text, type: 'text', analyzer: 'sudachi_analyzer', value: ->(account) { account.searchable_text }) { field :stemmed, type: 'text', analyzer: 'natural' }
end end
end end

View file

@ -50,6 +50,16 @@ class PublicStatusesIndex < Chewy::Index
type: 'stemmer', type: 'stemmer',
language: 'possessive_english', language: 'possessive_english',
}, },
my_posfilter: {
type: 'sudachi_part_of_speech',
stoptags: [
'助詞',
'助動詞',
'補助記号,句点',
'補助記号,読点',
],
},
}, },
analyzer: { analyzer: {
@ -65,7 +75,10 @@ class PublicStatusesIndex < Chewy::Index
), ),
}, },
sudachi_analyzer: { sudachi_analyzer: {
filter: [], filter: %w(
my_posfilter
sudachi_normalizedform
),
type: 'custom', type: 'custom',
tokenizer: 'sudachi_tokenizer', tokenizer: 'sudachi_tokenizer',
}, },

View file

@ -49,6 +49,16 @@ class StatusesIndex < Chewy::Index
type: 'stemmer', type: 'stemmer',
language: 'possessive_english', language: 'possessive_english',
}, },
my_posfilter: {
type: 'sudachi_part_of_speech',
stoptags: [
'助詞',
'助動詞',
'補助記号,句点',
'補助記号,読点',
],
},
}, },
analyzer: { analyzer: {
content: { content: {
@ -63,7 +73,10 @@ class StatusesIndex < Chewy::Index
), ),
}, },
sudachi_analyzer: { sudachi_analyzer: {
filter: [], filter: %w(
my_posfilter
sudachi_normalizedform
),
type: 'custom', type: 'custom',
tokenizer: 'sudachi_tokenizer', tokenizer: 'sudachi_tokenizer',
}, },

View file

@ -25,7 +25,7 @@ class SearchQueryTransformer < Parslet::Transform
def clause_to_query(clause) def clause_to_query(clause)
case clause case clause
when TermClause when TermClause
{ multi_match: { type: 'most_fields', query: clause.term, fields: ['text', 'text.stemmed'] } } { match_phrase: { text: { query: clause.term } } }
when PhraseClause when PhraseClause
{ match_phrase: { text: { query: clause.phrase } } } { match_phrase: { text: { query: clause.phrase } } }
else else
@ -134,7 +134,7 @@ class SearchQueryTransformer < Parslet::Transform
if clause[:prefix] if clause[:prefix]
PrefixClause.new(prefix, clause[:term].to_s) PrefixClause.new(prefix, clause[:term].to_s)
elsif clause[:term] elsif clause[:term]
PhraseClause.new(prefix, operator, clause[:term].to_s) TermClause.new(prefix, operator, clause[:term].to_s)
elsif clause[:shortcode] elsif clause[:shortcode]
TermClause.new(prefix, operator, ":#{clause[:term]}:") TermClause.new(prefix, operator, ":#{clause[:term]}:")
elsif clause[:phrase] elsif clause[:phrase]