Improve sudachi settings

This commit is contained in:
KMY 2023-08-28 10:23:11 +09:00
parent eba4feddf8
commit 22427c4669
4 changed files with 57 additions and 5 deletions

View file

@ -17,6 +17,16 @@ class AccountsIndex < Chewy::Index
type: 'stemmer',
language: 'possessive_english',
},
my_posfilter: {
type: 'sudachi_part_of_speech',
stoptags: [
'助詞',
'助動詞',
'補助記号,句点',
'補助記号,読点',
],
},
},
analyzer: {
@ -32,6 +42,15 @@ class AccountsIndex < Chewy::Index
),
},
sudachi_analyzer: {
filter: %w(
my_posfilter
sudachi_normalizedform
),
type: 'custom',
tokenizer: 'sudachi_tokenizer',
},
verbatim: {
tokenizer: 'standard',
filter: %w(lowercase asciifolding cjk_width),
@ -49,6 +68,13 @@ class AccountsIndex < Chewy::Index
min_gram: 1,
max_gram: 15,
},
sudachi_tokenizer: {
resources_path: '/etc/elasticsearch/sudachi',
split_mode: 'A',
type: 'sudachi_tokenizer',
discard_punctuation: 'true',
},
},
}
@ -63,6 +89,6 @@ class AccountsIndex < Chewy::Index
field(:domain, type: 'keyword', value: ->(account) { account.domain || '' })
field(:display_name, type: 'text', analyzer: 'verbatim') { field :edge_ngram, type: 'text', analyzer: 'edge_ngram', search_analyzer: 'verbatim' }
field(:username, type: 'text', analyzer: 'verbatim', value: ->(account) { [account.username, account.domain].compact.join('@') }) { field :edge_ngram, type: 'text', analyzer: 'edge_ngram', search_analyzer: 'verbatim' }
field(:text, type: 'text', analyzer: 'whitespace', value: ->(account) { account.searchable_text }) { field :stemmed, type: 'text', analyzer: 'natural' }
field(:text, type: 'text', analyzer: 'sudachi_analyzer', value: ->(account) { account.searchable_text }) { field :stemmed, type: 'text', analyzer: 'natural' }
end
end

View file

@ -50,6 +50,16 @@ class PublicStatusesIndex < Chewy::Index
type: 'stemmer',
language: 'possessive_english',
},
my_posfilter: {
type: 'sudachi_part_of_speech',
stoptags: [
'助詞',
'助動詞',
'補助記号,句点',
'補助記号,読点',
],
},
},
analyzer: {
@ -65,7 +75,10 @@ class PublicStatusesIndex < Chewy::Index
),
},
sudachi_analyzer: {
filter: [],
filter: %w(
my_posfilter
sudachi_normalizedform
),
type: 'custom',
tokenizer: 'sudachi_tokenizer',
},

View file

@ -49,6 +49,16 @@ class StatusesIndex < Chewy::Index
type: 'stemmer',
language: 'possessive_english',
},
my_posfilter: {
type: 'sudachi_part_of_speech',
stoptags: [
'助詞',
'助動詞',
'補助記号,句点',
'補助記号,読点',
],
},
},
analyzer: {
content: {
@ -63,7 +73,10 @@ class StatusesIndex < Chewy::Index
),
},
sudachi_analyzer: {
filter: [],
filter: %w(
my_posfilter
sudachi_normalizedform
),
type: 'custom',
tokenizer: 'sudachi_tokenizer',
},

View file

@ -25,7 +25,7 @@ class SearchQueryTransformer < Parslet::Transform
def clause_to_query(clause)
case clause
when TermClause
{ multi_match: { type: 'most_fields', query: clause.term, fields: ['text', 'text.stemmed'] } }
{ match_phrase: { text: { query: clause.term } } }
when PhraseClause
{ match_phrase: { text: { query: clause.phrase } } }
else
@ -134,7 +134,7 @@ class SearchQueryTransformer < Parslet::Transform
if clause[:prefix]
PrefixClause.new(prefix, clause[:term].to_s)
elsif clause[:term]
PhraseClause.new(prefix, operator, clause[:term].to_s)
TermClause.new(prefix, operator, clause[:term].to_s)
elsif clause[:shortcode]
TermClause.new(prefix, operator, ":#{clause[:term]}:")
elsif clause[:phrase]