From 22427c4669068095c20d76e671e2d20c4dc8d9bf Mon Sep 17 00:00:00 2001 From: KMY Date: Mon, 28 Aug 2023 10:23:11 +0900 Subject: [PATCH] Improve sudachi settings --- app/chewy/accounts_index.rb | 28 +++++++++++++++++++++++++++- app/chewy/public_statuses_index.rb | 15 ++++++++++++++- app/chewy/statuses_index.rb | 15 ++++++++++++++- app/lib/search_query_transformer.rb | 4 ++-- 4 files changed, 57 insertions(+), 5 deletions(-) diff --git a/app/chewy/accounts_index.rb b/app/chewy/accounts_index.rb index 46ed83487f..0b48777054 100644 --- a/app/chewy/accounts_index.rb +++ b/app/chewy/accounts_index.rb @@ -17,6 +17,16 @@ class AccountsIndex < Chewy::Index type: 'stemmer', language: 'possessive_english', }, + + my_posfilter: { + type: 'sudachi_part_of_speech', + stoptags: [ + '助詞', + '助動詞', + '補助記号,句点', + '補助記号,読点', + ], + }, }, analyzer: { @@ -32,6 +42,15 @@ class AccountsIndex < Chewy::Index ), }, + sudachi_analyzer: { + filter: %w( + my_posfilter + sudachi_normalizedform + ), + type: 'custom', + tokenizer: 'sudachi_tokenizer', + }, + verbatim: { tokenizer: 'standard', filter: %w(lowercase asciifolding cjk_width), @@ -49,6 +68,13 @@ class AccountsIndex < Chewy::Index min_gram: 1, max_gram: 15, }, + + sudachi_tokenizer: { + resources_path: '/etc/elasticsearch/sudachi', + split_mode: 'A', + type: 'sudachi_tokenizer', + discard_punctuation: 'true', + }, }, } @@ -63,6 +89,6 @@ class AccountsIndex < Chewy::Index field(:domain, type: 'keyword', value: ->(account) { account.domain || '' }) field(:display_name, type: 'text', analyzer: 'verbatim') { field :edge_ngram, type: 'text', analyzer: 'edge_ngram', search_analyzer: 'verbatim' } field(:username, type: 'text', analyzer: 'verbatim', value: ->(account) { [account.username, account.domain].compact.join('@') }) { field :edge_ngram, type: 'text', analyzer: 'edge_ngram', search_analyzer: 'verbatim' } - field(:text, type: 'text', analyzer: 'whitespace', value: ->(account) { account.searchable_text }) { field :stemmed, type: 'text', analyzer: 'natural' } + field(:text, type: 'text', analyzer: 'sudachi_analyzer', value: ->(account) { account.searchable_text }) { field :stemmed, type: 'text', analyzer: 'natural' } end end diff --git a/app/chewy/public_statuses_index.rb b/app/chewy/public_statuses_index.rb index 796e83249f..abc4738398 100644 --- a/app/chewy/public_statuses_index.rb +++ b/app/chewy/public_statuses_index.rb @@ -50,6 +50,16 @@ class PublicStatusesIndex < Chewy::Index type: 'stemmer', language: 'possessive_english', }, + + my_posfilter: { + type: 'sudachi_part_of_speech', + stoptags: [ + '助詞', + '助動詞', + '補助記号,句点', + '補助記号,読点', + ], + }, }, analyzer: { @@ -65,7 +75,10 @@ class PublicStatusesIndex < Chewy::Index ), }, sudachi_analyzer: { - filter: [], + filter: %w( + my_posfilter + sudachi_normalizedform + ), type: 'custom', tokenizer: 'sudachi_tokenizer', }, diff --git a/app/chewy/statuses_index.rb b/app/chewy/statuses_index.rb index 107efe8f77..47ee737c12 100644 --- a/app/chewy/statuses_index.rb +++ b/app/chewy/statuses_index.rb @@ -49,6 +49,16 @@ class StatusesIndex < Chewy::Index type: 'stemmer', language: 'possessive_english', }, + + my_posfilter: { + type: 'sudachi_part_of_speech', + stoptags: [ + '助詞', + '助動詞', + '補助記号,句点', + '補助記号,読点', + ], + }, }, analyzer: { content: { @@ -63,7 +73,10 @@ class StatusesIndex < Chewy::Index ), }, sudachi_analyzer: { - filter: [], + filter: %w( + my_posfilter + sudachi_normalizedform + ), type: 'custom', tokenizer: 'sudachi_tokenizer', }, diff --git a/app/lib/search_query_transformer.rb b/app/lib/search_query_transformer.rb index be11597d2b..2131ee290a 100644 --- a/app/lib/search_query_transformer.rb +++ b/app/lib/search_query_transformer.rb @@ -25,7 +25,7 @@ class SearchQueryTransformer < Parslet::Transform def clause_to_query(clause) case clause when TermClause - { multi_match: { type: 'most_fields', query: clause.term, fields: ['text', 'text.stemmed'] } } + { match_phrase: { text: { query: clause.term } } } when PhraseClause { match_phrase: { text: { query: clause.phrase } } } else @@ -134,7 +134,7 @@ class SearchQueryTransformer < Parslet::Transform if clause[:prefix] PrefixClause.new(prefix, clause[:term].to_s) elsif clause[:term] - PhraseClause.new(prefix, operator, clause[:term].to_s) + TermClause.new(prefix, operator, clause[:term].to_s) elsif clause[:shortcode] TermClause.new(prefix, operator, ":#{clause[:term]}:") elsif clause[:phrase]