From 22427c4669068095c20d76e671e2d20c4dc8d9bf Mon Sep 17 00:00:00 2001
From: KMY <tt@kmycode.net>
Date: Mon, 28 Aug 2023 10:23:11 +0900
Subject: [PATCH] Improve sudachi settings

---
 app/chewy/accounts_index.rb         | 28 +++++++++++++++++++++++++++-
 app/chewy/public_statuses_index.rb  | 15 ++++++++++++++-
 app/chewy/statuses_index.rb         | 15 ++++++++++++++-
 app/lib/search_query_transformer.rb |  4 ++--
 4 files changed, 57 insertions(+), 5 deletions(-)

diff --git a/app/chewy/accounts_index.rb b/app/chewy/accounts_index.rb
index 46ed83487f..0b48777054 100644
--- a/app/chewy/accounts_index.rb
+++ b/app/chewy/accounts_index.rb
@@ -17,6 +17,16 @@ class AccountsIndex < Chewy::Index
         type: 'stemmer',
         language: 'possessive_english',
       },
+
+      my_posfilter: { # Sudachi part-of-speech token filter; tokens tagged with any of the stoptags below are filtered out
+        type: 'sudachi_part_of_speech',
+        stoptags: [
+          '助詞', # particles
+          '助動詞', # auxiliary verbs
+          '補助記号,句点', # supplementary symbol: sentence-ending period
+          '補助記号,読点', # supplementary symbol: comma
+        ],
+      },
     },
 
     analyzer: {
@@ -32,6 +42,15 @@ class AccountsIndex < Chewy::Index
         ),
       },
 
+      sudachi_analyzer: { # Custom analyzer: sudachi_tokenizer output passed through my_posfilter, then sudachi_normalizedform
+        filter: %w(
+          my_posfilter
+          sudachi_normalizedform
+        ),
+        type: 'custom',
+        tokenizer: 'sudachi_tokenizer',
+      },
+
       verbatim: {
         tokenizer: 'standard',
         filter: %w(lowercase asciifolding cjk_width),
@@ -49,6 +68,13 @@ class AccountsIndex < Chewy::Index
         min_gram: 1,
         max_gram: 15,
       },
+
+      sudachi_tokenizer: { # Tokenizer definition backed by the Sudachi plugin's 'sudachi_tokenizer' type
+        resources_path: '/etc/elasticsearch/sudachi', # NOTE(review): hard-coded path; presumably the Sudachi resource directory on the Elasticsearch node - confirm per deployment
+        split_mode: 'A', # NOTE(review): believed to be Sudachi's finest-grained split mode - confirm against the plugin docs
+        type: 'sudachi_tokenizer',
+        discard_punctuation: 'true', # given as the string 'true', not a boolean literal
+      },
     },
   }
 
@@ -63,6 +89,6 @@ class AccountsIndex < Chewy::Index
     field(:domain, type: 'keyword', value: ->(account) { account.domain || '' })
     field(:display_name, type: 'text', analyzer: 'verbatim') { field :edge_ngram, type: 'text', analyzer: 'edge_ngram', search_analyzer: 'verbatim' }
     field(:username, type: 'text', analyzer: 'verbatim', value: ->(account) { [account.username, account.domain].compact.join('@') }) { field :edge_ngram, type: 'text', analyzer: 'edge_ngram', search_analyzer: 'verbatim' }
-    field(:text, type: 'text', analyzer: 'whitespace', value: ->(account) { account.searchable_text }) { field :stemmed, type: 'text', analyzer: 'natural' }
+    field(:text, type: 'text', analyzer: 'sudachi_analyzer', value: ->(account) { account.searchable_text }) { field :stemmed, type: 'text', analyzer: 'natural' }
   end
 end
diff --git a/app/chewy/public_statuses_index.rb b/app/chewy/public_statuses_index.rb
index 796e83249f..abc4738398 100644
--- a/app/chewy/public_statuses_index.rb
+++ b/app/chewy/public_statuses_index.rb
@@ -50,6 +50,16 @@ class PublicStatusesIndex < Chewy::Index
         type: 'stemmer',
         language: 'possessive_english',
       },
+
+      my_posfilter: { # Sudachi part-of-speech token filter; tokens tagged with any of the stoptags below are filtered out
+        type: 'sudachi_part_of_speech',
+        stoptags: [
+          '助詞', # particles
+          '助動詞', # auxiliary verbs
+          '補助記号,句点', # supplementary symbol: sentence-ending period
+          '補助記号,読点', # supplementary symbol: comma
+        ],
+      },
     },
 
     analyzer: {
@@ -65,7 +75,10 @@ class PublicStatusesIndex < Chewy::Index
         ),
       },
       sudachi_analyzer: {
-        filter: [],
+        filter: %w(
+          my_posfilter
+          sudachi_normalizedform
+        ),
         type: 'custom',
         tokenizer: 'sudachi_tokenizer',
       },
diff --git a/app/chewy/statuses_index.rb b/app/chewy/statuses_index.rb
index 107efe8f77..47ee737c12 100644
--- a/app/chewy/statuses_index.rb
+++ b/app/chewy/statuses_index.rb
@@ -49,6 +49,16 @@ class StatusesIndex < Chewy::Index
         type: 'stemmer',
         language: 'possessive_english',
       },
+
+      my_posfilter: { # Sudachi part-of-speech token filter; tokens tagged with any of the stoptags below are filtered out
+        type: 'sudachi_part_of_speech',
+        stoptags: [
+          '助詞', # particles
+          '助動詞', # auxiliary verbs
+          '補助記号,句点', # supplementary symbol: sentence-ending period
+          '補助記号,読点', # supplementary symbol: comma
+        ],
+      },
     },
     analyzer: {
       content: {
@@ -63,7 +73,10 @@ class StatusesIndex < Chewy::Index
         ),
       },
       sudachi_analyzer: {
-        filter: [],
+        filter: %w(
+          my_posfilter
+          sudachi_normalizedform
+        ),
         type: 'custom',
         tokenizer: 'sudachi_tokenizer',
       },
diff --git a/app/lib/search_query_transformer.rb b/app/lib/search_query_transformer.rb
index be11597d2b..2131ee290a 100644
--- a/app/lib/search_query_transformer.rb
+++ b/app/lib/search_query_transformer.rb
@@ -25,7 +25,7 @@ class SearchQueryTransformer < Parslet::Transform
     def clause_to_query(clause)
       case clause
       when TermClause
-        { multi_match: { type: 'most_fields', query: clause.term, fields: ['text', 'text.stemmed'] } }
+        { match_phrase: { text: { query: clause.term } } }
       when PhraseClause
         { match_phrase: { text: { query: clause.phrase } } }
       else
@@ -134,7 +134,7 @@ class SearchQueryTransformer < Parslet::Transform
     if clause[:prefix]
       PrefixClause.new(prefix, clause[:term].to_s)
     elsif clause[:term]
-      PhraseClause.new(prefix, operator, clause[:term].to_s)
+      TermClause.new(prefix, operator, clause[:term].to_s)
     elsif clause[:shortcode]
       TermClause.new(prefix, operator, ":#{clause[:term]}:")
     elsif clause[:phrase]