Improve Sudachi settings (add part-of-speech stop filter and normalized-form filter to the Sudachi analyzer)
This commit is contained in:
parent
eba4feddf8
commit
22427c4669
4 changed files with 57 additions and 5 deletions
|
@ -17,6 +17,16 @@ class AccountsIndex < Chewy::Index
|
||||||
type: 'stemmer',
|
type: 'stemmer',
|
||||||
language: 'possessive_english',
|
language: 'possessive_english',
|
||||||
},
|
},
|
||||||
|
|
||||||
|
my_posfilter: {
|
||||||
|
type: 'sudachi_part_of_speech',
|
||||||
|
stoptags: [
|
||||||
|
'助詞',
|
||||||
|
'助動詞',
|
||||||
|
'補助記号,句点',
|
||||||
|
'補助記号,読点',
|
||||||
|
],
|
||||||
|
},
|
||||||
},
|
},
|
||||||
|
|
||||||
analyzer: {
|
analyzer: {
|
||||||
|
@ -32,6 +42,15 @@ class AccountsIndex < Chewy::Index
|
||||||
),
|
),
|
||||||
},
|
},
|
||||||
|
|
||||||
|
sudachi_analyzer: {
|
||||||
|
filter: %w(
|
||||||
|
my_posfilter
|
||||||
|
sudachi_normalizedform
|
||||||
|
),
|
||||||
|
type: 'custom',
|
||||||
|
tokenizer: 'sudachi_tokenizer',
|
||||||
|
},
|
||||||
|
|
||||||
verbatim: {
|
verbatim: {
|
||||||
tokenizer: 'standard',
|
tokenizer: 'standard',
|
||||||
filter: %w(lowercase asciifolding cjk_width),
|
filter: %w(lowercase asciifolding cjk_width),
|
||||||
|
@ -49,6 +68,13 @@ class AccountsIndex < Chewy::Index
|
||||||
min_gram: 1,
|
min_gram: 1,
|
||||||
max_gram: 15,
|
max_gram: 15,
|
||||||
},
|
},
|
||||||
|
|
||||||
|
sudachi_tokenizer: {
|
||||||
|
resources_path: '/etc/elasticsearch/sudachi',
|
||||||
|
split_mode: 'A',
|
||||||
|
type: 'sudachi_tokenizer',
|
||||||
|
discard_punctuation: 'true',
|
||||||
|
},
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -63,6 +89,6 @@ class AccountsIndex < Chewy::Index
|
||||||
field(:domain, type: 'keyword', value: ->(account) { account.domain || '' })
|
field(:domain, type: 'keyword', value: ->(account) { account.domain || '' })
|
||||||
field(:display_name, type: 'text', analyzer: 'verbatim') { field :edge_ngram, type: 'text', analyzer: 'edge_ngram', search_analyzer: 'verbatim' }
|
field(:display_name, type: 'text', analyzer: 'verbatim') { field :edge_ngram, type: 'text', analyzer: 'edge_ngram', search_analyzer: 'verbatim' }
|
||||||
field(:username, type: 'text', analyzer: 'verbatim', value: ->(account) { [account.username, account.domain].compact.join('@') }) { field :edge_ngram, type: 'text', analyzer: 'edge_ngram', search_analyzer: 'verbatim' }
|
field(:username, type: 'text', analyzer: 'verbatim', value: ->(account) { [account.username, account.domain].compact.join('@') }) { field :edge_ngram, type: 'text', analyzer: 'edge_ngram', search_analyzer: 'verbatim' }
|
||||||
field(:text, type: 'text', analyzer: 'whitespace', value: ->(account) { account.searchable_text }) { field :stemmed, type: 'text', analyzer: 'natural' }
|
field(:text, type: 'text', analyzer: 'sudachi_analyzer', value: ->(account) { account.searchable_text }) { field :stemmed, type: 'text', analyzer: 'natural' }
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
|
@ -50,6 +50,16 @@ class PublicStatusesIndex < Chewy::Index
|
||||||
type: 'stemmer',
|
type: 'stemmer',
|
||||||
language: 'possessive_english',
|
language: 'possessive_english',
|
||||||
},
|
},
|
||||||
|
|
||||||
|
my_posfilter: {
|
||||||
|
type: 'sudachi_part_of_speech',
|
||||||
|
stoptags: [
|
||||||
|
'助詞',
|
||||||
|
'助動詞',
|
||||||
|
'補助記号,句点',
|
||||||
|
'補助記号,読点',
|
||||||
|
],
|
||||||
|
},
|
||||||
},
|
},
|
||||||
|
|
||||||
analyzer: {
|
analyzer: {
|
||||||
|
@ -65,7 +75,10 @@ class PublicStatusesIndex < Chewy::Index
|
||||||
),
|
),
|
||||||
},
|
},
|
||||||
sudachi_analyzer: {
|
sudachi_analyzer: {
|
||||||
filter: [],
|
filter: %w(
|
||||||
|
my_posfilter
|
||||||
|
sudachi_normalizedform
|
||||||
|
),
|
||||||
type: 'custom',
|
type: 'custom',
|
||||||
tokenizer: 'sudachi_tokenizer',
|
tokenizer: 'sudachi_tokenizer',
|
||||||
},
|
},
|
||||||
|
|
|
@ -49,6 +49,16 @@ class StatusesIndex < Chewy::Index
|
||||||
type: 'stemmer',
|
type: 'stemmer',
|
||||||
language: 'possessive_english',
|
language: 'possessive_english',
|
||||||
},
|
},
|
||||||
|
|
||||||
|
my_posfilter: {
|
||||||
|
type: 'sudachi_part_of_speech',
|
||||||
|
stoptags: [
|
||||||
|
'助詞',
|
||||||
|
'助動詞',
|
||||||
|
'補助記号,句点',
|
||||||
|
'補助記号,読点',
|
||||||
|
],
|
||||||
|
},
|
||||||
},
|
},
|
||||||
analyzer: {
|
analyzer: {
|
||||||
content: {
|
content: {
|
||||||
|
@ -63,7 +73,10 @@ class StatusesIndex < Chewy::Index
|
||||||
),
|
),
|
||||||
},
|
},
|
||||||
sudachi_analyzer: {
|
sudachi_analyzer: {
|
||||||
filter: [],
|
filter: %w(
|
||||||
|
my_posfilter
|
||||||
|
sudachi_normalizedform
|
||||||
|
),
|
||||||
type: 'custom',
|
type: 'custom',
|
||||||
tokenizer: 'sudachi_tokenizer',
|
tokenizer: 'sudachi_tokenizer',
|
||||||
},
|
},
|
||||||
|
|
|
@ -25,7 +25,7 @@ class SearchQueryTransformer < Parslet::Transform
|
||||||
def clause_to_query(clause)
|
def clause_to_query(clause)
|
||||||
case clause
|
case clause
|
||||||
when TermClause
|
when TermClause
|
||||||
{ multi_match: { type: 'most_fields', query: clause.term, fields: ['text', 'text.stemmed'] } }
|
{ match_phrase: { text: { query: clause.term } } }
|
||||||
when PhraseClause
|
when PhraseClause
|
||||||
{ match_phrase: { text: { query: clause.phrase } } }
|
{ match_phrase: { text: { query: clause.phrase } } }
|
||||||
else
|
else
|
||||||
|
@ -134,7 +134,7 @@ class SearchQueryTransformer < Parslet::Transform
|
||||||
if clause[:prefix]
|
if clause[:prefix]
|
||||||
PrefixClause.new(prefix, clause[:term].to_s)
|
PrefixClause.new(prefix, clause[:term].to_s)
|
||||||
elsif clause[:term]
|
elsif clause[:term]
|
||||||
PhraseClause.new(prefix, operator, clause[:term].to_s)
|
TermClause.new(prefix, operator, clause[:term].to_s)
|
||||||
elsif clause[:shortcode]
|
elsif clause[:shortcode]
|
||||||
TermClause.new(prefix, operator, ":#{clause[:term]}:")
|
TermClause.new(prefix, operator, ":#{clause[:term]}:")
|
||||||
elsif clause[:phrase]
|
elsif clause[:phrase]
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue