Add more accurate hashtag search (#11579)

* Add more accurate hashtag search

Using Elasticsearch to index hashtags with edge n-grams and score
them by usage within the last 7 days since last activity. Only
hashtags that have been reviewed and are listable can appear in
searches, unless they match the query exactly.

* Fix search analyzer dropping non-ASCII characters
This commit is contained in:
Eugen Rochko 2019-08-18 03:45:51 +02:00 committed by GitHub
parent 3a77090d01
commit cc0a55cf9a
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
10 changed files with 149 additions and 13 deletions

View file

@ -109,7 +109,7 @@ class AccountSearchService < BaseService
field_value_factor: {
field: 'followers_count',
modifier: 'log2p',
missing: 1,
missing: 0,
},
}
end

View file

@ -57,10 +57,10 @@ class SearchService < BaseService
end
def perform_hashtags_search!
Tag.search_for(
@query.gsub(/\A#/, ''),
@limit,
@offset
TagSearchService.new.call(
@query,
limit: @limit,
offset: @offset
)
end

View file

@ -0,0 +1,82 @@
# frozen_string_literal: true
# Looks up hashtags for a search term, going through the Elasticsearch-backed
# TagsIndex when Chewy is enabled and through Tag.search_for otherwise.
class TagSearchService < BaseService
  # @param query [String] raw search text; surrounding whitespace and one
  #   leading "#" are removed before searching
  # @param options [Hash] reads :limit and :offset, each coerced with #to_i
  #   (so a missing key becomes 0)
  # @return whatever the selected backend returns: the compacted object list
  #   from TagsIndex, or the result of Tag.search_for
  def call(query, options = {})
    # \A anchors to the very start of the string, so at most one "#" is removed.
    @query  = query.strip.sub(/\A#/, '')
    @offset = options[:offset].to_i
    @limit  = options[:limit].to_i

    Chewy.enabled? ? from_elasticsearch : from_database
  end

  private

  # Runs the scored, filtered query against TagsIndex and pages the result.
  def from_elasticsearch
    scope = TagsIndex.query(relevance_definition).filter(visibility_definition)
    page  = scope.limit(@limit).offset(@offset)

    # `compact` drops nil entries from the loaded objects
    # (presumably index hits whose records no longer exist -- TODO confirm).
    page.objects.compact
  end

  # Database fallback: delegates to Tag.search_for with the cleaned-up term.
  def from_database
    Tag.search_for(@query, @limit, @offset)
  end

  # function_score query: the text match on the tag name is multiplied by the
  # scoring functions from #scoring_functions.
  def relevance_definition
    {
      function_score: {
        query: {
          multi_match: {
            query: @query,
            fields: %w(name.edge_ngram name),
            type: 'most_fields',
            operator: 'and',
          },
        },

        functions: scoring_functions,
        boost_mode: 'multiply',
      },
    }
  end

  # Two scoring functions: one driven by the `usage` field (treated as 0 when
  # missing), and a gaussian decay on `last_status_at`.
  def scoring_functions
    usage_factor = {
      field_value_factor: {
        field: 'usage',
        modifier: 'log2p',
        missing: 0,
      },
    }

    recency_decay = {
      gauss: {
        last_status_at: {
          scale: '7d',
          offset: '14d',
          decay: 0.5,
        },
      },
    }

    [usage_factor, recency_decay]
  end

  # Filter with two alternatives: the tag is marked as reviewed, or its name
  # is exactly the search term.
  def visibility_definition
    reviewed_clause = {
      term: {
        reviewed: {
          value: true,
        },
      },
    }

    exact_name_clause = {
      term: {
        name: {
          value: @query,
        },
      },
    }

    {
      bool: {
        should: [reviewed_clause, exact_name_clause],
      },
    }
  end
end