From 5d75bf984624507824475441d7055aaa54e59e47 Mon Sep 17 00:00:00 2001 From: jsgoldstein Date: Thu, 29 Jun 2023 07:05:21 -0400 Subject: [PATCH] Change account search to match by text when opted-in (#25599) Co-authored-by: Eugen Rochko --- app/chewy/accounts_index.rb | 52 +++++++++++++++++++------- app/models/concerns/account_search.rb | 11 ++++++ app/services/account_search_service.rb | 51 ++++++++++++++++++------- app/services/search_service.rb | 3 +- spec/services/search_service_spec.rb | 2 +- 5 files changed, 90 insertions(+), 29 deletions(-) diff --git a/app/chewy/accounts_index.rb b/app/chewy/accounts_index.rb index e38e14a106..abde8e92f1 100644 --- a/app/chewy/accounts_index.rb +++ b/app/chewy/accounts_index.rb @@ -2,8 +2,37 @@ class AccountsIndex < Chewy::Index settings index: { refresh_interval: '30s' }, analysis: { + filter: { + english_stop: { + type: 'stop', + stopwords: '_english_', + }, + + english_stemmer: { + type: 'stemmer', + language: 'english', + }, + + english_possessive_stemmer: { + type: 'stemmer', + language: 'possessive_english', + }, + }, + analyzer: { - content: { + natural: { + tokenizer: 'uax_url_email', + filter: %w( + english_possessive_stemmer + lowercase + asciifolding + cjk_width + english_stop + english_stemmer + ), + }, + + verbatim: { tokenizer: 'whitespace', filter: %w(lowercase asciifolding cjk_width), }, @@ -26,18 +55,13 @@ class AccountsIndex < Chewy::Index index_scope ::Account.searchable.includes(:account_stat) root date_detection: false do - field :id, type: 'long' - - field :display_name, type: 'text', analyzer: 'content' do - field :edge_ngram, type: 'text', analyzer: 'edge_ngram', search_analyzer: 'content' - end - - field :acct, type: 'text', analyzer: 'content', value: ->(account) { [account.username, account.domain].compact.join('@') } do - field :edge_ngram, type: 'text', analyzer: 'edge_ngram', search_analyzer: 'content' - end - - field :following_count, type: 'long', value: ->(account) { account.following_count } - field :followers_count, type: 'long', value: ->(account) { account.followers_count } - field :last_status_at, type: 'date', value: ->(account) { account.last_status_at || account.created_at } + field(:id, type: 'long') + field(:following_count, type: 'long') + field(:followers_count, type: 'long') + field(:properties, type: 'keyword', value: ->(account) { account.searchable_properties }) + field(:last_status_at, type: 'date', value: ->(account) { account.last_status_at || account.created_at }) + field(:display_name, type: 'text', analyzer: 'verbatim') { field :edge_ngram, type: 'text', analyzer: 'edge_ngram', search_analyzer: 'verbatim' } + field(:username, type: 'text', analyzer: 'verbatim', value: ->(account) { [account.username, account.domain].compact.join('@') }) { field :edge_ngram, type: 'text', analyzer: 'edge_ngram', search_analyzer: 'verbatim' } + field(:text, type: 'text', value: ->(account) { account.searchable_text }) { field :stemmed, type: 'text', analyzer: 'natural' } end end diff --git a/app/models/concerns/account_search.rb b/app/models/concerns/account_search.rb index 67d77793fe..46cf68e1a3 100644 --- a/app/models/concerns/account_search.rb +++ b/app/models/concerns/account_search.rb @@ -106,6 +106,17 @@ module AccountSearch LIMIT :limit OFFSET :offset SQL + def searchable_text + PlainTextFormatter.new(note, local?).to_s if discoverable? + end + + def searchable_properties + [].tap do |properties| + properties << 'bot' if bot? + properties << 'verified' if fields.any?(&:verified?) + end + end + class_methods do def search_for(terms, limit: 10, offset: 0) tsquery = generate_query_for_search(terms) diff --git a/app/services/account_search_service.rb b/app/services/account_search_service.rb index dfc3a45f8f..3c9e73c124 100644 --- a/app/services/account_search_service.rb +++ b/app/services/account_search_service.rb @@ -9,12 +9,11 @@ class AccountSearchService < BaseService MIN_QUERY_LENGTH = 5 def call(query, account = nil, options = {}) - @acct_hint = query&.start_with?('@') - @query = query&.strip&.gsub(/\A@/, '') - @limit = options[:limit].to_i - @offset = options[:offset].to_i - @options = options - @account = account + @query = query&.strip&.gsub(/\A@/, '') + @limit = options[:limit].to_i + @offset = options[:offset].to_i + @options = options + @account = account search_service_results.compact.uniq end @@ -72,8 +71,8 @@ class AccountSearchService < BaseService end def from_elasticsearch - must_clauses = [{ multi_match: { query: terms_for_query, fields: likely_acct? ? %w(acct.edge_ngram acct) : %w(acct.edge_ngram acct display_name.edge_ngram display_name), type: 'most_fields', operator: 'and' } }] - should_clauses = [] + must_clauses = must_clause + should_clauses = should_clause if account return [] if options[:following] && following_ids.empty? @@ -88,7 +87,7 @@ class AccountSearchService < BaseService query = { bool: { must: must_clauses, should: should_clauses } } functions = [reputation_score_function, followers_score_function, time_distance_function] - records = AccountsIndex.query(function_score: { query: query, functions: functions, boost_mode: 'multiply', score_mode: 'avg' }) + records = AccountsIndex.query(function_score: { query: query, functions: functions }) .limit(limit_for_non_exact_results) .offset(offset) .objects @@ -133,6 +132,36 @@ class AccountSearchService < BaseService } end + def must_clause + fields = %w(username username.* display_name display_name.*) + fields << 'text' << 'text.*' if options[:use_searchable_text] + + [ + { + multi_match: { + query: terms_for_query, + fields: fields, + type: 'best_fields', + operator: 'or', + }, + }, + ] + end + + def should_clause + [ + { + multi_match: { + query: terms_for_query, + fields: %w(username username.* display_name display_name.*), + type: 'best_fields', + operator: 'and', + boost: 10, + }, + }, + ] + end + def following_ids @following_ids ||= account.active_relationships.pluck(:target_account_id) + [account.id] end @@ -182,8 +211,4 @@ class AccountSearchService < BaseService def username_complete? query.include?('@') && "@#{query}".match?(MENTION_ONLY_RE) end - - def likely_acct? - @acct_hint || username_complete? - end end diff --git a/app/services/search_service.rb b/app/services/search_service.rb index f475f81536..dad8c0b28f 100644 --- a/app/services/search_service.rb +++ b/app/services/search_service.rb @@ -30,7 +30,8 @@ class SearchService < BaseService @account, limit: @limit, resolve: @resolve, - offset: @offset + offset: @offset, + use_searchable_text: true ) end diff --git a/spec/services/search_service_spec.rb b/spec/services/search_service_spec.rb index 1283a23bf1..3bf7f8ce9f 100644 --- a/spec/services/search_service_spec.rb +++ b/spec/services/search_service_spec.rb @@ -68,7 +68,7 @@ describe SearchService, type: :service do allow(AccountSearchService).to receive(:new).and_return(service) results = subject.call(query, nil, 10) - expect(service).to have_received(:call).with(query, nil, limit: 10, offset: 0, resolve: false) + expect(service).to have_received(:call).with(query, nil, limit: 10, offset: 0, resolve: false, use_searchable_text: true) expect(results).to eq empty_results.merge(accounts: [account]) end end