From 38b2974a83a0fcbfffb01f87c332d70dd4f49ef9 Mon Sep 17 00:00:00 2001 From: Matt Jankowski Date: Tue, 2 May 2023 12:21:36 -0400 Subject: [PATCH] Extract AccountSearch concern from Account (#24716) --- .rubocop_todo.yml | 2 - app/models/account.rb | 98 +----------------- app/models/concerns/account_search.rb | 140 ++++++++++++++++++++++++++ 3 files changed, 141 insertions(+), 99 deletions(-) create mode 100644 app/models/concerns/account_search.rb diff --git a/.rubocop_todo.yml b/.rubocop_todo.yml index 5e2e9089277..44361fd6f01 100644 --- a/.rubocop_todo.yml +++ b/.rubocop_todo.yml @@ -1994,7 +1994,6 @@ Style/HashAsLastArrayItem: Exclude: - 'app/controllers/admin/statuses_controller.rb' - 'app/controllers/api/v1/statuses_controller.rb' - - 'app/models/account.rb' - 'app/models/concerns/account_counters.rb' - 'app/models/concerns/status_threading_concern.rb' - 'app/models/status.rb' @@ -2068,7 +2067,6 @@ Style/MapToHash: # SupportedStyles: literals, strict Style/MutableConstant: Exclude: - - 'app/models/account.rb' - 'app/models/tag.rb' - 'app/services/delete_account_service.rb' - 'config/initializers/twitter_regex.rb' diff --git a/app/models/account.rb b/app/models/account.rb index 8b0f49a9b8c..7f0c2a7d29f 100644 --- a/app/models/account.rb +++ b/app/models/account.rb @@ -78,6 +78,7 @@ class Account < ApplicationRecord include DomainNormalizable include DomainMaterializable include AccountMerging + include AccountSearch enum protocol: { ostatus: 0, activitypub: 1 } enum suspension_origin: { local: 0, remote: 1 }, _prefix: true @@ -410,14 +411,6 @@ class Account < ApplicationRecord end class << self - DISALLOWED_TSQUERY_CHARACTERS = /['?\\:‘’]/ - TEXTSEARCH = "(setweight(to_tsvector('simple', accounts.display_name), 'A') || setweight(to_tsvector('simple', accounts.username), 'B') || setweight(to_tsvector('simple', coalesce(accounts.domain, '')), 'C'))" - - REPUTATION_SCORE_FUNCTION = '(greatest(0, coalesce(s.followers_count, 0)) / (greatest(0, coalesce(s.following_count, 0)) + 1.0))' - FOLLOWERS_SCORE_FUNCTION = 'log(greatest(0, coalesce(s.followers_count, 0)) + 2)' - TIME_DISTANCE_FUNCTION = '(case when s.last_status_at is null then 0 else exp(-1.0 * ((greatest(0, abs(extract(DAY FROM age(s.last_status_at))) - 30.0)^2) / (2.0 * ((-1.0 * 30^2) / (2.0 * ln(0.3)))))) end)' - BOOST = "((#{REPUTATION_SCORE_FUNCTION} + #{FOLLOWERS_SCORE_FUNCTION} + #{TIME_DISTANCE_FUNCTION}) / 3.0)" - def readonly_attributes super - %w(statuses_count following_count followers_count) end @@ -427,37 +420,6 @@ class Account < ApplicationRecord DeliveryFailureTracker.without_unavailable(urls) end - def search_for(terms, limit: 10, offset: 0) - tsquery = generate_query_for_search(terms) - - sql = <<-SQL.squish - SELECT - accounts.*, - #{BOOST} * ts_rank_cd(#{TEXTSEARCH}, to_tsquery('simple', :tsquery), 32) AS rank - FROM accounts - LEFT JOIN users ON accounts.id = users.account_id - LEFT JOIN account_stats AS s ON accounts.id = s.account_id - WHERE to_tsquery('simple', :tsquery) @@ #{TEXTSEARCH} - AND accounts.suspended_at IS NULL - AND accounts.moved_to_account_id IS NULL - AND (accounts.domain IS NOT NULL OR (users.approved = TRUE AND users.confirmed_at IS NOT NULL)) - ORDER BY rank DESC - LIMIT :limit OFFSET :offset - SQL - - records = find_by_sql([sql, limit: limit, offset: offset, tsquery: tsquery]) - ActiveRecord::Associations::Preloader.new.preload(records, :account_stat) - records - end - - def advanced_search_for(terms, account, limit: 10, following: false, offset: 0) - tsquery = generate_query_for_search(terms) - sql = advanced_search_for_sql_template(following) - records = find_by_sql([sql, id: account.id, limit: limit, offset: offset, tsquery: tsquery]) - ActiveRecord::Associations::Preloader.new.preload(records, :account_stat) - records - end - def from_text(text) return [] if text.blank? @@ -471,64 +433,6 @@ class Account < ApplicationRecord EntityCache.instance.mention(username, domain) end end - - private - - def generate_query_for_search(unsanitized_terms) - terms = unsanitized_terms.gsub(DISALLOWED_TSQUERY_CHARACTERS, ' ') - - # The final ":*" is for prefix search. - # The trailing space does not seem to fit any purpose, but `to_tsquery` - # behaves differently with and without a leading space if the terms start - # with `./`, `../`, or `.. `. I don't understand why, so, in doubt, keep - # the same query. - "' #{terms} ':*" - end - - def advanced_search_for_sql_template(following) - if following - <<-SQL.squish - WITH first_degree AS ( - SELECT target_account_id - FROM follows - WHERE account_id = :id - UNION ALL - SELECT :id - ) - SELECT - accounts.*, - (count(f.id) + 1) * #{BOOST} * ts_rank_cd(#{TEXTSEARCH}, to_tsquery('simple', :tsquery), 32) AS rank - FROM accounts - LEFT OUTER JOIN follows AS f ON (accounts.id = f.account_id AND f.target_account_id = :id) - LEFT JOIN account_stats AS s ON accounts.id = s.account_id - WHERE accounts.id IN (SELECT * FROM first_degree) - AND to_tsquery('simple', :tsquery) @@ #{TEXTSEARCH} - AND accounts.suspended_at IS NULL - AND accounts.moved_to_account_id IS NULL - GROUP BY accounts.id, s.id - ORDER BY rank DESC - LIMIT :limit OFFSET :offset - SQL - else - <<-SQL.squish - SELECT - accounts.*, - #{BOOST} * ts_rank_cd(#{TEXTSEARCH}, to_tsquery('simple', :tsquery), 32) AS rank, - count(f.id) AS followed - FROM accounts - LEFT OUTER JOIN follows AS f ON (accounts.id = f.account_id AND f.target_account_id = :id) OR (accounts.id = f.target_account_id AND f.account_id = :id) - LEFT JOIN users ON accounts.id = users.account_id - LEFT JOIN account_stats AS s ON accounts.id = s.account_id - WHERE to_tsquery('simple', :tsquery) @@ #{TEXTSEARCH} - AND accounts.suspended_at IS NULL - AND accounts.moved_to_account_id IS NULL - AND (accounts.domain IS NOT NULL OR (users.approved = TRUE AND users.confirmed_at IS NOT NULL)) - GROUP BY accounts.id, s.id - ORDER BY followed DESC, rank DESC - LIMIT :limit OFFSET :offset - SQL - end - end end def emojis diff --git a/app/models/concerns/account_search.rb b/app/models/concerns/account_search.rb new file mode 100644 index 00000000000..67d77793fef --- /dev/null +++ b/app/models/concerns/account_search.rb @@ -0,0 +1,140 @@ +# frozen_string_literal: true + +module AccountSearch + extend ActiveSupport::Concern + + DISALLOWED_TSQUERY_CHARACTERS = /['?\\:‘’]/ + + TEXT_SEARCH_RANKS = <<~SQL.squish + ( + setweight(to_tsvector('simple', accounts.display_name), 'A') || + setweight(to_tsvector('simple', accounts.username), 'B') || + setweight(to_tsvector('simple', coalesce(accounts.domain, '')), 'C') + ) + SQL + + REPUTATION_SCORE_FUNCTION = <<~SQL.squish + ( + greatest(0, coalesce(s.followers_count, 0)) / ( + greatest(0, coalesce(s.following_count, 0)) + 1.0 + ) + ) + SQL + + FOLLOWERS_SCORE_FUNCTION = <<~SQL.squish + log( + greatest(0, coalesce(s.followers_count, 0)) + 2 + ) + SQL + + TIME_DISTANCE_FUNCTION = <<~SQL.squish + ( + case + when s.last_status_at is null then 0 + else exp( + -1.0 * ( + ( + greatest(0, abs(extract(DAY FROM age(s.last_status_at))) - 30.0)^2) /#{' '} + (2.0 * ((-1.0 * 30^2) / (2.0 * ln(0.3))) + ) + ) + ) + end + ) + SQL + + BOOST = <<~SQL.squish + ( + (#{REPUTATION_SCORE_FUNCTION} + #{FOLLOWERS_SCORE_FUNCTION} + #{TIME_DISTANCE_FUNCTION}) / 3.0 + ) + SQL + + BASIC_SEARCH_SQL = <<~SQL.squish + SELECT + accounts.*, + #{BOOST} * ts_rank_cd(#{TEXT_SEARCH_RANKS}, to_tsquery('simple', :tsquery), 32) AS rank + FROM accounts + LEFT JOIN users ON accounts.id = users.account_id + LEFT JOIN account_stats AS s ON accounts.id = s.account_id + WHERE to_tsquery('simple', :tsquery) @@ #{TEXT_SEARCH_RANKS} + AND accounts.suspended_at IS NULL + AND accounts.moved_to_account_id IS NULL + AND (accounts.domain IS NOT NULL OR (users.approved = TRUE AND users.confirmed_at IS NOT NULL)) + ORDER BY rank DESC + LIMIT :limit OFFSET :offset + SQL + + ADVANCED_SEARCH_WITH_FOLLOWING = <<~SQL.squish + WITH first_degree AS ( + SELECT target_account_id + FROM follows + WHERE account_id = :id + UNION ALL + SELECT :id + ) + SELECT + accounts.*, + (count(f.id) + 1) * #{BOOST} * ts_rank_cd(#{TEXT_SEARCH_RANKS}, to_tsquery('simple', :tsquery), 32) AS rank + FROM accounts + LEFT OUTER JOIN follows AS f ON (accounts.id = f.account_id AND f.target_account_id = :id) + LEFT JOIN account_stats AS s ON accounts.id = s.account_id + WHERE accounts.id IN (SELECT * FROM first_degree) + AND to_tsquery('simple', :tsquery) @@ #{TEXT_SEARCH_RANKS} + AND accounts.suspended_at IS NULL + AND accounts.moved_to_account_id IS NULL + GROUP BY accounts.id, s.id + ORDER BY rank DESC + LIMIT :limit OFFSET :offset + SQL + + ADVANCED_SEARCH_WITHOUT_FOLLOWING = <<~SQL.squish + SELECT + accounts.*, + #{BOOST} * ts_rank_cd(#{TEXT_SEARCH_RANKS}, to_tsquery('simple', :tsquery), 32) AS rank, + count(f.id) AS followed + FROM accounts + LEFT OUTER JOIN follows AS f ON + (accounts.id = f.account_id AND f.target_account_id = :id) OR (accounts.id = f.target_account_id AND f.account_id = :id) + LEFT JOIN users ON accounts.id = users.account_id + LEFT JOIN account_stats AS s ON accounts.id = s.account_id + WHERE to_tsquery('simple', :tsquery) @@ #{TEXT_SEARCH_RANKS} + AND accounts.suspended_at IS NULL + AND accounts.moved_to_account_id IS NULL + AND (accounts.domain IS NOT NULL OR (users.approved = TRUE AND users.confirmed_at IS NOT NULL)) + GROUP BY accounts.id, s.id + ORDER BY followed DESC, rank DESC + LIMIT :limit OFFSET :offset + SQL + + class_methods do + def search_for(terms, limit: 10, offset: 0) + tsquery = generate_query_for_search(terms) + + find_by_sql([BASIC_SEARCH_SQL, { limit: limit, offset: offset, tsquery: tsquery }]).tap do |records| + ActiveRecord::Associations::Preloader.new.preload(records, :account_stat) + end + end + + def advanced_search_for(terms, account, limit: 10, following: false, offset: 0) + tsquery = generate_query_for_search(terms) + sql_template = following ? ADVANCED_SEARCH_WITH_FOLLOWING : ADVANCED_SEARCH_WITHOUT_FOLLOWING + + find_by_sql([sql_template, { id: account.id, limit: limit, offset: offset, tsquery: tsquery }]).tap do |records| + ActiveRecord::Associations::Preloader.new.preload(records, :account_stat) + end + end + + private + + def generate_query_for_search(unsanitized_terms) + terms = unsanitized_terms.gsub(DISALLOWED_TSQUERY_CHARACTERS, ' ') + + # The final ":*" is for prefix search. + # The trailing space does not seem to fit any purpose, but `to_tsquery` + # behaves differently with and without a leading space if the terms start + # with `./`, `../`, or `.. `. I don't understand why, so, in doubt, keep + # the same query. + "' #{terms} ':*" + end + end +end