Change trending statuses to only show one status from each account ()

Calculate trends in temporary sets to avoid having to manage items
that go below the decay threshold while not having any moments
where a half-processed set is accessible to end-users
remotes/1727458204337373841/tmp_refs/heads/signup-info-prompt
Eugen Rochko 2022-04-29 22:42:42 +02:00 committed by GitHub
parent 5c691430e5
commit 6476f7e4da
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 82 additions and 83 deletions

View File

@ -64,28 +64,25 @@ class Trends::Base
redis.expire(used_key(at_time), 1.day.seconds) redis.expire(used_key(at_time), 1.day.seconds)
end end
# Prune entries whose decayed score has dropped below 0.3 ('(0.3' is an
# exclusive upper bound) from both the full and the allowed sorted sets.
def trim_older_items
  %w(all allowed).each do |suffix|
    redis.zremrangebyscore("#{key_prefix}:#{suffix}", '-inf', '(0.3')
  end
end
# Score held by the item at the given rank in the public "allowed" set,
# or 0 when the set does not reach that rank.
def score_at_rank(rank)
  last_entry = redis.zrevrange("#{key_prefix}:allowed", 0, rank, with_scores: true).last
  last_entry&.last || 0
end
# Rebuild the ":all" and ":allowed" sorted sets for the given key suffix
# inside uniquely-named temporary keys, then swap them into place inside
# one pipeline so readers never observe a half-written set.
def replace_items(suffix, items)
  tmp_prefix    = "#{key_prefix}:tmp:#{SecureRandom.alphanumeric(6)}#{suffix}"
  allowed_items = filter_for_allowed_items(items)

  redis.pipelined do |pipeline|
    items.each { |entry| pipeline.zadd("#{tmp_prefix}:all", entry[:score], entry[:item].id) }
    allowed_items.each { |entry| pipeline.zadd("#{tmp_prefix}:allowed", entry[:score], entry[:item].id) }

    rename_set(pipeline, "#{tmp_prefix}:all", "#{key_prefix}:all#{suffix}", items)
    rename_set(pipeline, "#{tmp_prefix}:allowed", "#{key_prefix}:allowed#{suffix}", allowed_items)
  end
end
# Abstract hook: concrete trend classes must return the subset of
# { score:, item: } entries that may surface in the "allowed" set.
def filter_for_allowed_items(_items)
  raise NotImplementedError
end
private private
@ -93,4 +90,12 @@ class Trends::Base
# Redis key tracking which accounts interacted on the day containing
# +at_time+ (bucketed by beginning of day).
def used_key(at_time)
  [key_prefix, 'used', at_time.beginning_of_day.to_i].join(':')
end
# Promote a freshly-built temporary set to its final key. Redis RENAME
# errors when the source key is missing, which happens whenever no item
# was queued into the temporary set — in that case clear the destination
# instead so stale data does not linger.
def rename_set(pipeline, from_key, to_key, set_items)
  return pipeline.del(to_key) if set_items.empty?

  pipeline.rename(from_key, to_key)
end
end end

View File

@ -8,14 +8,15 @@ class Trends::Links < Trends::Base
review_threshold: 3, review_threshold: 3,
max_score_cooldown: 2.days.freeze, max_score_cooldown: 2.days.freeze,
max_score_halflife: 8.hours.freeze, max_score_halflife: 8.hours.freeze,
decay_threshold: 1,
} }
def register(status, at_time = Time.now.utc) def register(status, at_time = Time.now.utc)
original_status = status.reblog? ? status.reblog : status original_status = status.proper
return unless original_status.public_visibility? && status.public_visibility? && return unless (original_status.public_visibility? && status.public_visibility?) &&
!original_status.account.silenced? && !status.account.silenced? && !(original_status.account.silenced? || status.account.silenced?) &&
!original_status.spoiler_text? !(original_status.spoiler_text? || original_status.sensitive?)
original_status.preview_cards.each do |preview_card| original_status.preview_cards.each do |preview_card|
add(preview_card, status.account_id, at_time) if preview_card.appropriate_for_trends? add(preview_card, status.account_id, at_time) if preview_card.appropriate_for_trends?
@ -61,6 +62,9 @@ class Trends::Links < Trends::Base
private private
def calculate_scores(preview_cards, at_time) def calculate_scores(preview_cards, at_time)
global_items = []
locale_items = Hash.new { |h, key| h[key] = [] }
preview_cards.each do |preview_card| preview_cards.each do |preview_card|
expected = preview_card.history.get(at_time - 1.day).accounts.to_f expected = preview_card.history.get(at_time - 1.day).accounts.to_f
expected = 1.0 if expected.zero? expected = 1.0 if expected.zero?
@ -87,31 +91,21 @@ class Trends::Links < Trends::Base
decaying_score = max_score * (0.5**((at_time.to_f - max_time.to_f) / options[:max_score_halflife].to_f)) decaying_score = max_score * (0.5**((at_time.to_f - max_time.to_f) / options[:max_score_halflife].to_f))
add_to_and_remove_from_subsets(preview_card.id, decaying_score, { next unless decaying_score >= options[:decay_threshold]
all: true,
allowed: preview_card.trendable?,
})
next unless valid_locale?(preview_card.language) global_items << { score: decaying_score, item: preview_card }
locale_items[preview_card.language] << { score: decaying_score, item: preview_card } if valid_locale?(preview_card.language)
add_to_and_remove_from_subsets(preview_card.id, decaying_score, {
"all:#{preview_card.language}" => true,
"allowed:#{preview_card.language}" => preview_card.trendable?,
})
end end
trim_older_items replace_items('', global_items)
# Clean up localized sets by calculating the intersection with the main
# set. We do this instead of just deleting the localized sets to avoid
# having moments where the API returns empty results
redis.pipelined do
Trends.available_locales.each do |locale| Trends.available_locales.each do |locale|
redis.zinterstore("#{key_prefix}:all:#{locale}", ["#{key_prefix}:all:#{locale}", "#{key_prefix}:all"], aggregate: 'max') replace_items(":#{locale}", locale_items[locale])
redis.zinterstore("#{key_prefix}:allowed:#{locale}", ["#{key_prefix}:allowed:#{locale}", "#{key_prefix}:allowed"], aggregate: 'max')
end end
end end
# Keep only entries whose preview card is flagged as trendable.
def filter_for_allowed_items(items)
  items.filter_map { |entry| entry if entry[:item].trendable? }
end
def would_be_trending?(id) def would_be_trending?(id)

View File

@ -7,6 +7,7 @@ class Trends::Statuses < Trends::Base
threshold: 5, threshold: 5,
review_threshold: 3, review_threshold: 3,
score_halflife: 2.hours.freeze, score_halflife: 2.hours.freeze,
decay_threshold: 0.3,
} }
class Query < Trends::Query class Query < Trends::Query
@ -31,7 +32,7 @@ class Trends::Statuses < Trends::Base
end end
# Consider a status for trending. `proper` presumably resolves a reblog
# to its original status — TODO confirm against Status model.
def register(status, at_time = Time.now.utc)
  original = status.proper
  add(original, status.account_id, at_time) if eligible?(original)
end
def add(status, _account_id, at_time = Time.now.utc) def add(status, _account_id, at_time = Time.now.utc)
@ -74,15 +75,13 @@ class Trends::Statuses < Trends::Base
private private
# A status may trend only when it is public, from a discoverable and
# unsilenced account, carries no content warning, is not marked
# sensitive, and is not a reply.
def eligible?(status)
  status.public_visibility? &&
    status.account.discoverable? &&
    !status.account.silenced? &&
    status.spoiler_text.blank? &&
    !status.sensitive? &&
    !status.reply?
end
def calculate_scores(statuses, at_time) def calculate_scores(statuses, at_time)
redis.pipelined do global_items = []
locale_items = Hash.new { |h, key| h[key] = [] }
statuses.each do |status| statuses.each do |status|
expected = 1.0 expected = 1.0
observed = (status.reblogs_count + status.favourites_count).to_f observed = (status.reblogs_count + status.favourites_count).to_f
@ -97,30 +96,24 @@ class Trends::Statuses < Trends::Base
decaying_score = score * (0.5**((at_time.to_f - status.created_at.to_f) / options[:score_halflife].to_f)) decaying_score = score * (0.5**((at_time.to_f - status.created_at.to_f) / options[:score_halflife].to_f))
add_to_and_remove_from_subsets(status.id, decaying_score, { next unless decaying_score >= options[:decay_threshold]
all: true,
allowed: status.trendable? && status.account.discoverable?,
})
next unless valid_locale?(status.language) global_items << { score: decaying_score, item: status }
locale_items[status.language] << { account_id: status.account_id, score: decaying_score, item: status } if valid_locale?(status.language)
add_to_and_remove_from_subsets(status.id, decaying_score, {
"all:#{status.language}" => true,
"allowed:#{status.language}" => status.trendable? && status.account.discoverable?,
})
end end
trim_older_items replace_items('', global_items)
# Clean up localized sets by calculating the intersection with the main
# set. We do this instead of just deleting the localized sets to avoid
# having moments where the API returns empty results
Trends.available_locales.each do |locale| Trends.available_locales.each do |locale|
redis.zinterstore("#{key_prefix}:all:#{locale}", ["#{key_prefix}:all:#{locale}", "#{key_prefix}:all"], aggregate: 'max') replace_items(":#{locale}", locale_items[locale])
redis.zinterstore("#{key_prefix}:allowed:#{locale}", ["#{key_prefix}:allowed:#{locale}", "#{key_prefix}:allowed"], aggregate: 'max')
end end
end end
# Keep at most one status per account: from each account's entries, pick
# the highest-scoring status that is itself eligible to trend. Accounts
# with no eligible status are dropped entirely.
def filter_for_allowed_items(items)
  by_account = items.group_by { |entry| entry[:account_id] }

  by_account.values.filter_map do |account_entries|
    eligible = account_entries.select do |entry|
      status = entry[:item]
      status.trendable? && status.account.discoverable?
    end

    eligible.max_by { |entry| entry[:score] }
  end
end
def would_be_trending?(id) def would_be_trending?(id)

View File

@ -8,6 +8,7 @@ class Trends::Tags < Trends::Base
review_threshold: 3, review_threshold: 3,
max_score_cooldown: 2.days.freeze, max_score_cooldown: 2.days.freeze,
max_score_halflife: 4.hours.freeze, max_score_halflife: 4.hours.freeze,
decay_threshold: 1,
} }
def register(status, at_time = Time.now.utc) def register(status, at_time = Time.now.utc)
@ -26,7 +27,6 @@ class Trends::Tags < Trends::Base
def refresh(at_time = Time.now.utc) def refresh(at_time = Time.now.utc)
tags = Tag.where(id: (recently_used_ids(at_time) + currently_trending_ids(false, -1)).uniq) tags = Tag.where(id: (recently_used_ids(at_time) + currently_trending_ids(false, -1)).uniq)
calculate_scores(tags, at_time) calculate_scores(tags, at_time)
trim_older_items
end end
def request_review def request_review
@ -53,6 +53,8 @@ class Trends::Tags < Trends::Base
private private
def calculate_scores(tags, at_time) def calculate_scores(tags, at_time)
items = []
tags.each do |tag| tags.each do |tag|
expected = tag.history.get(at_time - 1.day).accounts.to_f expected = tag.history.get(at_time - 1.day).accounts.to_f
expected = 1.0 if expected.zero? expected = 1.0 if expected.zero?
@ -79,11 +81,16 @@ class Trends::Tags < Trends::Base
decaying_score = max_score * (0.5**((at_time.to_f - max_time.to_f) / options[:max_score_halflife].to_f)) decaying_score = max_score * (0.5**((at_time.to_f - max_time.to_f) / options[:max_score_halflife].to_f))
add_to_and_remove_from_subsets(tag.id, decaying_score, { next unless decaying_score >= options[:decay_threshold]
all: true,
allowed: tag.trendable?, items << { score: decaying_score, item: tag }
})
end end
replace_items('', items)
end
# Keep only entries whose tag is flagged as trendable.
def filter_for_allowed_items(items)
  items.filter_map { |entry| entry if entry[:item].trendable? }
end
def would_be_trending?(id) def would_be_trending?(id)