From e17c2e5da5010beaefb46c8fffe35e87cb28f407 Mon Sep 17 00:00:00 2001 From: Eugen Rochko Date: Wed, 14 Jun 2017 18:01:35 +0200 Subject: [PATCH] Batched remove status service (#3735) * Make Pubsubhubbub::DistributionWorker handle both single stream entry arguments, as well as arrays of stream entries * Add BatchedRemoveStatusService, make SuspendAccountService use it * Improve method names * Add test * Add more tests * Use PuSH payloads of 100 to have a clear mapping of 1000 input statuses -> 10 PuSH payloads It was nice while it lasted --- app/services/batched_remove_status_service.rb | 114 ++++++++++++++++++ app/services/suspend_account_service.rb | 5 +- .../pubsubhubbub/distribution_worker.rb | 44 +++++-- .../batched_remove_status_service_spec.rb | 61 ++++++++++ .../pubsubhubbub/distribution_worker_spec.rb | 15 ++- 5 files changed, 216 insertions(+), 23 deletions(-) create mode 100644 app/services/batched_remove_status_service.rb create mode 100644 spec/services/batched_remove_status_service_spec.rb diff --git a/app/services/batched_remove_status_service.rb b/app/services/batched_remove_status_service.rb new file mode 100644 index 00000000000..b462154ae76 --- /dev/null +++ b/app/services/batched_remove_status_service.rb @@ -0,0 +1,114 @@ +# frozen_string_literal: true + +class BatchedRemoveStatusService < BaseService + include StreamEntryRenderer + + # Delete given statuses and reblogs of them + # Dispatch PuSH updates of the deleted statuses, but only local ones + # Dispatch Salmon deletes, unique per domain, of the deleted statuses, but only local ones + # Remove statuses from home feeds + # Push delete events to streaming API for home feeds and public feeds + # @param [Status] statuses A preferably batched array of statuses + def call(statuses) + statuses = Status.where(id: statuses.map(&:id)).includes(:account, :stream_entry).flat_map { |status| [status] + status.reblogs.includes(:account, :stream_entry).to_a } + + @mentions = statuses.map { |s| [s.id, s.mentions.includes(:account).to_a] }.to_h + @tags = statuses.map { |s| [s.id, s.tags.pluck(:name)] }.to_h + + @stream_entry_batches = [] + @salmon_batches = [] + @json_payloads = statuses.map { |s| [s.id, Oj.dump(event: :delete, payload: s.id)] }.to_h + + # Ensure that rendered XML reflects destroyed state + Status.where(id: statuses.map(&:id)).in_batches.destroy_all + + # Batch by source account + statuses.group_by(&:account_id).each do |_, account_statuses| + account = account_statuses.first.account + + unpush_from_home_timelines(account_statuses) + batch_stream_entries(account_statuses) if account.local? + end + + # Cannot be batched + statuses.each do |status| + unpush_from_public_timelines(status) + batch_salmon_slaps(status) if status.local? + end + + Pubsubhubbub::DistributionWorker.push_bulk(@stream_entry_batches) { |batch| batch } + NotificationWorker.push_bulk(@salmon_batches) { |batch| batch } + end + + private + + def batch_stream_entries(statuses) + stream_entry_ids = statuses.map { |s| s.stream_entry.id } + + stream_entry_ids.each_slice(100) do |batch_of_stream_entry_ids| + @stream_entry_batches << [batch_of_stream_entry_ids] + end + end + + def unpush_from_home_timelines(statuses) + account = statuses.first.account + recipients = account.followers.local.pluck(:id) + + recipients << account.id if account.local? + + recipients.each do |follower_id| + unpush(follower_id, statuses) + end + end + + def unpush_from_public_timelines(status) + payload = @json_payloads[status.id] + + redis.pipelined do + redis.publish('timeline:public', payload) + redis.publish('timeline:public:local', payload) if status.local? + + @tags[status.id].each do |hashtag| + redis.publish("timeline:hashtag:#{hashtag}", payload) + redis.publish("timeline:hashtag:#{hashtag}:local", payload) if status.local? + end + end + end + + def batch_salmon_slaps(status) + return if @mentions[status.id].empty? + + payload = stream_entry_to_xml(status.stream_entry.reload) + recipients = @mentions[status.id].map(&:account).reject(&:local?).uniq(&:domain).map(&:id) + + recipients.each do |recipient_id| + @salmon_batches << [payload, status.account_id, recipient_id] + end + end + + def unpush(follower_id, statuses) + key = FeedManager.instance.key(:home, follower_id) + + originals = statuses.reject(&:reblog?) + reblogs = statuses.reject { |s| !s.reblog? } + + # Quickly remove all originals + redis.pipelined do + originals.each do |status| + redis.zremrangebyscore(key, status.id, status.id) + redis.publish("timeline:#{follower_id}", @json_payloads[status.id]) + end + end + + # For reblogs, re-add original status to feed, unless the reblog + # was not in the feed in the first place + reblogs.each do |status| + redis.zadd(key, status.reblog_of_id, status.reblog_of_id) unless redis.zscore(key, status.reblog_of_id).nil? + redis.publish("timeline:#{follower_id}", @json_payloads[status.id]) + end + end + + def redis + Redis.current + end +end diff --git a/app/services/suspend_account_service.rb b/app/services/suspend_account_service.rb index 8425002591b..1e3a51e4e26 100644 --- a/app/services/suspend_account_service.rb +++ b/app/services/suspend_account_service.rb @@ -17,9 +17,8 @@ class SuspendAccountService < BaseService end def purge_content - @account.statuses.reorder(nil).find_each do |status| - # This federates out deletes to previous followers - RemoveStatusService.new.call(status) + @account.statuses.reorder(nil).find_in_batches do |statuses| + BatchedRemoveStatusService.new.call(statuses) end [ diff --git a/app/workers/pubsubhubbub/distribution_worker.rb b/app/workers/pubsubhubbub/distribution_worker.rb index b8f5c35e16f..da01dcd91ea 100644 --- a/app/workers/pubsubhubbub/distribution_worker.rb +++ b/app/workers/pubsubhubbub/distribution_worker.rb @@ -5,25 +5,45 @@ class Pubsubhubbub::DistributionWorker sidekiq_options queue: 'push' - def perform(stream_entry_id) - stream_entry = StreamEntry.find(stream_entry_id) + def perform(stream_entry_ids) + stream_entries = StreamEntry.where(id: stream_entry_ids).includes(:status).reject { |e| e.status&.direct_visibility? } - return if stream_entry.status&.direct_visibility? + return if stream_entries.empty? - @account = stream_entry.account - @payload = AtomSerializer.render(AtomSerializer.new.feed(@account, [stream_entry])) - @domains = @account.followers_domains + @account = stream_entries.first.account + @subscriptions = active_subscriptions.to_a - Subscription.where(account: @account).active.select('id, callback_url').find_each do |subscription| - next if stream_entry.hidden? && !allowed_to_receive?(subscription.callback_url) - Pubsubhubbub::DeliveryWorker.perform_async(subscription.id, @payload) - end - rescue ActiveRecord::RecordNotFound - true + distribute_public!(stream_entries.reject(&:hidden?)) + distribute_hidden!(stream_entries.reject { |s| !s.hidden? }) end private + def distribute_public!(stream_entries) + return if stream_entries.empty? + + @payload = AtomSerializer.render(AtomSerializer.new.feed(@account, stream_entries)) + + Pubsubhubbub::DeliveryWorker.push_bulk(@subscriptions) do |subscription| + [subscription.id, @payload] + end + end + + def distribute_hidden!(stream_entries) + return if stream_entries.empty? + + @payload = AtomSerializer.render(AtomSerializer.new.feed(@account, stream_entries)) + @domains = @account.followers_domains + + Pubsubhubbub::DeliveryWorker.push_bulk(@subscriptions.reject { |s| !allowed_to_receive?(s.callback_url) }) do |subscription| + [subscription.id, @payload] + end + end + + def active_subscriptions + Subscription.where(account: @account).active.select('id, callback_url') + end + def allowed_to_receive?(callback_url) @domains.include?(Addressable::URI.parse(callback_url).host) end diff --git a/spec/services/batched_remove_status_service_spec.rb b/spec/services/batched_remove_status_service_spec.rb new file mode 100644 index 00000000000..c20085e25e3 --- /dev/null +++ b/spec/services/batched_remove_status_service_spec.rb @@ -0,0 +1,61 @@ +require 'rails_helper' + +RSpec.describe BatchedRemoveStatusService do + subject { BatchedRemoveStatusService.new } + + let!(:alice) { Fabricate(:account) } + let!(:bob) { Fabricate(:account, username: 'bob', domain: 'example.com', salmon_url: 'http://example.com/salmon') } + let!(:jeff) { Fabricate(:account) } + + let(:status1) { PostStatusService.new.call(alice, 'Hello @bob@example.com') } + let(:status2) { PostStatusService.new.call(alice, 'Another status') } + + before do + allow(Redis.current).to receive_messages(publish: nil) + + stub_request(:post, 'http://example.com/push').to_return(status: 200, body: '', headers: {}) + stub_request(:post, 'http://example.com/salmon').to_return(status: 200, body: '', headers: {}) + + Fabricate(:subscription, account: alice, callback_url: 'http://example.com/push', confirmed: true, expires_at: 30.days.from_now) + jeff.follow!(alice) + + status1 + status2 + + subject.call([status1, status2]) + end + + it 'removes statuses from author\'s home feed' do + expect(Feed.new(:home, alice).get(10)).to_not include([status1.id, status2.id]) + end + + it 'removes statuses from local follower\'s home feed' do + expect(Feed.new(:home, jeff).get(10)).to_not include([status1.id, status2.id]) + end + + it 'notifies streaming API of followers' do + expect(Redis.current).to have_received(:publish).with("timeline:#{jeff.id}", any_args).at_least(:once) + end + + it 'notifies streaming API of author' do + expect(Redis.current).to have_received(:publish).with("timeline:#{alice.id}", any_args).at_least(:once) + end + + it 'notifies streaming API of public timeline' do + expect(Redis.current).to have_received(:publish).with('timeline:public', any_args).at_least(:once) + end + + it 'sends PuSH update to PuSH subscribers with two payloads united' do + expect(a_request(:post, 'http://example.com/push').with { |req| + matches = req.body.scan(TagManager::VERBS[:delete]) + matches.size == 2 + }).to have_been_made + end + + it 'sends Salmon slap to previously mentioned users' do + expect(a_request(:post, "http://example.com/salmon").with { |req| + xml = OStatus2::Salmon.new.unpack(req.body) + xml.match(TagManager::VERBS[:delete]) + }).to have_been_made.once + end +end diff --git a/spec/workers/pubsubhubbub/distribution_worker_spec.rb b/spec/workers/pubsubhubbub/distribution_worker_spec.rb index 512adb04cdc..89191c084e2 100644 --- a/spec/workers/pubsubhubbub/distribution_worker_spec.rb +++ b/spec/workers/pubsubhubbub/distribution_worker_spec.rb @@ -16,10 +16,9 @@ describe Pubsubhubbub::DistributionWorker do let(:status) { Fabricate(:status, account: alice, text: 'Hello', visibility: :public) } it 'delivers payload to all subscriptions' do - allow(Pubsubhubbub::DeliveryWorker).to receive(:perform_async) + allow(Pubsubhubbub::DeliveryWorker).to receive(:push_bulk) subject.perform(status.stream_entry.id) - expect(Pubsubhubbub::DeliveryWorker).to have_received(:perform_async).with(subscription_with_follower.id, /.*/) - expect(Pubsubhubbub::DeliveryWorker).to have_received(:perform_async).with(anonymous_subscription.id, /.*/) + expect(Pubsubhubbub::DeliveryWorker).to have_received(:push_bulk).with([anonymous_subscription, subscription_with_follower]) end end @@ -27,10 +26,10 @@ describe Pubsubhubbub::DistributionWorker do let(:status) { Fabricate(:status, account: alice, text: 'Hello', visibility: :private) } it 'delivers payload only to subscriptions with followers' do - allow(Pubsubhubbub::DeliveryWorker).to receive(:perform_async) + allow(Pubsubhubbub::DeliveryWorker).to receive(:push_bulk) subject.perform(status.stream_entry.id) - expect(Pubsubhubbub::DeliveryWorker).to have_received(:perform_async).with(subscription_with_follower.id, /.*/) - expect(Pubsubhubbub::DeliveryWorker).to_not have_received(:perform_async).with(anonymous_subscription.id, /.*/) + expect(Pubsubhubbub::DeliveryWorker).to have_received(:push_bulk).with([subscription_with_follower]) + expect(Pubsubhubbub::DeliveryWorker).to_not have_received(:push_bulk).with([anonymous_subscription]) end end @@ -38,9 +37,9 @@ describe Pubsubhubbub::DistributionWorker do let(:status) { Fabricate(:status, account: alice, text: 'Hello', visibility: :direct) } it 'does not deliver payload' do - allow(Pubsubhubbub::DeliveryWorker).to receive(:perform_async) + allow(Pubsubhubbub::DeliveryWorker).to receive(:push_bulk) subject.perform(status.stream_entry.id) - expect(Pubsubhubbub::DeliveryWorker).to_not have_received(:perform_async) + expect(Pubsubhubbub::DeliveryWorker).to_not have_received(:push_bulk) end end end