From 0ac7efdc50200020f42b2d94151ecd80144e3d8c Mon Sep 17 00:00:00 2001 From: Takeshi Umeda Date: Sat, 27 Nov 2021 06:08:47 +0900 Subject: [PATCH] Fix performance of tootctl statuses remove (#17052) * Fix performance of tootctl statuses remove * Fix model class --- lib/mastodon/statuses_cli.rb | 94 +++++++++++++++++++++++++----------- 1 file changed, 67 insertions(+), 27 deletions(-) diff --git a/lib/mastodon/statuses_cli.rb b/lib/mastodon/statuses_cli.rb index b9dccdd8a32..f841529e051 100644 --- a/lib/mastodon/statuses_cli.rb +++ b/lib/mastodon/statuses_cli.rb @@ -6,6 +6,7 @@ require_relative 'cli_helper' module Mastodon class StatusesCLI < Thor + include CLIHelper include ActionView::Helpers::NumberHelper def self.exit_on_failure? @@ -15,6 +16,8 @@ module Mastodon option :days, type: :numeric, default: 90 option :clean_followed, type: :boolean option :skip_media_remove, type: :boolean + option :vacuum, type: :boolean, default: false, desc: 'Reduce the file size and update the statistics. This option locks the table for a long time, so run it offline' + option :batch_size, type: :numeric, default: 1_000, aliases: [:b], desc: 'Number of records in each batch' desc 'remove', 'Remove unreferenced statuses' long_desc <<~LONG_DESC Remove statuses that are not referenced by local user activity, such as @@ -25,52 +28,89 @@ module Mastodon indices before commencing, and removes them afterward. LONG_DESC def remove + if options[:batch_size] < 1 + say('Cannot run with this batch_size setting, must be at least 1', :red) + exit(1) + end + say('Creating temporary database indices...') - ActiveRecord::Base.connection.add_index(:accounts, :id, name: :index_accounts_local, where: 'domain is null', algorithm: :concurrently) unless ActiveRecord::Base.connection.index_name_exists?(:accounts, :index_accounts_local) - ActiveRecord::Base.connection.add_index(:status_pins, :status_id, name: :index_status_pins_status_id, algorithm: :concurrently) unless ActiveRecord::Base.connection.index_name_exists?(:status_pins, :index_status_pins_status_id) - ActiveRecord::Base.connection.add_index(:media_attachments, :remote_url, name: :index_media_attachments_remote_url, where: 'remote_url is not null', algorithm: :concurrently) unless ActiveRecord::Base.connection.index_name_exists?(:media_attachments, :index_media_attachments_remote_url) + ActiveRecord::Base.connection.add_index(:accounts, :id, name: :index_accounts_local, where: 'domain is null', algorithm: :concurrently, if_not_exists: true) + ActiveRecord::Base.connection.add_index(:status_pins, :status_id, name: :index_status_pins_status_id, algorithm: :concurrently, if_not_exists: true) + ActiveRecord::Base.connection.add_index(:media_attachments, :remote_url, name: :index_media_attachments_remote_url, where: 'remote_url is not null', algorithm: :concurrently, if_not_exists: true) max_id = Mastodon::Snowflake.id_at(options[:days].days.ago) start_at = Time.now.to_f + say('Extract the deletion target... This might take a while...') + + ActiveRecord::Base.connection.create_table('statuses_to_be_deleted', temporary: true) + + # Skip accounts followed by local accounts + clean_followed_sql = 'AND NOT EXISTS (SELECT 1 FROM follows WHERE statuses.account_id = follows.target_account_id)' unless options[:clean_followed] + + ActiveRecord::Base.connection.exec_insert(<<-SQL.squish, 'SQL', [[nil, max_id]]) + INSERT INTO statuses_to_be_deleted (id) + SELECT statuses.id FROM statuses WHERE deleted_at IS NULL AND NOT local AND uri IS NOT NULL AND (id < $1) + AND NOT EXISTS (SELECT 1 FROM statuses AS statuses1 WHERE statuses.id = statuses1.in_reply_to_id) + AND NOT EXISTS (SELECT 1 FROM statuses AS statuses1 WHERE statuses1.id = statuses.reblog_of_id AND (statuses1.uri IS NULL OR statuses1.local)) + AND NOT EXISTS (SELECT 1 FROM statuses AS statuses1 WHERE statuses.id = statuses1.reblog_of_id AND (statuses1.uri IS NULL OR statuses1.local OR statuses1.id >= $1)) + AND NOT EXISTS (SELECT 1 FROM status_pins WHERE statuses.id = status_id) + AND NOT EXISTS (SELECT 1 FROM mentions WHERE statuses.id = mentions.status_id AND mentions.account_id IN (SELECT accounts.id FROM accounts WHERE domain IS NULL)) + AND NOT EXISTS (SELECT 1 FROM favourites WHERE statuses.id = favourites.status_id AND favourites.account_id IN (SELECT accounts.id FROM accounts WHERE domain IS NULL)) + AND NOT EXISTS (SELECT 1 FROM bookmarks WHERE statuses.id = bookmarks.status_id AND bookmarks.account_id IN (SELECT accounts.id FROM accounts WHERE domain IS NULL)) + #{clean_followed_sql} + SQL + + say('Removing temporary database indices to restore write performance...') + + ActiveRecord::Base.connection.remove_index(:accounts, name: :index_accounts_local, if_exists: true) + ActiveRecord::Base.connection.remove_index(:status_pins, name: :index_status_pins_status_id, if_exists: true) + say('Beginning removal... This might take a while...') - scope = Status.remote.where('id < ?', max_id) - # Skip reblogs of local statuses - scope = scope.where('reblog_of_id NOT IN (SELECT statuses1.id FROM statuses AS statuses1 WHERE statuses1.id = statuses.reblog_of_id AND (statuses1.uri IS NULL OR statuses1.local))') - # Skip statuses that are pinned on profiles - scope = scope.where('id NOT IN (SELECT status_pins.status_id FROM status_pins WHERE statuses.id = status_id)') - # Skip statuses that mention local accounts - scope = scope.where('id NOT IN (SELECT mentions.status_id FROM mentions WHERE statuses.id = mentions.status_id AND mentions.account_id IN (SELECT accounts.id FROM accounts WHERE domain IS NULL))') - # Skip statuses which have replies - scope = scope.where('id NOT IN (SELECT statuses1.in_reply_to_id FROM statuses AS statuses1 WHERE statuses.id = statuses1.in_reply_to_id)') - # Skip statuses reblogged by local accounts or with recent boosts - scope = scope.where('id NOT IN (SELECT statuses1.reblog_of_id FROM statuses AS statuses1 WHERE statuses.id = statuses1.reblog_of_id AND (statuses1.uri IS NULL OR statuses1.local OR statuses1.id >= ?))', max_id) - # Skip statuses favourited by local users - scope = scope.where('id NOT IN (SELECT favourites.status_id FROM favourites WHERE statuses.id = favourites.status_id AND favourites.account_id IN (SELECT accounts.id FROM accounts WHERE domain IS NULL))') - # Skip statuses bookmarked by local users - scope = scope.where('id NOT IN (SELECT bookmarks.status_id FROM bookmarks WHERE statuses.id = bookmarks.status_id AND bookmarks.account_id IN (SELECT accounts.id FROM accounts WHERE domain IS NULL))') - - unless options[:clean_followed] - # Skip accounts followed by local accounts - scope = scope.where('account_id NOT IN (SELECT follows.target_account_id FROM follows WHERE statuses.account_id = follows.target_account_id)') + klass = Class.new(ApplicationRecord) do |c| + c.table_name = 'statuses_to_be_deleted' end - scope.in_batches.delete_all + Object.const_set('StatusToBeDeleted', klass) + + scope = StatusToBeDeleted + processed = 0 + removed = 0 + progress = create_progress_bar(scope.count.fdiv(options[:batch_size]).ceil) + + scope.reorder(nil).in_batches(of: options[:batch_size]) do |relation| + ids = relation.pluck(:id) + processed += ids.count + removed += Status.unscoped.where(id: ids).delete_all + progress.increment + end + + progress.stop + + if options[:vacuum] + say('Run VACUUM and ANALYZE to statuses...') + + ActiveRecord::Base.connection.execute('VACUUM FULL ANALYZE statuses') + else + say('Run ANALYZE to statuses...') + + ActiveRecord::Base.connection.execute('ANALYZE statuses') + end unless options[:skip_media_remove] say('Beginning removal of now-orphaned media attachments to free up disk space...') Scheduler::MediaCleanupScheduler.new.perform end - say("Done after #{Time.now.to_f - start_at}s", :green) + say("Done after #{Time.now.to_f - start_at}s, removed #{removed} out of #{processed} statuses.", :green) ensure say('Removing temporary database indices to restore write performance...') - ActiveRecord::Base.connection.remove_index(:accounts, name: :index_accounts_local) if ActiveRecord::Base.connection.index_name_exists?(:accounts, :index_accounts_local) - ActiveRecord::Base.connection.remove_index(:status_pins, name: :index_status_pins_status_id) if ActiveRecord::Base.connection.index_name_exists?(:status_pins, :index_status_pins_status_id) - ActiveRecord::Base.connection.remove_index(:media_attachments, name: :index_media_attachments_remote_url) if ActiveRecord::Base.connection.index_name_exists?(:media_attachments, :index_media_attachments_remote_url) + ActiveRecord::Base.connection.remove_index(:accounts, name: :index_accounts_local, if_exists: true) + ActiveRecord::Base.connection.remove_index(:status_pins, name: :index_status_pins_status_id, if_exists: true) + ActiveRecord::Base.connection.remove_index(:media_attachments, name: :index_media_attachments_remote_url, if_exists: true) end end end