Optimize archive export service and export zip files instead of gzipped tar files (#23360)
parent
8131a5b330
commit
b233da5996
1
Gemfile
1
Gemfile
|
@ -162,3 +162,4 @@ gem 'xorcist', '~> 1.1'
|
||||||
gem 'cocoon', '~> 1.2'
|
gem 'cocoon', '~> 1.2'
|
||||||
|
|
||||||
gem 'net-http', '~> 0.3.2'
|
gem 'net-http', '~> 0.3.2'
|
||||||
|
gem 'rubyzip', '~> 2.3'
|
||||||
|
|
|
@ -636,6 +636,7 @@ GEM
|
||||||
nokogiri (>= 1.10.5)
|
nokogiri (>= 1.10.5)
|
||||||
rexml
|
rexml
|
||||||
ruby2_keywords (0.0.5)
|
ruby2_keywords (0.0.5)
|
||||||
|
rubyzip (2.3.2)
|
||||||
rufus-scheduler (3.8.2)
|
rufus-scheduler (3.8.2)
|
||||||
fugit (~> 1.1, >= 1.1.6)
|
fugit (~> 1.1, >= 1.1.6)
|
||||||
safety_net_attestation (0.4.0)
|
safety_net_attestation (0.4.0)
|
||||||
|
@ -876,6 +877,7 @@ DEPENDENCIES
|
||||||
rubocop-rails
|
rubocop-rails
|
||||||
rubocop-rspec
|
rubocop-rspec
|
||||||
ruby-progressbar (~> 1.13)
|
ruby-progressbar (~> 1.13)
|
||||||
|
rubyzip (~> 2.3)
|
||||||
sanitize (~> 6.0)
|
sanitize (~> 6.0)
|
||||||
scenic (~> 1.7)
|
scenic (~> 1.7)
|
||||||
sidekiq (~> 6.5)
|
sidekiq (~> 6.5)
|
||||||
|
|
|
@ -1,59 +1,67 @@
|
||||||
# frozen_string_literal: true
|
# frozen_string_literal: true
|
||||||
|
|
||||||
require 'rubygems/package'
|
require 'zip'
|
||||||
|
|
||||||
class BackupService < BaseService
|
class BackupService < BaseService
|
||||||
include Payloadable
|
include Payloadable
|
||||||
|
include ContextHelper
|
||||||
|
|
||||||
attr_reader :account, :backup, :collection
|
attr_reader :account, :backup
|
||||||
|
|
||||||
def call(backup)
|
def call(backup)
|
||||||
@backup = backup
|
@backup = backup
|
||||||
@account = backup.user.account
|
@account = backup.user.account
|
||||||
|
|
||||||
build_json!
|
|
||||||
build_archive!
|
build_archive!
|
||||||
end
|
end
|
||||||
|
|
||||||
private
|
private
|
||||||
|
|
||||||
def build_json!
|
def build_outbox_json!(file)
|
||||||
@collection = serialize(collection_presenter, ActivityPub::CollectionSerializer)
|
skeleton = serialize(collection_presenter, ActivityPub::CollectionSerializer)
|
||||||
|
skeleton[:@context] = full_context
|
||||||
|
skeleton[:orderedItems] = ['!PLACEHOLDER!']
|
||||||
|
skeleton = Oj.dump(skeleton)
|
||||||
|
prepend, append = skeleton.split('"!PLACEHOLDER!"')
|
||||||
|
add_comma = false
|
||||||
|
|
||||||
|
file.write(prepend)
|
||||||
|
|
||||||
account.statuses.with_includes.reorder(nil).find_in_batches do |statuses|
|
account.statuses.with_includes.reorder(nil).find_in_batches do |statuses|
|
||||||
statuses.each do |status|
|
file.write(',') if add_comma
|
||||||
item = serialize_payload(ActivityPub::ActivityPresenter.from_status(status), ActivityPub::ActivitySerializer, signer: @account)
|
add_comma = true
|
||||||
item.delete(:@context)
|
|
||||||
|
file.write(statuses.map do |status|
|
||||||
|
item = serialize_payload(ActivityPub::ActivityPresenter.from_status(status), ActivityPub::ActivitySerializer)
|
||||||
|
item.delete('@context')
|
||||||
|
|
||||||
unless item[:type] == 'Announce' || item[:object][:attachment].blank?
|
unless item[:type] == 'Announce' || item[:object][:attachment].blank?
|
||||||
item[:object][:attachment].each do |attachment|
|
item[:object][:attachment].each do |attachment|
|
||||||
attachment[:url] = Addressable::URI.parse(attachment[:url]).path.gsub(/\A\/system\//, '')
|
attachment[:url] = Addressable::URI.parse(attachment[:url]).path.delete_prefix('/system/')
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
@collection[:orderedItems] << item
|
Oj.dump(item)
|
||||||
end
|
end.join(','))
|
||||||
|
|
||||||
GC.start
|
GC.start
|
||||||
end
|
end
|
||||||
|
|
||||||
|
file.write(append)
|
||||||
end
|
end
|
||||||
|
|
||||||
def build_archive!
|
def build_archive!
|
||||||
tmp_file = Tempfile.new(%w(archive .tar.gz))
|
tmp_file = Tempfile.new(%w(archive .zip))
|
||||||
|
|
||||||
File.open(tmp_file, 'wb') do |file|
|
Zip::File.open(tmp_file, create: true) do |zipfile|
|
||||||
Zlib::GzipWriter.wrap(file) do |gz|
|
dump_outbox!(zipfile)
|
||||||
Gem::Package::TarWriter.new(gz) do |tar|
|
dump_media_attachments!(zipfile)
|
||||||
dump_media_attachments!(tar)
|
dump_likes!(zipfile)
|
||||||
dump_outbox!(tar)
|
dump_bookmarks!(zipfile)
|
||||||
dump_likes!(tar)
|
dump_actor!(zipfile)
|
||||||
dump_bookmarks!(tar)
|
|
||||||
dump_actor!(tar)
|
|
||||||
end
|
|
||||||
end
|
|
||||||
end
|
end
|
||||||
|
|
||||||
archive_filename = "#{['archive', Time.now.utc.strftime('%Y%m%d%H%M%S'), SecureRandom.hex(16)].join('-')}.tar.gz"
|
archive_filename = "#{['archive', Time.now.utc.strftime('%Y%m%d%H%M%S'), SecureRandom.hex(16)].join('-')}.zip"
|
||||||
|
|
||||||
@backup.dump = ActionDispatch::Http::UploadedFile.new(tempfile: tmp_file, filename: archive_filename)
|
@backup.dump = ActionDispatch::Http::UploadedFile.new(tempfile: tmp_file, filename: archive_filename)
|
||||||
@backup.processed = true
|
@backup.processed = true
|
||||||
|
@ -63,27 +71,28 @@ class BackupService < BaseService
|
||||||
tmp_file.unlink
|
tmp_file.unlink
|
||||||
end
|
end
|
||||||
|
|
||||||
def dump_media_attachments!(tar)
|
def dump_media_attachments!(zipfile)
|
||||||
MediaAttachment.attached.where(account: account).reorder(nil).find_in_batches do |media_attachments|
|
MediaAttachment.attached.where(account: account).reorder(nil).find_in_batches do |media_attachments|
|
||||||
media_attachments.each do |m|
|
media_attachments.each do |m|
|
||||||
next unless m.file&.path
|
path = m.file&.path
|
||||||
|
next unless path
|
||||||
|
|
||||||
download_to_tar(tar, m.file, m.file.path)
|
path = path.gsub(/\A.*\/system\//, '')
|
||||||
|
path = path.gsub(/\A\/+/, '')
|
||||||
|
download_to_zip(zipfile, m.file, path)
|
||||||
end
|
end
|
||||||
|
|
||||||
GC.start
|
GC.start
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
def dump_outbox!(tar)
|
def dump_outbox!(zipfile)
|
||||||
json = Oj.dump(collection)
|
zipfile.get_output_stream('outbox.json') do |io|
|
||||||
|
build_outbox_json!(io)
|
||||||
tar.add_file_simple('outbox.json', 0o444, json.bytesize) do |io|
|
|
||||||
io.write(json)
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
def dump_actor!(tar)
|
def dump_actor!(zipfile)
|
||||||
actor = serialize(account, ActivityPub::ActorSerializer)
|
actor = serialize(account, ActivityPub::ActorSerializer)
|
||||||
|
|
||||||
actor[:icon][:url] = "avatar#{File.extname(actor[:icon][:url])}" if actor[:icon]
|
actor[:icon][:url] = "avatar#{File.extname(actor[:icon][:url])}" if actor[:icon]
|
||||||
|
@ -92,51 +101,66 @@ class BackupService < BaseService
|
||||||
actor[:likes] = 'likes.json'
|
actor[:likes] = 'likes.json'
|
||||||
actor[:bookmarks] = 'bookmarks.json'
|
actor[:bookmarks] = 'bookmarks.json'
|
||||||
|
|
||||||
download_to_tar(tar, account.avatar, "avatar#{File.extname(account.avatar.path)}") if account.avatar.exists?
|
download_to_zip(zipfile, account.avatar, "avatar#{File.extname(account.avatar.path)}") if account.avatar.exists?
|
||||||
download_to_tar(tar, account.header, "header#{File.extname(account.header.path)}") if account.header.exists?
|
download_to_zip(zipfile, account.header, "header#{File.extname(account.header.path)}") if account.header.exists?
|
||||||
|
|
||||||
json = Oj.dump(actor)
|
json = Oj.dump(actor)
|
||||||
|
|
||||||
tar.add_file_simple('actor.json', 0o444, json.bytesize) do |io|
|
zipfile.get_output_stream('actor.json') do |io|
|
||||||
io.write(json)
|
io.write(json)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
def dump_likes!(tar)
|
def dump_likes!(zipfile)
|
||||||
collection = serialize(ActivityPub::CollectionPresenter.new(id: 'likes.json', type: :ordered, size: 0, items: []), ActivityPub::CollectionSerializer)
|
skeleton = serialize(ActivityPub::CollectionPresenter.new(id: 'likes.json', type: :ordered, size: 0, items: []), ActivityPub::CollectionSerializer)
|
||||||
|
skeleton.delete(:totalItems)
|
||||||
|
skeleton[:orderedItems] = ['!PLACEHOLDER!']
|
||||||
|
skeleton = Oj.dump(skeleton)
|
||||||
|
prepend, append = skeleton.split('"!PLACEHOLDER!"')
|
||||||
|
|
||||||
|
zipfile.get_output_stream('likes.json') do |io|
|
||||||
|
io.write(prepend)
|
||||||
|
|
||||||
|
add_comma = false
|
||||||
|
|
||||||
Status.reorder(nil).joins(:favourites).includes(:account).merge(account.favourites).find_in_batches do |statuses|
|
Status.reorder(nil).joins(:favourites).includes(:account).merge(account.favourites).find_in_batches do |statuses|
|
||||||
statuses.each do |status|
|
io.write(',') if add_comma
|
||||||
collection[:totalItems] += 1
|
add_comma = true
|
||||||
collection[:orderedItems] << ActivityPub::TagManager.instance.uri_for(status)
|
|
||||||
end
|
io.write(statuses.map do |status|
|
||||||
|
Oj.dump(ActivityPub::TagManager.instance.uri_for(status))
|
||||||
|
end.join(','))
|
||||||
|
|
||||||
GC.start
|
GC.start
|
||||||
end
|
end
|
||||||
|
|
||||||
json = Oj.dump(collection)
|
io.write(append)
|
||||||
|
|
||||||
tar.add_file_simple('likes.json', 0o444, json.bytesize) do |io|
|
|
||||||
io.write(json)
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
def dump_bookmarks!(tar)
|
def dump_bookmarks!(zipfile)
|
||||||
collection = serialize(ActivityPub::CollectionPresenter.new(id: 'bookmarks.json', type: :ordered, size: 0, items: []), ActivityPub::CollectionSerializer)
|
skeleton = serialize(ActivityPub::CollectionPresenter.new(id: 'bookmarks.json', type: :ordered, size: 0, items: []), ActivityPub::CollectionSerializer)
|
||||||
|
skeleton.delete(:totalItems)
|
||||||
|
skeleton[:orderedItems] = ['!PLACEHOLDER!']
|
||||||
|
skeleton = Oj.dump(skeleton)
|
||||||
|
prepend, append = skeleton.split('"!PLACEHOLDER!"')
|
||||||
|
|
||||||
|
zipfile.get_output_stream('bookmarks.json') do |io|
|
||||||
|
io.write(prepend)
|
||||||
|
|
||||||
|
add_comma = false
|
||||||
Status.reorder(nil).joins(:bookmarks).includes(:account).merge(account.bookmarks).find_in_batches do |statuses|
|
Status.reorder(nil).joins(:bookmarks).includes(:account).merge(account.bookmarks).find_in_batches do |statuses|
|
||||||
statuses.each do |status|
|
io.write(',') if add_comma
|
||||||
collection[:totalItems] += 1
|
add_comma = true
|
||||||
collection[:orderedItems] << ActivityPub::TagManager.instance.uri_for(status)
|
|
||||||
end
|
io.write(statuses.map do |status|
|
||||||
|
Oj.dump(ActivityPub::TagManager.instance.uri_for(status))
|
||||||
|
end.join(','))
|
||||||
|
|
||||||
GC.start
|
GC.start
|
||||||
end
|
end
|
||||||
|
|
||||||
json = Oj.dump(collection)
|
io.write(append)
|
||||||
|
|
||||||
tar.add_file_simple('bookmarks.json', 0o444, json.bytesize) do |io|
|
|
||||||
io.write(json)
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -159,10 +183,10 @@ class BackupService < BaseService
|
||||||
|
|
||||||
CHUNK_SIZE = 1.megabyte
|
CHUNK_SIZE = 1.megabyte
|
||||||
|
|
||||||
def download_to_tar(tar, attachment, filename)
|
def download_to_zip(zipfile, attachment, filename)
|
||||||
adapter = Paperclip.io_adapters.for(attachment)
|
adapter = Paperclip.io_adapters.for(attachment)
|
||||||
|
|
||||||
tar.add_file_simple(filename, 0o444, adapter.size) do |io|
|
zipfile.get_output_stream(filename) do |io|
|
||||||
while (buffer = adapter.read(CHUNK_SIZE))
|
while (buffer = adapter.read(CHUNK_SIZE))
|
||||||
io.write(buffer)
|
io.write(buffer)
|
||||||
end
|
end
|
||||||
|
|
|
@ -0,0 +1,67 @@
|
||||||
|
# frozen_string_literal: true
|
||||||
|
|
||||||
|
require 'rails_helper'
|
||||||
|
|
||||||
|
RSpec.describe BackupService, type: :service do
|
||||||
|
subject(:service_call) { described_class.new.call(backup) }
|
||||||
|
|
||||||
|
let!(:user) { Fabricate(:user) }
|
||||||
|
let!(:attachment) { Fabricate(:media_attachment, account: user.account) }
|
||||||
|
let!(:status) { Fabricate(:status, account: user.account, text: 'Hello', visibility: :public, media_attachments: [attachment]) }
|
||||||
|
let!(:private_status) { Fabricate(:status, account: user.account, text: 'secret', visibility: :private) }
|
||||||
|
let!(:favourite) { Fabricate(:favourite, account: user.account) }
|
||||||
|
let!(:bookmark) { Fabricate(:bookmark, account: user.account) }
|
||||||
|
let!(:backup) { Fabricate(:backup, user: user) }
|
||||||
|
|
||||||
|
def read_zip_file(backup, filename)
|
||||||
|
file = Paperclip.io_adapters.for(backup.dump)
|
||||||
|
Zip::File.open(file) do |zipfile|
|
||||||
|
entry = zipfile.glob(filename).first
|
||||||
|
return entry.get_input_stream.read
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
it 'marks the backup as processed' do
|
||||||
|
expect { service_call }.to change(backup, :processed).from(false).to(true)
|
||||||
|
end
|
||||||
|
|
||||||
|
it 'exports outbox.json as expected' do
|
||||||
|
service_call
|
||||||
|
|
||||||
|
json = Oj.load(read_zip_file(backup, 'outbox.json'))
|
||||||
|
expect(json['@context']).to_not be_nil
|
||||||
|
expect(json['type']).to eq 'OrderedCollection'
|
||||||
|
expect(json['totalItems']).to eq 2
|
||||||
|
expect(json['orderedItems'][0]['@context']).to be_nil
|
||||||
|
expect(json['orderedItems'][0]).to include({
|
||||||
|
'type' => 'Create',
|
||||||
|
'object' => include({
|
||||||
|
'id' => ActivityPub::TagManager.instance.uri_for(status),
|
||||||
|
'content' => '<p>Hello</p>',
|
||||||
|
}),
|
||||||
|
})
|
||||||
|
expect(json['orderedItems'][1]).to include({
|
||||||
|
'type' => 'Create',
|
||||||
|
'object' => include({
|
||||||
|
'id' => ActivityPub::TagManager.instance.uri_for(private_status),
|
||||||
|
'content' => '<p>secret</p>',
|
||||||
|
}),
|
||||||
|
})
|
||||||
|
end
|
||||||
|
|
||||||
|
it 'exports likes.json as expected' do
|
||||||
|
service_call
|
||||||
|
|
||||||
|
json = Oj.load(read_zip_file(backup, 'likes.json'))
|
||||||
|
expect(json['type']).to eq 'OrderedCollection'
|
||||||
|
expect(json['orderedItems']).to eq [ActivityPub::TagManager.instance.uri_for(favourite.status)]
|
||||||
|
end
|
||||||
|
|
||||||
|
it 'exports bookmarks.json as expected' do
|
||||||
|
service_call
|
||||||
|
|
||||||
|
json = Oj.load(read_zip_file(backup, 'bookmarks.json'))
|
||||||
|
expect(json['type']).to eq 'OrderedCollection'
|
||||||
|
expect(json['orderedItems']).to eq [ActivityPub::TagManager.instance.uri_for(bookmark.status)]
|
||||||
|
end
|
||||||
|
end
|
Loading…
Reference in New Issue