Account archive download (#6460)

* Fix #201: Account archive download

* Export actor and private key in the archive

* Optimize BackupService

- Add conversation to cached associations of status, because
  somehow it was forgotten and is source of N+1 queries
- Explicitly call GC between batches of records being fetched
  (Model class allocations are the worst offender)
- Stream media files into the tar in 1MB chunks
  (Do not allocate media file (up to 8MB) as string into memory)
- Use #bytesize instead of #size to calculate file size for JSON
  (Fix FileOverflow error)
- Segment media into subfolders by status ID because apparently
  GIF-to-MP4 media are all named "media.mp4" for some reason

* Keep uniquely generated filename in Paperclip::GifTranscoder

* Ensure dumped files do not overwrite each other by maintaining directory partitions

* Give tar archives a good name

* Add scheduler to remove week-old backups

* Fix code style issue
This commit is contained in:
Eugen Rochko 2018-02-21 23:21:32 +01:00 committed by GitHub
parent c1e77b56a9
commit 61ed133fea
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
27 changed files with 374 additions and 7 deletions

View file

@ -1,11 +1,23 @@
# frozen_string_literal: true
# Settings page for data exports; also lets the signed-in user request an
# account archive (backup) and lists the backups they already have.
class Settings::ExportsController < ApplicationController
  include Authorization

  layout 'admin'

  before_action :authenticate_user!

  # Prepares the export summary for the current account and the list of the
  # current user's backups for the view.
  def show
    # Assigned once; the previous duplicate assignment built the Export twice.
    @export  = Export.new(current_account)
    @backups = current_user.backups
  end

  # Requests a new account archive: checks the :backup policy (raising via
  # `authorize` if not permitted), creates the backup record, enqueues
  # BackupWorker with its id, then redirects back to the export settings page.
  def create
    authorize :backup, :create?

    backup = current_user.backups.create!
    BackupWorker.perform_async(backup.id)

    redirect_to settings_export_path
  end
end

View file

@ -0,0 +1,4 @@
<!-- Download icon: a white downward-pointing arrow above a horizontal bar, on a 24x24 canvas. -->
<svg fill="#FFFFFF" height="24" viewBox="0 0 24 24" width="24" xmlns="http://www.w3.org/2000/svg">
<path d="M19 9h-4V3H9v6H5l7 7 7-7zM5 18v2h14v-2H5z"/>
<path d="M0 0h24v24H0z" fill="none"/>
</svg>

After

Width:  |  Height:  |  Size: 205 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 271 B

View file

@ -66,4 +66,16 @@ class UserMailer < Devise::Mailer
mail to: @resource.email, subject: I18n.t('user_mailer.welcome.subject')
end
end
# Builds the "backup ready" e-mail for +user+ about +backup+.
#
# Exposes @resource, @instance (the configured local domain) and @backup to
# the mail templates. Returns without building a mail when the user is
# disabled. The subject is translated in the user's locale, falling back to
# the default locale when the user has none.
def backup_ready(user, backup)
  @backup   = backup
  @instance = Rails.configuration.x.local_domain
  @resource = user

  return if user.disabled?

  locale = user.locale || I18n.default_locale

  I18n.with_locale(locale) do
    mail to: user.email, subject: I18n.t('user_mailer.backup_ready.subject')
  end
end
end

22
app/models/backup.rb Normal file
View file

@ -0,0 +1,22 @@
# frozen_string_literal: true
# == Schema Information
#
# Table name: backups
#
# id :integer not null, primary key
# user_id :integer
# dump_file_name :string
# dump_content_type :string
# dump_file_size :integer
# dump_updated_at :datetime
# processed :boolean default(FALSE), not null
# created_at :datetime not null
# updated_at :datetime not null
#
# An account archive requested by a user. The archive file itself is stored
# as the `dump` attachment (dump_* columns); `processed` defaults to false
# per the schema above.
class Backup < ApplicationRecord
# Owner of the archive; mirrors User's `has_many :backups`.
belongs_to :user, inverse_of: :backups
# The generated archive file.
has_attached_file :dump
# No content-type validation is applied to the attachment.
do_not_validate_attachment_file_type :dump
end

View file

@ -76,7 +76,7 @@ class Status < ApplicationRecord
# Leaves out statuses authored by accounts that the given account excludes from its timeline.
scope :not_excluded_by_account, ->(account) { where.not(account_id: account.excluded_from_timeline_account_ids) }
# Leaves out statuses from domains the given account excludes; statuses whose
# account has no domain (NULL) are always kept. Joins accounts in both branches.
scope :not_domain_blocked_by_account, ->(account) { account.excluded_from_timeline_domains.blank? ? left_outer_joins(:account) : left_outer_joins(:account).where('accounts.domain IS NULL OR accounts.domain NOT IN (?)', account.excluded_from_timeline_domains) }
# NOTE(review): two consecutive cache_associated declarations. The second is the
# first plus :conversation (on the status and on its reblog); the first looks
# like the superseded pre-change line — confirm only one is meant to remain.
cache_associated :account, :application, :media_attachments, :tags, :stream_entry, mentions: :account, reblog: [:account, :application, :stream_entry, :tags, :media_attachments, mentions: :account], thread: :account
cache_associated :account, :application, :media_attachments, :conversation, :tags, :stream_entry, mentions: :account, reblog: [:account, :application, :stream_entry, :tags, :media_attachments, :conversation, mentions: :account], thread: :account
# Exposes the author's domain as `account_domain`.
delegate :domain, to: :account, prefix: true

View file

@ -60,6 +60,7 @@ class User < ApplicationRecord
# Allows the associated account's attributes to be set through the user.
accepts_nested_attributes_for :account
# OAuth applications owned by this user (polymorphic owner).
has_many :applications, class_name: 'Doorkeeper::Application', as: :owner
# Account archives requested by this user; mirrors Backup's `belongs_to :user`.
has_many :backups, inverse_of: :user
# When a locale is set it must be one of the available locales.
validates :locale, inclusion: I18n.available_locales.map(&:to_s), if: :locale?
# Re-checks the e-mail against the blacklist validator only when it changes.
validates_with BlacklistedEmailValidator, if: :email_changed?

View file

@ -15,4 +15,8 @@ class ApplicationPolicy
# The user belonging to the current account, or nil when there is no
# current account.
def current_user
  account = current_account
  return if account.nil?

  account.user
end
# True when a current user is available (i.e. `current_user` is not nil).
def user_signed_in?
  signed_out = current_user.nil?
  !signed_out
end
end

View file

@ -0,0 +1,9 @@
# frozen_string_literal: true
# Authorization rules for requesting account archives.
class BackupPolicy < ApplicationPolicy
  # Window after a backup's creation during which no new backup may be requested.
  MIN_AGE = 1.week

  # A signed-in user may request a backup only if they have none created
  # within the last MIN_AGE.
  #
  # Uses `exists?` so the database can stop at the first matching row instead
  # of counting all of them.
  def create?
    user_signed_in? && !current_user.backups.where('created_at >= ?', MIN_AGE.ago).exists?
  end
end

View file

@ -13,8 +13,8 @@ class ActivityPub::CollectionSerializer < ActiveModel::Serializer
# part_of / first are only serialized when the presenter provides them.
attribute :part_of, if: -> { object.part_of.present? }
has_one :first, if: -> { object.first.present? }
# NOTE(review): the items association is declared four times below. The first
# pair requires items to be `present?`; the second pair only requires items to
# be non-nil, so an empty array still qualifies. The first pair looks like the
# superseded pre-change lines — confirm which pair is meant to remain.
# In each pair the key is :ordered_items for ordered collections and :items otherwise.
has_many :items, key: :items, if: -> { (object.items.present? || page?) && !ordered? }
has_many :items, key: :ordered_items, if: -> { (object.items.present? || page?) && ordered? }
has_many :items, key: :items, if: -> { (!object.items.nil? || page?) && !ordered? }
has_many :items, key: :ordered_items, if: -> { (!object.items.nil? || page?) && ordered? }
def type
if page?

View file

@ -0,0 +1,128 @@
# frozen_string_literal: true
require 'rubygems/package'
# Builds the downloadable .tar.gz archive for a Backup record.
#
# The archive contains the account's attached media files, `outbox.json`
# (the account's statuses serialized into an ActivityPub ordered collection),
# `actor.json` together with the avatar/header files when they exist, and
# `key.pem` (the account's private key).
class BackupService < BaseService
  # Attachment files are copied into the tar in chunks of this size so a
  # whole file is never read into memory as one string.
  CHUNK_SIZE = 1.megabyte

  attr_reader :account, :backup, :collection

  # @param backup [Backup] the record to fill; the account exported is
  #   `backup.user.account`
  # Raises whatever `save!` raises if the finished backup cannot be saved.
  def call(backup)
    @backup  = backup
    @account = backup.user.account

    build_json!
    build_archive!
  end

  private

  # Serializes every status of the account into @collection[:orderedItems].
  def build_json!
    @collection = serialize(collection_presenter, ActivityPub::CollectionSerializer)

    account.statuses.with_includes.find_in_batches do |statuses|
      statuses.each do |status|
        item = serialize(status, ActivityPub::ActivitySerializer)
        item.delete(:'@context')
        localize_attachment_urls!(item)
        @collection[:orderedItems] << item
      end

      # Explicit GC run between batches to release the records just processed.
      GC.start
    end
  end

  # Rewrites attachment URLs of a serialized activity to archive-relative
  # paths (URL path with the leading "/system/" removed). Announce activities
  # and objects without attachments are left untouched.
  def localize_attachment_urls!(item)
    return if item[:type] == 'Announce' || item[:object][:attachment].blank?

    item[:object][:attachment].each do |attachment|
      attachment[:url] = Addressable::URI.parse(attachment[:url]).path.sub(%r{\A/system/}, '')
    end
  end

  # Writes the gzipped tar to a temp file, attaches it to the backup as
  # `dump`, marks the backup processed and saves it. The temp file is always
  # closed and removed afterwards.
  def build_archive!
    tmp_file = Tempfile.new(%w(archive .tar.gz))

    # Cleanup is scoped to after the temp file exists, so a failure while
    # creating it is not masked by calling #close on nil.
    begin
      File.open(tmp_file, 'wb') do |file|
        Zlib::GzipWriter.wrap(file) do |gz|
          Gem::Package::TarWriter.new(gz) do |tar|
            dump_media_attachments!(tar)
            dump_outbox!(tar)
            dump_actor!(tar)
          end
        end
      end

      # e.g. archive-20180221232132-a1b2.tar.gz
      archive_filename = "archive-#{Time.now.utc.strftime('%Y%m%d%H%M%S')}-#{SecureRandom.hex(2)}.tar.gz"

      @backup.dump      = ActionDispatch::Http::UploadedFile.new(tempfile: tmp_file, filename: archive_filename)
      @backup.processed = true
      @backup.save!
    ensure
      tmp_file.close
      tmp_file.unlink
    end
  end

  # Adds every attached media file of the account to the tar under its
  # storage path.
  def dump_media_attachments!(tar)
    MediaAttachment.attached.where(account: account).find_in_batches do |media_attachments|
      media_attachments.each do |m|
        download_to_tar(tar, m.file, m.file.path)
      end

      GC.start
    end
  end

  # Adds the serialized status collection as outbox.json.
  def dump_outbox!(tar)
    write_to_tar(tar, 'outbox.json', Oj.dump(collection))
  end

  # Adds actor.json, the avatar/header files (when they exist) and key.pem.
  # Icon and image URLs inside actor.json are replaced with the names the
  # files get inside the archive.
  def dump_actor!(tar)
    actor = serialize(account, ActivityPub::ActorSerializer)

    actor[:icon][:url]  = "avatar#{url_extname(actor[:icon][:url])}" if actor[:icon]
    actor[:image][:url] = "header#{url_extname(actor[:image][:url])}" if actor[:image]

    download_to_tar(tar, account.avatar, "avatar#{File.extname(account.avatar.path)}") if account.avatar.exists?
    download_to_tar(tar, account.header, "header#{File.extname(account.header.path)}") if account.header.exists?

    write_to_tar(tar, 'actor.json', Oj.dump(actor))
    write_to_tar(tar, 'key.pem', account.private_key)
  end

  # File extension of a URL's path component. Parsing first keeps a query
  # string (e.g. "?1519250492") out of the extension, the same way
  # attachment URLs are reduced to their path for outbox.json.
  def url_extname(url)
    File.extname(Addressable::URI.parse(url).path)
  end

  # Empty ordered collection describing the account's outbox.
  def collection_presenter
    ActivityPub::CollectionPresenter.new(
      id: account_outbox_url(account),
      type: :ordered,
      size: account.statuses_count,
      items: []
    )
  end

  # Serializes +object+ with +serializer+ through the ActivityPub adapter.
  def serialize(object, serializer)
    ActiveModelSerializers::SerializableResource.new(
      object,
      serializer: serializer,
      adapter: ActivityPub::Adapter
    ).as_json
  end

  # Adds +contents+ to the tar as a read-only file. The size is given in
  # bytes (not characters), as the tar entry size must match what is written.
  def write_to_tar(tar, filename, contents)
    tar.add_file_simple(filename, 0o444, contents.bytesize) do |io|
      io.write(contents)
    end
  end

  # Copies +attachment+ into the tar as +filename+, CHUNK_SIZE bytes at a time.
  def download_to_tar(tar, attachment, filename)
    adapter = Paperclip.io_adapters.for(attachment)

    tar.add_file_simple(filename, 0o444, adapter.size) do |io|
      while (buffer = adapter.read(CHUNK_SIZE))
        io.write(buffer)
      end
    end
  end
end

View file

@ -20,3 +20,26 @@
%th= t('exports.mutes')
%td= @export.total_mutes
%td= table_link_to 'download', t('exports.csv'), settings_exports_mutes_path(format: :csv)
-# Account archive section: hint text, then a request button that is only shown when the backup policy allows creating a backup.
%p.muted-hint= t('exports.archive_takeout.hint_html')
- if policy(:backup).create?
%p= link_to t('exports.archive_takeout.request'), settings_export_path, class: 'button', method: :post
-# Table of the user's backups: creation date, then either size + download link (processed) or an "in progress" cell spanning both columns.
- unless @backups.empty?
.table-wrapper
%table.table
%thead
%tr
%th= t('exports.archive_takeout.date')
%th= t('exports.archive_takeout.size')
%th
%tbody
- @backups.each do |backup|
%tr
%td= l backup.created_at
- if backup.processed?
%td= number_to_human_size backup.dump_file_size
%td= table_link_to 'download', t('exports.archive_takeout.download'), backup.dump.url
- else
%td{ colspan: 2 }= t('exports.archive_takeout.in_progress')

View file

@ -0,0 +1,59 @@
-# Hero section: download icon and the "backup ready" title.
%table.email-table{ cellspacing: 0, cellpadding: 0 }
%tbody
%tr
%td.email-body
.email-container
%table.content-section{ cellspacing: 0, cellpadding: 0 }
%tbody
%tr
%td.content-cell.hero
.email-row
.col-6
%table.column{ cellspacing: 0, cellpadding: 0 }
%tbody
%tr
%td.column-cell.text-center.padded
%table.hero-icon{ align: 'center', cellspacing: 0, cellpadding: 0 }
%tbody
%tr
%td
= image_tag full_pack_url('icon_file_download.png'), alt: ''
%h1= t 'user_mailer.backup_ready.title'
-# Explanation paragraph.
%table.email-table{ cellspacing: 0, cellpadding: 0 }
%tbody
%tr
%td.email-body
.email-container
%table.content-section{ cellspacing: 0, cellpadding: 0 }
%tbody
%tr
%td.content-cell.content-start
.email-row
.col-6
%table.column{ cellspacing: 0, cellpadding: 0 }
%tbody
%tr
%td.column-cell.text-center
%p= t 'user_mailer.backup_ready.explanation'
-# Primary button linking to the backup's archive file.
%table.email-table{ cellspacing: 0, cellpadding: 0 }
%tbody
%tr
%td.email-body
.email-container
%table.content-section{ cellspacing: 0, cellpadding: 0 }
%tbody
%tr
%td.content-cell
%table.column{ cellspacing: 0, cellpadding: 0 }
%tbody
%tr
%td.column-cell.button-cell
%table.button{ align: 'center', cellspacing: 0, cellpadding: 0 }
%tbody
%tr
%td.button-primary
= link_to full_asset_url(@backup.dump.url) do
%span= t 'exports.archive_takeout.download'

View file

@ -0,0 +1,7 @@
<%= t 'user_mailer.backup_ready.title' %>
===
<%= t 'user_mailer.backup_ready.explanation' %>
=> <%= full_asset_url(@backup.dump.url) %>

View file

@ -0,0 +1,17 @@
# frozen_string_literal: true
# Background job that builds a requested account archive, removes the
# requesting user's other backups and e-mails them that the new one is ready.
class BackupWorker
  include Sidekiq::Worker

  sidekiq_options queue: 'pull'

  # @param backup_id [Integer] id of the Backup record to process
  def perform(backup_id)
    backup = Backup.find(backup_id)
    owner  = backup.user

    BackupService.new.call(backup)

    discard_other_backups(owner, backup)
    announce(owner, backup)
  end

  private

  # Destroys every backup of +owner+ except +kept+.
  def discard_other_backups(owner, kept)
    owner.backups.where.not(id: kept.id).destroy_all
  end

  # Queues the "backup ready" e-mail for delivery.
  def announce(owner, backup)
    UserMailer.backup_ready(owner, backup).deliver_later
  end
end

View file

@ -0,0 +1,16 @@
# frozen_string_literal: true
require 'sidekiq-scheduler'
# Scheduled job that destroys backups created more than seven days ago.
class Scheduler::BackupCleanupScheduler
  include Sidekiq::Worker

  # Destroys each expired backup in turn; `destroy!` raises if one cannot be destroyed.
  def perform
    old_backups.find_each do |backup|
      backup.destroy!
    end
  end

  private

  # Backups whose creation time is earlier than seven days before now.
  def old_backups
    cutoff = 7.days.ago
    Backup.where('created_at < ?', cutoff)
  end
end