From bb4756c823eef72ab6f9c52808726846a85a39f3 Mon Sep 17 00:00:00 2001 From: Eugen Rochko Date: Mon, 26 Jun 2023 14:17:41 +0200 Subject: [PATCH] Change files to be deleted in batches instead of one-by-one (#23302) --- app/lib/attachment_batch.rb | 103 +++++++++++++++++++++ app/lib/vacuum/media_attachments_vacuum.rb | 10 +- app/services/clear_domain_media_service.rb | 30 ++---- 3 files changed, 114 insertions(+), 29 deletions(-) create mode 100644 app/lib/attachment_batch.rb diff --git a/app/lib/attachment_batch.rb b/app/lib/attachment_batch.rb new file mode 100644 index 0000000000..41dc36b64c --- /dev/null +++ b/app/lib/attachment_batch.rb @@ -0,0 +1,103 @@ +# frozen_string_literal: true + +class AttachmentBatch + # Maximum amount of objects you can delete in an S3 API call. It's + # important to remember that this does not correspond to the number + # of records in the batch, since records can have multiple attachments + LIMIT = 1_000 + + # Attributes generated and maintained by Paperclip (not all of them + # are always used on every class, however) + NULLABLE_ATTRIBUTES = %w( + file_name + content_type + file_size + fingerprint + created_at + updated_at + ).freeze + + # Styles that are always present even when not explicitly defined + BASE_STYLES = %i(original).freeze + + attr_reader :klass, :records, :storage_mode + + def initialize(klass, records) + @klass = klass + @records = records + @storage_mode = Paperclip::Attachment.default_options[:storage] + @attachment_names = klass.attachment_definitions.keys + end + + def delete + remove_files + batch.delete_all + end + + def clear + remove_files + batch.update_all(nullified_attributes) # rubocop:disable Rails/SkipsModelValidations + end + + private + + def batch + klass.where(id: records.map(&:id)) + end + + def remove_files + keys = [] + + logger.debug { "Preparing to delete attachments for #{records.size} records" } + + records.each do |record| + @attachment_names.each do |attachment_name| + attachment = record.public_send(attachment_name) + styles = BASE_STYLES | attachment.styles.keys + + next if attachment.blank? + + styles.each do |style| + case @storage_mode + when :s3 + logger.debug { "Adding #{attachment.path(style)} to batch for deletion" } + keys << attachment.style_name_as_path(style) + when :filesystem + logger.debug { "Deleting #{attachment.path(style)}" } + FileUtils.remove_file(attachment.path(style)) + when :fog + logger.debug { "Deleting #{attachment.path(style)}" } + attachment.directory.files.new(key: attachment.path(style)).destroy + end + end + end + end + + return unless storage_mode == :s3 + + # We can batch deletes over S3, but there is a limit of how many + # objects can be processed at once, so we have to potentially + # separate them into multiple calls. + + keys.each_slice(LIMIT) do |keys_slice| + logger.debug { "Deleting #{keys_slice.size} objects" } + + bucket.delete_objects(delete: { + objects: keys_slice.map { |key| { key: key } }, + quiet: true, + }) + end + end + + def bucket + @bucket ||= records.first.public_send(@attachment_names.first).s3_bucket + end + + def nullified_attributes + @attachment_names.flat_map { |attachment_name| NULLABLE_ATTRIBUTES.map { |attribute| "#{attachment_name}_#{attribute}" } & klass.column_names }.index_with(nil) + end + + def logger + Rails.logger + end +end diff --git a/app/lib/vacuum/media_attachments_vacuum.rb b/app/lib/vacuum/media_attachments_vacuum.rb index 7c0a85a9d9..7b21c84bbc 100644 --- a/app/lib/vacuum/media_attachments_vacuum.rb +++ b/app/lib/vacuum/media_attachments_vacuum.rb @@ -15,15 +15,15 @@ class Vacuum::MediaAttachmentsVacuum private def vacuum_cached_files! - media_attachments_past_retention_period.find_each do |media_attachment| - media_attachment.file.destroy - media_attachment.thumbnail.destroy - media_attachment.save + media_attachments_past_retention_period.find_in_batches do |media_attachments| + AttachmentBatch.new(MediaAttachment, media_attachments).clear end end def vacuum_orphaned_records! - orphaned_media_attachments.in_batches.destroy_all + orphaned_media_attachments.find_in_batches do |media_attachments| + AttachmentBatch.new(MediaAttachment, media_attachments).delete + end end def media_attachments_past_retention_period diff --git a/app/services/clear_domain_media_service.rb b/app/services/clear_domain_media_service.rb index 9e70ebe51c..7bf2d62fb0 100644 --- a/app/services/clear_domain_media_service.rb +++ b/app/services/clear_domain_media_service.rb @@ -10,14 +10,6 @@ class ClearDomainMediaService < BaseService private - def invalidate_association_caches!(status_ids) - # Normally, associated models of a status are immutable (except for accounts) - # so they are aggressively cached. After updating the media attachments to no - # longer point to a local file, we need to clear the cache to make those - # changes appear in the API and UI - Rails.cache.delete_multi(status_ids.map { |id| "statuses/#{id}" }) - end - def clear_media! clear_account_images! clear_account_attachments! @@ -25,31 +17,21 @@ class ClearDomainMediaService < BaseService end def clear_account_images! - blocked_domain_accounts.reorder(nil).find_each do |account| - account.avatar.destroy if account.avatar&.exists? - account.header.destroy if account.header&.exists? - account.save + blocked_domain_accounts.reorder(nil).find_in_batches do |accounts| + AttachmentBatch.new(Account, accounts).clear end end def clear_account_attachments! media_from_blocked_domain.reorder(nil).find_in_batches do |attachments| - affected_status_ids = [] - - attachments.each do |attachment| - affected_status_ids << attachment.status_id if attachment.status_id.present? - - attachment.file.destroy if attachment.file&.exists? - attachment.type = :unknown - attachment.save - end - - invalidate_association_caches!(affected_status_ids) unless affected_status_ids.empty? + AttachmentBatch.new(MediaAttachment, attachments).clear end end def clear_emojos! - emojis_from_blocked_domains.destroy_all + emojis_from_blocked_domains.find_in_batches do |custom_emojis| + AttachmentBatch.new(CustomEmoji, custom_emojis).delete + end end def blocked_domain