2018-08-25 07:25:39 -04:00
# frozen_string_literal: true
2023-05-24 05:55:40 -04:00
require_relative 'base'
2018-08-25 07:25:39 -04:00
2023-05-23 10:08:26 -04:00
module Mastodon::CLI
2023-05-24 05:55:40 -04:00
class Media < Base
2018-11-08 15:06:26 -05:00
include ActionView :: Helpers :: NumberHelper
2023-05-03 23:33:55 -04:00
# Accepted number of path segments in a stored file's key once any 'cache'
# segment has been removed. Code in this file reads segment 0 as the model
# name, segment 1 as the attachment name, segments 2...-2 as the pieces of the
# record ID, and the last segment as the file name.
VALID_PATH_SEGMENTS_SIZE = [ 7 , 10 ] . freeze
2019-09-10 07:48:48 -04:00
option :days , type : :numeric , default : 7 , aliases : [ :d ]
2022-12-14 13:50:07 -05:00
option :prune_profiles , type : :boolean , default : false
option :remove_headers , type : :boolean , default : false
option :include_follows , type : :boolean , default : false
2019-09-10 07:48:48 -04:00
option :concurrency , type : :numeric , default : 5 , aliases : [ :c ]
2024-06-07 08:39:53 -04:00
option :verbose , type : :boolean , default : false , aliases : [ :v ]
2018-08-30 21:46:13 -04:00
option :dry_run , type : :boolean , default : false
2022-12-14 13:50:07 -05:00
desc 'remove' , 'Remove remote media files, headers or avatars'
2018-08-25 07:25:39 -04:00
long_desc <<-DESC
2022-12-14 13:50:07 -05:00
Removes locally cached copies of media attachments ( and optionally profile
2023-05-19 11:13:29 -04:00
headers and avatars ) from other servers . By default , only media attachments
2022-12-14 13:50:07 -05:00
are removed .
2018-08-25 07:25:39 -04:00
The - - days option specifies how old media attachments have to be before
2022-12-14 13:50:07 -05:00
they are removed . In case of avatars and headers , it specifies how old
the last webfinger request and update to the user has to be before they
are pruned . It defaults to 7 days .
If - - prune - profiles is specified , only avatars and headers are removed .
If - - remove - headers is specified , only headers are removed .
If - - include - follows is specified along with - - prune - profiles or
- - remove - headers , all non - local profiles will be pruned irrespective of
follow status . By default , only accounts that are not followed by or
following anyone locally are pruned .
2018-08-25 07:25:39 -04:00
DESC
# Removes locally cached files that originate from other servers.
#
# Without --prune-profiles / --remove-headers, destroys the file and thumbnail
# of cached remote media attachments created before the --days cutoff. With
# either flag, destroys headers (and, for --prune-profiles, avatars) of remote
# accounts whose last webfinger and last update are both older than the
# cutoff; accounts that appear on either side of a Follow are skipped unless
# --include-follows is given.
#
# Each worker block evaluates to the byte count for that record. When
# dry_run? is true nothing is destroyed or saved, but sizes are still counted.
def remove
  profile_mode = options[:prune_profiles] || options[:remove_headers]

  fail_with_message '--prune-profiles and --remove-headers should not be specified simultaneously' if options[:prune_profiles] && options[:remove_headers]
  fail_with_message '--include-follows can only be used with --prune-profiles or --remove-headers' if options[:include_follows] && !profile_mode

  cutoff = options[:days].days.ago

  if profile_mode
    stale_accounts = Account.remote.where({ last_webfingered_at: ..cutoff, updated_at: ..cutoff })

    processed, aggregate = parallelize_with_progress(stale_accounts) do |account|
      unless options[:include_follows]
        next if Follow.where(account: account).or(Follow.where(target_account: account)).exists?
      end

      # Nothing to do without a header when only headers are wanted, or when
      # the avatar is missing as well.
      next if account.header.blank? && (options[:remove_headers] || account.avatar.blank?)

      size = [
        account.header_file_size,
        (account.avatar_file_size if options[:prune_profiles]),
      ].compact.sum

      unless dry_run?
        account.header.destroy
        account.avatar.destroy if options[:prune_profiles]
        account.save!
      end

      size
    end

    say(" Visited #{ processed } accounts and removed profile media totaling #{ number_to_human_size ( aggregate ) } #{ dry_run_mode_suffix } ", :green, true)
  else
    stale_media = MediaAttachment.cached.remote.where(created_at: ..cutoff)

    processed, aggregate = parallelize_with_progress(stale_media) do |media_attachment|
      next if media_attachment.file.blank?

      size = [media_attachment.file_file_size, media_attachment.thumbnail_file_size].sum { |bytes| bytes || 0 }

      unless dry_run?
        media_attachment.file.destroy
        media_attachment.thumbnail.destroy
        media_attachment.save
      end

      size
    end

    say(" Removed #{ processed } media attachments (approx. #{ number_to_human_size ( aggregate ) } ) #{ dry_run_mode_suffix } ", :green, true)
  end
end
2019-09-10 09:29:12 -04:00
2019-12-08 09:37:12 -05:00
option :start_after
2020-03-25 20:56:41 -04:00
option :prefix
2020-08-31 21:33:21 -04:00
option :fix_permissions , type : :boolean , default : false
2019-12-08 09:37:12 -05:00
option :dry_run , type : :boolean , default : false
desc 'remove-orphans' , 'Scan storage and check for files that do not belong to existing media attachments'
long_desc << ~ LONG_DESC
Scans file storage for files that do not belong to existing media attachments . Because this operation
requires iterating over every single file individually , it will be slow .
Please mind that some storage providers charge for the necessary API requests to list objects .
LONG_DESC
# Scans the configured Paperclip storage and deletes files that no longer
# correspond to an attachment on an existing record.
#
# Supported backends are :s3 and :filesystem; :fog and :azure abort with a
# message. A stored file is treated as an orphan when no record is found for
# it, when the record's attachment is blank, or when attachment.variant?
# returns false for the file name. Files whose key does not have a recognized
# number of segments are logged and left alone, and files whose first segment
# does not map to one of PRELOADED_MODELS are left alone silently.
#
# Options read: :prefix (restricts the scan), :start_after and
# :fix_permissions (S3 only). When dry_run? is true nothing is deleted and no
# ACL is written, but would-be orphans are still logged and counted.
def remove_orphans
  progress        = create_progress_bar(nil)
  reclaimed_bytes = 0
  removed         = 0
  prefix          = options[:prefix]

  case Paperclip::Attachment.default_options[:storage]
  when :s3
    paperclip_instance = MediaAttachment.new.file
    s3_interface       = paperclip_instance.s3_interface
    s3_permissions     = Paperclip::Attachment.default_options[:s3_permissions]
    bucket             = s3_interface.bucket(Paperclip::Attachment.default_options[:s3_credentials][:bucket])
    last_key           = options[:start_after]

    loop do
      # Listing happens in pages of 1000; a listing failure ends the scan but
      # tells the operator where to resume from.
      objects = begin
        bucket.objects(start_after: last_key, prefix: prefix).limit(1000).to_a
      rescue => e
        progress.log(pastel.red(" Error fetching list of files: #{ e } "))
        progress.log(" If you want to continue from this point, add --start-after= #{ last_key } to your command ") if last_key
        break
      end

      break if objects.empty?

      last_key   = objects.last.key
      record_map = preload_records_from_mixed_objects(objects)

      objects.each do |object|
        object.acl.put(acl: s3_permissions) if options[:fix_permissions] && !dry_run?

        path_segments = object.key.split('/')
        path_segments.delete('cache')

        unless VALID_PATH_SEGMENTS_SIZE.include?(path_segments.size)
          progress.log(pastel.yellow(" Unrecognized file found: #{ object . key } "))
          next
        end

        model_name      = path_segments.first.classify
        attachment_name = path_segments[1].singularize
        record_id       = path_segments[2...-2].join.to_i
        file_name       = path_segments.last

        # record_map only ever contains PRELOADED_MODELS, so an object under
        # any other model would always look record-less. Leave such objects
        # alone, exactly as the filesystem branch does, instead of deleting
        # files we have no way of looking up.
        next unless PRELOADED_MODELS.include?(model_name)

        record     = record_map.dig(model_name, record_id)
        attachment = record&.public_send(attachment_name)

        progress.increment

        next unless attachment.blank? || !attachment.variant?(file_name)

        begin
          object.delete unless dry_run?

          reclaimed_bytes += object.size
          removed         += 1

          progress.log(" Found and removed orphan: #{ object . key } ")
        rescue => e
          progress.log(pastel.red(" Error processing #{ object . key } : #{ e } "))
        end
      end
    end
  when :fog
    fail_with_message 'The fog storage driver is not supported for this operation at this time'
  when :azure
    fail_with_message 'The azure storage driver is not supported for this operation at this time'
  when :filesystem
    require 'find'

    root_path   = Rails.configuration.x.file_storage_root_path.gsub(':rails_root', Rails.root.to_s)
    root_prefix = "#{root_path}#{File::SEPARATOR}"

    Find.find(File.join(*[root_path, prefix].compact)) do |path|
      next if File.directory?(path)

      # Strip only the leading storage root; a later occurrence of the same
      # text inside the path is part of the key and must be kept.
      key           = path.delete_prefix(root_prefix)
      path_segments = key.split(File::SEPARATOR)
      path_segments.delete('cache')

      unless VALID_PATH_SEGMENTS_SIZE.include?(path_segments.size)
        progress.log(pastel.yellow(" Unrecognized file found: #{ key } "))
        next
      end

      model_name      = path_segments.first.classify
      record_id       = path_segments[2...-2].join.to_i
      attachment_name = path_segments[1].singularize
      file_name       = path_segments.last

      next unless PRELOADED_MODELS.include?(model_name)

      record     = model_name.constantize.find_by(id: record_id)
      attachment = record&.public_send(attachment_name)

      progress.increment

      next unless attachment.blank? || !attachment.variant?(file_name)

      begin
        size = File.size(path)

        unless dry_run?
          File.delete(path)

          # Clean up directories left empty by the deletion; a non-empty
          # parent simply stops the clean-up.
          begin
            FileUtils.rmdir(File.dirname(path), parents: true)
          rescue Errno::ENOTEMPTY
            # OK
          end
        end

        reclaimed_bytes += size
        removed         += 1

        progress.log(" Found and removed orphan: #{ key } ")
      rescue => e
        progress.log(pastel.red(" Error processing #{ key } : #{ e } "))
      end
    end
  end

  # The total was unknown up front, so pin it to what was actually visited.
  progress.total = progress.progress
  progress.finish

  say(" Removed #{ removed } orphans (approx. #{ number_to_human_size ( reclaimed_bytes ) } ) #{ dry_run_mode_suffix } ", :green, true)
end
2019-09-10 09:29:12 -04:00
option :account , type : :string
option :domain , type : :string
option :status , type : :numeric
2022-08-24 22:40:17 -04:00
option :days , type : :numeric
2019-09-10 09:29:12 -04:00
option :concurrency , type : :numeric , default : 5 , aliases : [ :c ]
option :verbose , type : :boolean , default : false , aliases : [ :v ]
option :dry_run , type : :boolean , default : false
2019-10-07 23:59:10 -04:00
option :force , type : :boolean , default : false
2019-09-10 09:29:12 -04:00
desc 'refresh' , 'Fetch remote media files'
long_desc <<-DESC
Re - downloads media attachments from other servers . You must specify the
source of media attachments with one of the following options :
Use the - - status option to download attachments from a specific status ,
using the status local numeric ID .
Use the - - account option to download attachments from a specific account ,
using username @domain handle of the account .
Use the - - domain option to download attachments from a specific domain .
2019-10-07 23:59:10 -04:00
2022-08-24 22:40:17 -04:00
Use the - - days option to limit attachments created within days .
2019-10-07 23:59:10 -04:00
By default , attachments that are believed to be already downloaded will
not be re - downloaded . To force re - download of every URL , use - - force .
2019-09-10 09:29:12 -04:00
DESC
# Re-downloads remote media attachments.
#
# The set of attachments is chosen by the first option present, in this
# order: --status (status ID), --account (username@domain of a remote
# account), --domain, --days. When --days is given it additionally limits any
# of those sets to attachments whose ID is newer than the Snowflake ID for
# that point in time. Fails with a message when no source is given or the
# account cannot be found.
#
# Attachments without a remote URL, attachments that already have a file name
# (unless --force), and attachments from domains for which
# DomainBlock.reject_media? is true are skipped. When dry_run? is true
# nothing is reset or saved.
def refresh
  if options[:status]
    scope = MediaAttachment.where(status_id: options[:status])
  elsif options[:account]
    username, domain = options[:account].split('@')
    account = Account.find_remote(username, domain)

    fail_with_message 'No such account' if account.nil?

    scope = MediaAttachment.where(account_id: account.id)
  elsif options[:domain]
    scope = MediaAttachment.joins(:account).merge(Account.by_domain_and_subdomains(options[:domain]))
  elsif options[:days].present?
    scope = MediaAttachment.remote
  else
    fail_with_message 'Specify the source of media attachments'
  end

  scope = scope.where('media_attachments.id > ?', Mastodon::Snowflake.id_at(options[:days].days.ago, with_random: false)) if options[:days].present?

  processed, aggregate = parallelize_with_progress(scope) do |media_attachment|
    next if media_attachment.remote_url.blank? || (!options[:force] && media_attachment.file_file_name.present?)
    next if DomainBlock.reject_media?(media_attachment.account.domain)

    unless dry_run?
      media_attachment.reset_file!
      media_attachment.reset_thumbnail!
      media_attachment.save
    end

    # Either size can be nil here: in a dry run nothing has been fetched yet,
    # and a download may not have produced a file. Count those as 0 bytes
    # instead of raising on nil.
    (media_attachment.file_file_size || 0) + (media_attachment.thumbnail_file_size || 0)
  end

  say(" Downloaded #{ processed } media attachments (approx. #{ number_to_human_size ( aggregate ) } ) #{ dry_run_mode_suffix } ", :green, true)
end
2019-10-07 14:04:56 -04:00
desc 'usage' , 'Calculate disk space consumed by Mastodon'
# Prints a table of storage consumption: a header row followed by the rows
# produced by object_storage_summary.
def usage
  header_row = %w(Object Total Local)

  print_table([header_row] + object_storage_summary)
end
2019-11-04 06:55:20 -05:00
2020-03-25 20:56:41 -04:00
desc 'lookup URL' , 'Lookup where media is displayed by passing a media URL'
# Prints the public URL of the record a media URL belongs to.
#
# The first two path components of the URL are discarded, as is any 'cache'
# segment; what remains must have one of the VALID_PATH_SEGMENTS_SIZE lengths
# and start with a model listed in PRELOADED_MODELS. If the record found
# responds to #status, that status is used instead of the record itself.
#
# url - the media URL to look up.
#
# Fails with a message when the URL cannot be parsed, is not a media URL,
# names an unknown model, has no matching record, or when no public URL
# exists for the record.
def lookup(url)
  path = Addressable::URI.parse(url).path

  # drop(2) yields [] for paths with fewer than two components, so short URLs
  # such as "https://example.com/" are reported as "Not a media URL" rather
  # than blowing up on nil.
  path_segments = path.split('/').drop(2)
  path_segments.delete('cache')

  fail_with_message 'Not a media URL' unless VALID_PATH_SEGMENTS_SIZE.include?(path_segments.size)

  model_name = path_segments.first.classify
  record_id  = path_segments[2...-2].join.to_i

  fail_with_message " Cannot find corresponding model: #{ model_name } " unless PRELOADED_MODELS.include?(model_name)

  record = model_name.constantize.find_by(id: record_id)
  record = record.status if record.respond_to?(:status)

  fail_with_message 'Cannot find corresponding record' unless record

  display_url = ActivityPub::TagManager.instance.url_for(record)

  fail_with_message 'No public URL for this type of record' if display_url.blank?

  say(display_url, :blue)
rescue Addressable::URI::InvalidURIError
  fail_with_message 'Invalid URL'
end
private
2024-10-03 08:13:54 -04:00
# Builds the body rows for the usage table.
#
# Returns an array of [title, total size, local size] rows, with sizes
# formatted by number_to_human_size. Rows that have no local figure carry nil
# in the last column.
def object_storage_summary
  media_sum = combined_media_sum

  raw_rows = [
    [:attachments, MediaAttachment.sum(media_sum), MediaAttachment.where(account: Account.local).sum(media_sum)],
    [:custom_emoji, CustomEmoji.sum(:image_file_size), CustomEmoji.local.sum(:image_file_size)],
    [:avatars, Account.sum(:avatar_file_size), Account.local.sum(:avatar_file_size)],
    [:headers, Account.sum(:header_file_size), Account.local.sum(:header_file_size)],
    [:preview_cards, PreviewCard.sum(:image_file_size), nil],
    [:backups, Backup.sum(:dump_file_size), nil],
    [:imports, Import.sum(:data_file_size), nil],
    [:settings, SiteUpload.sum(:file_file_size), nil],
  ]

  raw_rows.map do |label, total, local|
    title      = label.to_s.titleize
    total_size = number_to_human_size(total)
    local_size = number_to_human_size(local) if local.present?

    [title, total_size, local_size]
  end
end
2024-10-03 08:13:54 -04:00
# SQL expression adding an attachment's file size and thumbnail size, with a
# NULL in either column counted as 0. Returned wrapped in Arel.sql.
def combined_media_sum
  expression = <<~SQL.squish
    COALESCE ( file_file_size , 0 ) + COALESCE ( thumbnail_file_size , 0 )
  SQL

  Arel.sql(expression)
end
2024-10-04 04:30:08 -04:00
# Names of the models whose stored files this command knows how to look up.
# A model name derived from the first segment of a storage path is only acted
# on when it appears in this list.
PRELOADED_MODELS = %w(
  Account
  Backup
  CustomEmoji
  Import
  MediaAttachment
  PreviewCard
  SiteUpload
).freeze
# Loads, with one query per model, the records referenced by a batch of
# storage objects.
#
# objects - items responding to #key with a '/'-separated storage path.
#
# Keys that do not have a recognized number of segments (after dropping any
# 'cache' segment) or whose model is not in PRELOADED_MODELS are ignored.
#
# Returns a hash of model name => { record id => record }.
def preload_records_from_mixed_objects(objects)
  ids_by_model = Hash.new { |hash, key| hash[key] = [] }

  objects.each do |object|
    segments = object.key.split('/')
    segments.delete('cache')

    next unless VALID_PATH_SEGMENTS_SIZE.include?(segments.size)

    model_name = segments.first.classify

    next unless PRELOADED_MODELS.include?(model_name)

    ids_by_model[model_name] << segments[2...-2].join.to_i
  end

  ids_by_model.to_h do |model_name, record_ids|
    [model_name, model_name.constantize.where(id: record_ids).index_by(&:id)]
  end
end
2018-08-25 07:25:39 -04:00
end
end