Merge remote-tracking branch 'parent/main' into upstream-20240122
This commit is contained in:
commit
a4cc73438e
65 changed files with 1150 additions and 707 deletions
|
@ -223,7 +223,7 @@ module Mastodon::CLI
|
|||
say 'Deduplicating accounts… for local accounts, you will be asked to chose which account to keep unchanged.'
|
||||
|
||||
find_duplicate_accounts.each do |row|
|
||||
accounts = Account.where(id: row['ids'].split(',')).to_a
|
||||
accounts = Account.where(id: row['ids'].split(','))
|
||||
|
||||
if accounts.first.local?
|
||||
deduplicate_local_accounts!(accounts)
|
||||
|
@ -275,7 +275,7 @@ module Mastodon::CLI
|
|||
|
||||
def deduplicate_users_process_email
|
||||
ActiveRecord::Base.connection.select_all("SELECT string_agg(id::text, ',') AS ids FROM users GROUP BY email HAVING count(*) > 1").each do |row|
|
||||
users = User.where(id: row['ids'].split(',')).sort_by(&:updated_at).reverse
|
||||
users = User.where(id: row['ids'].split(',')).order(updated_at: :desc).to_a
|
||||
ref_user = users.shift
|
||||
say "Multiple users registered with e-mail address #{ref_user.email}.", :yellow
|
||||
say "e-mail will be disabled for the following accounts: #{users.map { |user| user.account.acct }.join(', ')}", :yellow
|
||||
|
@ -289,7 +289,7 @@ module Mastodon::CLI
|
|||
|
||||
def deduplicate_users_process_confirmation_token
|
||||
ActiveRecord::Base.connection.select_all("SELECT string_agg(id::text, ',') AS ids FROM users WHERE confirmation_token IS NOT NULL GROUP BY confirmation_token HAVING count(*) > 1").each do |row|
|
||||
users = User.where(id: row['ids'].split(',')).sort_by(&:created_at).reverse.drop(1)
|
||||
users = User.where(id: row['ids'].split(',')).order(created_at: :desc).to_a.drop(1)
|
||||
say "Unsetting confirmation token for those accounts: #{users.map { |user| user.account.acct }.join(', ')}", :yellow
|
||||
|
||||
users.each do |user|
|
||||
|
@ -301,7 +301,7 @@ module Mastodon::CLI
|
|||
def deduplicate_users_process_remember_token
|
||||
if migrator_version < 2022_01_18_183010
|
||||
ActiveRecord::Base.connection.select_all("SELECT string_agg(id::text, ',') AS ids FROM users WHERE remember_token IS NOT NULL GROUP BY remember_token HAVING count(*) > 1").each do |row|
|
||||
users = User.where(id: row['ids'].split(',')).sort_by(&:updated_at).reverse.drop(1)
|
||||
users = User.where(id: row['ids'].split(',')).order(updated_at: :desc).to_a.drop(1)
|
||||
say "Unsetting remember token for those accounts: #{users.map { |user| user.account.acct }.join(', ')}", :yellow
|
||||
|
||||
users.each do |user|
|
||||
|
@ -313,7 +313,7 @@ module Mastodon::CLI
|
|||
|
||||
def deduplicate_users_process_password_token
|
||||
ActiveRecord::Base.connection.select_all("SELECT string_agg(id::text, ',') AS ids FROM users WHERE reset_password_token IS NOT NULL GROUP BY reset_password_token HAVING count(*) > 1").each do |row|
|
||||
users = User.where(id: row['ids'].split(',')).sort_by(&:updated_at).reverse.drop(1)
|
||||
users = User.where(id: row['ids'].split(',')).order(updated_at: :desc).to_a.drop(1)
|
||||
say "Unsetting password reset token for those accounts: #{users.map { |user| user.account.acct }.join(', ')}", :yellow
|
||||
|
||||
users.each do |user|
|
||||
|
@ -341,7 +341,7 @@ module Mastodon::CLI
|
|||
|
||||
say 'Removing duplicate account identity proofs…'
|
||||
ActiveRecord::Base.connection.select_all("SELECT string_agg(id::text, ',') AS ids FROM account_identity_proofs GROUP BY account_id, provider, provider_username HAVING count(*) > 1").each do |row|
|
||||
AccountIdentityProof.where(id: row['ids'].split(',')).sort_by(&:id).reverse.drop(1).each(&:destroy)
|
||||
AccountIdentityProof.where(id: row['ids'].split(',')).order(id: :desc).to_a.drop(1).each(&:destroy)
|
||||
end
|
||||
|
||||
say 'Restoring account identity proofs indexes…'
|
||||
|
@ -355,7 +355,7 @@ module Mastodon::CLI
|
|||
|
||||
say 'Removing duplicate announcement reactions…'
|
||||
ActiveRecord::Base.connection.select_all("SELECT string_agg(id::text, ',') AS ids FROM announcement_reactions GROUP BY account_id, announcement_id, name HAVING count(*) > 1").each do |row|
|
||||
AnnouncementReaction.where(id: row['ids'].split(',')).sort_by(&:id).reverse.drop(1).each(&:destroy)
|
||||
AnnouncementReaction.where(id: row['ids'].split(',')).order(id: :desc).to_a.drop(1).each(&:destroy)
|
||||
end
|
||||
|
||||
say 'Restoring announcement_reactions indexes…'
|
||||
|
@ -367,7 +367,7 @@ module Mastodon::CLI
|
|||
|
||||
say 'Deduplicating conversations…'
|
||||
ActiveRecord::Base.connection.select_all("SELECT string_agg(id::text, ',') AS ids FROM conversations WHERE uri IS NOT NULL GROUP BY uri HAVING count(*) > 1").each do |row|
|
||||
conversations = Conversation.where(id: row['ids'].split(',')).sort_by(&:id).reverse
|
||||
conversations = Conversation.where(id: row['ids'].split(',')).order(id: :desc).to_a
|
||||
|
||||
ref_conversation = conversations.shift
|
||||
|
||||
|
@ -390,7 +390,7 @@ module Mastodon::CLI
|
|||
|
||||
say 'Deduplicating custom_emojis…'
|
||||
ActiveRecord::Base.connection.select_all("SELECT string_agg(id::text, ',') AS ids FROM custom_emojis GROUP BY shortcode, domain HAVING count(*) > 1").each do |row|
|
||||
emojis = CustomEmoji.where(id: row['ids'].split(',')).sort_by(&:id).reverse
|
||||
emojis = CustomEmoji.where(id: row['ids'].split(',')).order(id: :desc).to_a
|
||||
|
||||
ref_emoji = emojis.shift
|
||||
|
||||
|
@ -409,7 +409,7 @@ module Mastodon::CLI
|
|||
|
||||
say 'Deduplicating custom_emoji_categories…'
|
||||
ActiveRecord::Base.connection.select_all("SELECT string_agg(id::text, ',') AS ids FROM custom_emoji_categories GROUP BY name HAVING count(*) > 1").each do |row|
|
||||
categories = CustomEmojiCategory.where(id: row['ids'].split(',')).sort_by(&:id).reverse
|
||||
categories = CustomEmojiCategory.where(id: row['ids'].split(',')).order(id: :desc).to_a
|
||||
|
||||
ref_category = categories.shift
|
||||
|
||||
|
@ -428,7 +428,7 @@ module Mastodon::CLI
|
|||
|
||||
say 'Deduplicating domain_allows…'
|
||||
ActiveRecord::Base.connection.select_all("SELECT string_agg(id::text, ',') AS ids FROM domain_allows GROUP BY domain HAVING count(*) > 1").each do |row|
|
||||
DomainAllow.where(id: row['ids'].split(',')).sort_by(&:id).reverse.drop(1).each(&:destroy)
|
||||
DomainAllow.where(id: row['ids'].split(',')).order(id: :desc).to_a.drop(1).each(&:destroy)
|
||||
end
|
||||
|
||||
say 'Restoring domain_allows indexes…'
|
||||
|
@ -466,7 +466,7 @@ module Mastodon::CLI
|
|||
|
||||
say 'Deduplicating unavailable_domains…'
|
||||
ActiveRecord::Base.connection.select_all("SELECT string_agg(id::text, ',') AS ids FROM unavailable_domains GROUP BY domain HAVING count(*) > 1").each do |row|
|
||||
UnavailableDomain.where(id: row['ids'].split(',')).sort_by(&:id).reverse.drop(1).each(&:destroy)
|
||||
UnavailableDomain.where(id: row['ids'].split(',')).order(id: :desc).to_a.drop(1).each(&:destroy)
|
||||
end
|
||||
|
||||
say 'Restoring unavailable_domains indexes…'
|
||||
|
@ -478,7 +478,7 @@ module Mastodon::CLI
|
|||
|
||||
say 'Deduplicating email_domain_blocks…'
|
||||
ActiveRecord::Base.connection.select_all("SELECT string_agg(id::text, ',') AS ids FROM email_domain_blocks GROUP BY domain HAVING count(*) > 1").each do |row|
|
||||
domain_blocks = EmailDomainBlock.where(id: row['ids'].split(',')).sort_by { |b| b.parent.nil? ? 1 : 0 }.to_a
|
||||
domain_blocks = EmailDomainBlock.where(id: row['ids'].split(',')).order(EmailDomainBlock.arel_table[:parent_id].asc.nulls_first).to_a
|
||||
domain_blocks.drop(1).each(&:destroy)
|
||||
end
|
||||
|
||||
|
@ -507,7 +507,7 @@ module Mastodon::CLI
|
|||
|
||||
say 'Deduplicating preview_cards…'
|
||||
ActiveRecord::Base.connection.select_all("SELECT string_agg(id::text, ',') AS ids FROM preview_cards GROUP BY url HAVING count(*) > 1").each do |row|
|
||||
PreviewCard.where(id: row['ids'].split(',')).sort_by(&:id).reverse.drop(1).each(&:destroy)
|
||||
PreviewCard.where(id: row['ids'].split(',')).order(id: :desc).to_a.drop(1).each(&:destroy)
|
||||
end
|
||||
|
||||
say 'Restoring preview_cards indexes…'
|
||||
|
@ -519,7 +519,7 @@ module Mastodon::CLI
|
|||
|
||||
say 'Deduplicating statuses…'
|
||||
ActiveRecord::Base.connection.select_all("SELECT string_agg(id::text, ',') AS ids FROM statuses WHERE uri IS NOT NULL GROUP BY uri HAVING count(*) > 1").each do |row|
|
||||
statuses = Status.where(id: row['ids'].split(',')).sort_by(&:id)
|
||||
statuses = Status.where(id: row['ids'].split(',')).order(id: :asc).to_a
|
||||
ref_status = statuses.shift
|
||||
statuses.each do |status|
|
||||
merge_statuses!(ref_status, status) if status.account_id == ref_status.account_id
|
||||
|
@ -541,7 +541,7 @@ module Mastodon::CLI
|
|||
|
||||
say 'Deduplicating tags…'
|
||||
ActiveRecord::Base.connection.select_all("SELECT string_agg(id::text, ',') AS ids FROM tags GROUP BY lower((name)::text) HAVING count(*) > 1").each do |row|
|
||||
tags = Tag.where(id: row['ids'].split(',')).sort_by { |t| [t.usable?, t.trendable?, t.listable?].count(false) }
|
||||
tags = Tag.where(id: row['ids'].split(',')).order(Arel.sql('(usable::int + trendable::int + listable::int) desc')).to_a
|
||||
ref_tag = tags.shift
|
||||
tags.each do |tag|
|
||||
merge_tags!(ref_tag, tag)
|
||||
|
@ -564,7 +564,7 @@ module Mastodon::CLI
|
|||
|
||||
say 'Deduplicating webauthn_credentials…'
|
||||
ActiveRecord::Base.connection.select_all("SELECT string_agg(id::text, ',') AS ids FROM webauthn_credentials GROUP BY external_id HAVING count(*) > 1").each do |row|
|
||||
WebauthnCredential.where(id: row['ids'].split(',')).sort_by(&:id).reverse.drop(1).each(&:destroy)
|
||||
WebauthnCredential.where(id: row['ids'].split(',')).order(id: :desc).to_a.drop(1).each(&:destroy)
|
||||
end
|
||||
|
||||
say 'Restoring webauthn_credentials indexes…'
|
||||
|
@ -578,7 +578,7 @@ module Mastodon::CLI
|
|||
|
||||
say 'Deduplicating webhooks…'
|
||||
ActiveRecord::Base.connection.select_all("SELECT string_agg(id::text, ',') AS ids FROM webhooks GROUP BY url HAVING count(*) > 1").each do |row|
|
||||
Webhook.where(id: row['ids'].split(',')).sort_by(&:id).reverse.drop(1).each(&:destroy)
|
||||
Webhook.where(id: row['ids'].split(',')).order(id: :desc).drop(1).each(&:destroy)
|
||||
end
|
||||
|
||||
say 'Restoring webhooks indexes…'
|
||||
|
@ -590,8 +590,8 @@ module Mastodon::CLI
|
|||
SoftwareUpdate.delete_all
|
||||
end
|
||||
|
||||
def deduplicate_local_accounts!(accounts)
|
||||
accounts = accounts.sort_by(&:id).reverse
|
||||
def deduplicate_local_accounts!(scope)
|
||||
accounts = scope.order(id: :desc).to_a
|
||||
|
||||
say "Multiple local accounts were found for username '#{accounts.first.username}'.", :yellow
|
||||
say 'All those accounts are distinct accounts but only the most recently-created one is fully-functional.', :yellow
|
||||
|
@ -629,8 +629,8 @@ module Mastodon::CLI
|
|||
end
|
||||
end
|
||||
|
||||
def deduplicate_remote_accounts!(accounts)
|
||||
accounts = accounts.sort_by(&:updated_at).reverse
|
||||
def deduplicate_remote_accounts!(scope)
|
||||
accounts = scope.order(updated_at: :desc).to_a
|
||||
|
||||
reference_account = accounts.shift
|
||||
|
||||
|
|
|
@ -8,15 +8,15 @@
|
|||
# shorten temporary column names.
|
||||
|
||||
# Documentation on using these functions (and why one might do so):
|
||||
# https://gitlab.com/gitlab-org/gitlab-ce/blob/master/doc/development/what_requires_downtime.md
|
||||
# https://gitlab.com/gitlab-org/gitlab-foss/-/blob/master/doc/development/database/avoiding_downtime_in_migrations.md
|
||||
|
||||
# The file itself:
|
||||
# https://gitlab.com/gitlab-org/gitlab-ce/blob/master/lib/gitlab/database/migration_helpers.rb
|
||||
# The original file (since updated):
|
||||
# https://gitlab.com/gitlab-org/gitlab-foss/-/blob/master/lib/gitlab/database/migration_helpers.rb
|
||||
|
||||
# It is licensed as follows:
|
||||
|
||||
# Copyright (c) 2011-2017 GitLab B.V.
|
||||
|
||||
# Copyright (c) 2011-present GitLab B.V.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
|
@ -24,16 +24,16 @@
|
|||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
|
||||
# The above copyright notice and this permission notice shall be included in
|
||||
# all copies or substantial portions of the Software.
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
# THE SOFTWARE.
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
|
||||
# This is bad form, but there are enough differences that it's impractical to do
|
||||
# otherwise:
|
||||
|
@ -77,37 +77,12 @@ module Mastodon
|
|||
end
|
||||
end
|
||||
|
||||
BACKGROUND_MIGRATION_BATCH_SIZE = 1000 # Number of rows to process per job
|
||||
BACKGROUND_MIGRATION_JOB_BUFFER_SIZE = 1000 # Number of jobs to bulk queue at a time
|
||||
|
||||
# Gets an estimated number of rows for a table
|
||||
def estimate_rows_in_table(table_name)
|
||||
exec_query('SELECT reltuples FROM pg_class WHERE relname = ' +
|
||||
"'#{table_name}'").to_a.first['reltuples']
|
||||
end
|
||||
|
||||
# Adds `created_at` and `updated_at` columns with timezone information.
|
||||
#
|
||||
# This method is an improved version of Rails' built-in method `add_timestamps`.
|
||||
#
|
||||
# Available options are:
|
||||
# default - The default value for the column.
|
||||
# null - When set to `true` the column will allow NULL values.
|
||||
# The default is to not allow NULL values.
|
||||
def add_timestamps_with_timezone(table_name, **options)
|
||||
options[:null] = false if options[:null].nil?
|
||||
|
||||
[:created_at, :updated_at].each do |column_name|
|
||||
if options[:default] && transaction_open?
|
||||
raise '`add_timestamps_with_timezone` with default value cannot be run inside a transaction. ' \
|
||||
'You can disable transactions by calling `disable_ddl_transaction!` ' \
|
||||
'in the body of your migration class'
|
||||
end
|
||||
|
||||
add_column(table_name, column_name, :datetime_with_timezone, **options)
|
||||
end
|
||||
end
|
||||
|
||||
# Creates a new index, concurrently when supported
|
||||
#
|
||||
# On PostgreSQL this method creates an index concurrently, on MySQL this
|
||||
|
@ -746,39 +721,6 @@ module Mastodon
|
|||
rename_index table_name, "#{index_name}_new", index_name
|
||||
end
|
||||
|
||||
# This will replace the first occurrence of a string in a column with
|
||||
# the replacement
|
||||
# On postgresql we can use `regexp_replace` for that.
|
||||
# On mysql we find the location of the pattern, and overwrite it
|
||||
# with the replacement
|
||||
def replace_sql(column, pattern, replacement)
|
||||
quoted_pattern = Arel::Nodes::Quoted.new(pattern.to_s)
|
||||
quoted_replacement = Arel::Nodes::Quoted.new(replacement.to_s)
|
||||
|
||||
replace = Arel::Nodes::NamedFunction
|
||||
.new("regexp_replace", [column, quoted_pattern, quoted_replacement])
|
||||
Arel::Nodes::SqlLiteral.new(replace.to_sql)
|
||||
end
|
||||
|
||||
def remove_foreign_key_without_error(*args)
|
||||
remove_foreign_key(*args)
|
||||
rescue ArgumentError
|
||||
end
|
||||
|
||||
def sidekiq_queue_migrate(queue_from, to:)
|
||||
while sidekiq_queue_length(queue_from) > 0
|
||||
Sidekiq.redis do |conn|
|
||||
conn.rpoplpush "queue:#{queue_from}", "queue:#{to}"
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
def sidekiq_queue_length(queue_name)
|
||||
Sidekiq.redis do |conn|
|
||||
conn.llen("queue:#{queue_name}")
|
||||
end
|
||||
end
|
||||
|
||||
def check_trigger_permissions!(table)
|
||||
unless Grant.create_and_execute_trigger?(table)
|
||||
dbname = ActiveRecord::Base.configurations[Rails.env]['database']
|
||||
|
@ -799,91 +741,6 @@ into similar problems in the future (e.g. when new tables are created).
|
|||
end
|
||||
end
|
||||
|
||||
# Bulk queues background migration jobs for an entire table, batched by ID range.
|
||||
# "Bulk" meaning many jobs will be pushed at a time for efficiency.
|
||||
# If you need a delay interval per job, then use `queue_background_migration_jobs_by_range_at_intervals`.
|
||||
#
|
||||
# model_class - The table being iterated over
|
||||
# job_class_name - The background migration job class as a string
|
||||
# batch_size - The maximum number of rows per job
|
||||
#
|
||||
# Example:
|
||||
#
|
||||
# class Route < ActiveRecord::Base
|
||||
# include EachBatch
|
||||
# self.table_name = 'routes'
|
||||
# end
|
||||
#
|
||||
# bulk_queue_background_migration_jobs_by_range(Route, 'ProcessRoutes')
|
||||
#
|
||||
# Where the model_class includes EachBatch, and the background migration exists:
|
||||
#
|
||||
# class Gitlab::BackgroundMigration::ProcessRoutes
|
||||
# def perform(start_id, end_id)
|
||||
# # do something
|
||||
# end
|
||||
# end
|
||||
def bulk_queue_background_migration_jobs_by_range(model_class, job_class_name, batch_size: BACKGROUND_MIGRATION_BATCH_SIZE)
|
||||
raise "#{model_class} does not have an ID to use for batch ranges" unless model_class.column_names.include?('id')
|
||||
|
||||
jobs = []
|
||||
|
||||
model_class.each_batch(of: batch_size) do |relation|
|
||||
start_id, end_id = relation.pluck('MIN(id), MAX(id)').first
|
||||
|
||||
if jobs.length >= BACKGROUND_MIGRATION_JOB_BUFFER_SIZE
|
||||
# Note: This code path generally only helps with many millions of rows
|
||||
# We push multiple jobs at a time to reduce the time spent in
|
||||
# Sidekiq/Redis operations. We're using this buffer based approach so we
|
||||
# don't need to run additional queries for every range.
|
||||
BackgroundMigrationWorker.perform_bulk(jobs)
|
||||
jobs.clear
|
||||
end
|
||||
|
||||
jobs << [job_class_name, [start_id, end_id]]
|
||||
end
|
||||
|
||||
BackgroundMigrationWorker.perform_bulk(jobs) unless jobs.empty?
|
||||
end
|
||||
|
||||
# Queues background migration jobs for an entire table, batched by ID range.
|
||||
# Each job is scheduled with a `delay_interval` in between.
|
||||
# If you use a small interval, then some jobs may run at the same time.
|
||||
#
|
||||
# model_class - The table being iterated over
|
||||
# job_class_name - The background migration job class as a string
|
||||
# delay_interval - The duration between each job's scheduled time (must respond to `to_f`)
|
||||
# batch_size - The maximum number of rows per job
|
||||
#
|
||||
# Example:
|
||||
#
|
||||
# class Route < ActiveRecord::Base
|
||||
# include EachBatch
|
||||
# self.table_name = 'routes'
|
||||
# end
|
||||
#
|
||||
# queue_background_migration_jobs_by_range_at_intervals(Route, 'ProcessRoutes', 1.minute)
|
||||
#
|
||||
# Where the model_class includes EachBatch, and the background migration exists:
|
||||
#
|
||||
# class Gitlab::BackgroundMigration::ProcessRoutes
|
||||
# def perform(start_id, end_id)
|
||||
# # do something
|
||||
# end
|
||||
# end
|
||||
def queue_background_migration_jobs_by_range_at_intervals(model_class, job_class_name, delay_interval, batch_size: BACKGROUND_MIGRATION_BATCH_SIZE)
|
||||
raise "#{model_class} does not have an ID to use for batch ranges" unless model_class.column_names.include?('id')
|
||||
|
||||
model_class.each_batch(of: batch_size) do |relation, index|
|
||||
start_id, end_id = relation.pluck('MIN(id), MAX(id)').first
|
||||
|
||||
# `BackgroundMigrationWorker.bulk_perform_in` schedules all jobs for
|
||||
# the same time, which is not helpful in most cases where we wish to
|
||||
# spread the work over time.
|
||||
BackgroundMigrationWorker.perform_in(delay_interval * index, job_class_name, [start_id, end_id])
|
||||
end
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
# https://github.com/rails/rails/blob/v5.2.0/activerecord/lib/active_record/connection_adapters/postgresql/schema_statements.rb#L678-L684
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue