Fix performance of percentile calculation for annual reports (#32765)

This commit is contained in:
Eugen Rochko 2024-11-04 11:11:06 +01:00 committed by GitHub
parent 823f597f00
commit 90f4ffa31d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 77 additions and 47 deletions

View file

@ -1,62 +1,37 @@
# frozen_string_literal: true
class AnnualReport::Percentiles < AnnualReport::Source
def self.prepare(year)
AnnualReport::StatusesPerAccountCount.connection.exec_query(<<~SQL.squish, nil, [year, Mastodon::Snowflake.id_at(DateTime.new(year).beginning_of_year), Mastodon::Snowflake.id_at(DateTime.new(year).end_of_year)])
INSERT INTO annual_report_statuses_per_account_counts (year, account_id, statuses_count)
SELECT $1, account_id, count(*)
FROM statuses
WHERE id BETWEEN $2 AND $3
AND (local OR uri IS NULL)
GROUP BY account_id
ON CONFLICT (year, account_id) DO NOTHING
SQL
end
def generate
{
percentiles: {
followers: (total_with_fewer_followers / (total_with_any_followers + 1.0)) * 100,
statuses: (total_with_fewer_statuses / (total_with_any_statuses + 1.0)) * 100,
statuses: 100.0 - ((total_with_fewer_statuses / (total_with_any_statuses + 1.0)) * 100),
},
}
end
private
def followers_gained
@followers_gained ||= @account.passive_relationships.where("date_part('year', follows.created_at) = ?", @year).count
end
def statuses_created
@statuses_created ||= report_statuses.count
end
def total_with_fewer_followers
@total_with_fewer_followers ||= Follow.find_by_sql([<<~SQL.squish, { year: @year, comparison: followers_gained }]).first.total
WITH tmp0 AS (
SELECT follows.target_account_id
FROM follows
INNER JOIN accounts ON accounts.id = follows.target_account_id
WHERE date_part('year', follows.created_at) = :year
AND accounts.domain IS NULL
GROUP BY follows.target_account_id
HAVING COUNT(*) < :comparison
)
SELECT count(*) AS total
FROM tmp0
SQL
end
def total_with_fewer_statuses
@total_with_fewer_statuses ||= Status.find_by_sql([<<~SQL.squish, { comparison: statuses_created, min_id: year_as_snowflake_range.first, max_id: year_as_snowflake_range.last }]).first.total
WITH tmp0 AS (
SELECT statuses.account_id
FROM statuses
INNER JOIN accounts ON accounts.id = statuses.account_id
WHERE statuses.id BETWEEN :min_id AND :max_id
AND accounts.domain IS NULL
GROUP BY statuses.account_id
HAVING count(*) < :comparison
)
SELECT count(*) AS total
FROM tmp0
SQL
end
def total_with_any_followers
@total_with_any_followers ||= Follow.where("date_part('year', follows.created_at) = ?", @year).joins(:target_account).merge(Account.local).count('distinct follows.target_account_id')
@total_with_fewer_statuses ||= AnnualReport::StatusesPerAccountCount.where(year: year).where(statuses_count: ...statuses_created).count
end
def total_with_any_statuses
@total_with_any_statuses ||= Status.where(id: year_as_snowflake_range).joins(:account).merge(Account.local).count('distinct statuses.account_id')
@total_with_any_statuses ||= AnnualReport::StatusesPerAccountCount.where(year: year).count
end
end