Add annual reports for accounts (#28693)

This commit is contained in:
Eugen Rochko 2024-01-24 10:38:10 +01:00 committed by GitHub
parent 01ce9df880
commit 5b1eb09d54
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
21 changed files with 480 additions and 1 deletions

43
app/lib/annual_report.rb Normal file
View file

@ -0,0 +1,43 @@
# frozen_string_literal: true
class AnnualReport
include DatabaseHelper
SOURCES = [
AnnualReport::Archetype,
AnnualReport::TypeDistribution,
AnnualReport::TopStatuses,
AnnualReport::MostUsedApps,
AnnualReport::CommonlyInteractedWithAccounts,
AnnualReport::TimeSeries,
AnnualReport::TopHashtags,
AnnualReport::MostRebloggedAccounts,
AnnualReport::Percentiles,
].freeze
SCHEMA = 1
def initialize(account, year)
@account = account
@year = year
end
def generate
return if GeneratedAnnualReport.exists?(account: @account, year: @year)
GeneratedAnnualReport.create(
account: @account,
year: @year,
schema_version: SCHEMA,
data: data
)
end
private
def data
with_read_replica do
SOURCES.each_with_object({}) { |klass, hsh| hsh.merge!(klass.new(@account, @year).generate) }
end
end
end

View file

@ -0,0 +1,49 @@
# frozen_string_literal: true
class AnnualReport::Archetype < AnnualReport::Source
# Average number of posts (including replies and reblogs) made by
# each active user in a single year (2023)
AVERAGE_PER_YEAR = 113
def generate
{
archetype: archetype,
}
end
private
def archetype
if (standalone_count + replies_count + reblogs_count) < AVERAGE_PER_YEAR
:lurker
elsif reblogs_count > (standalone_count * 2)
:booster
elsif polls_count > (standalone_count * 0.1) # standalone_count includes posts with polls
:pollster
elsif replies_count > (standalone_count * 2)
:replier
else
:oracle
end
end
def polls_count
@polls_count ||= base_scope.where.not(poll_id: nil).count
end
def reblogs_count
@reblogs_count ||= base_scope.where.not(reblog_of_id: nil).count
end
def replies_count
@replies_count ||= base_scope.where.not(in_reply_to_id: nil).where.not(in_reply_to_account_id: @account.id).count
end
def standalone_count
@standalone_count ||= base_scope.without_replies.without_reblogs.count
end
def base_scope
@account.statuses.where(id: year_as_snowflake_range)
end
end

View file

@ -0,0 +1,22 @@
# frozen_string_literal: true
class AnnualReport::CommonlyInteractedWithAccounts < AnnualReport::Source
SET_SIZE = 40
def generate
{
commonly_interacted_with_accounts: commonly_interacted_with_accounts.map do |(account_id, count)|
{
account_id: account_id,
count: count,
}
end,
}
end
private
def commonly_interacted_with_accounts
@account.statuses.reorder(nil).where(id: year_as_snowflake_range).where.not(in_reply_to_account_id: @account.id).group(:in_reply_to_account_id).having('count(*) > 1').order(total: :desc).limit(SET_SIZE).pluck(Arel.sql('in_reply_to_account_id, count(*) AS total'))
end
end

View file

@ -0,0 +1,22 @@
# frozen_string_literal: true
class AnnualReport::MostRebloggedAccounts < AnnualReport::Source
SET_SIZE = 10
def generate
{
most_reblogged_accounts: most_reblogged_accounts.map do |(account_id, count)|
{
account_id: account_id,
count: count,
}
end,
}
end
private
def most_reblogged_accounts
@account.statuses.reorder(nil).where(id: year_as_snowflake_range).where.not(reblog_of_id: nil).joins(reblog: :account).group('accounts.id').having('count(*) > 1').order(total: :desc).limit(SET_SIZE).pluck(Arel.sql('accounts.id, count(*) as total'))
end
end

View file

@ -0,0 +1,22 @@
# frozen_string_literal: true
class AnnualReport::MostUsedApps < AnnualReport::Source
SET_SIZE = 10
def generate
{
most_used_apps: most_used_apps.map do |(name, count)|
{
name: name,
count: count,
}
end,
}
end
private
def most_used_apps
@account.statuses.reorder(nil).where(id: year_as_snowflake_range).joins(:application).group('oauth_applications.name').order(total: :desc).limit(SET_SIZE).pluck(Arel.sql('oauth_applications.name, count(*) as total'))
end
end

View file

@ -0,0 +1,62 @@
# frozen_string_literal: true
class AnnualReport::Percentiles < AnnualReport::Source
def generate
{
percentiles: {
followers: (total_with_fewer_followers / (total_with_any_followers + 1.0)) * 100,
statuses: (total_with_fewer_statuses / (total_with_any_statuses + 1.0)) * 100,
},
}
end
private
def followers_gained
@followers_gained ||= @account.passive_relationships.where("date_part('year', follows.created_at) = ?", @year).count
end
def statuses_created
@statuses_created ||= @account.statuses.where(id: year_as_snowflake_range).count
end
def total_with_fewer_followers
@total_with_fewer_followers ||= Follow.find_by_sql([<<~SQL.squish, { year: @year, comparison: followers_gained }]).first.total
WITH tmp0 AS (
SELECT follows.target_account_id
FROM follows
INNER JOIN accounts ON accounts.id = follows.target_account_id
WHERE date_part('year', follows.created_at) = :year
AND accounts.domain IS NULL
GROUP BY follows.target_account_id
HAVING COUNT(*) < :comparison
)
SELECT count(*) AS total
FROM tmp0
SQL
end
def total_with_fewer_statuses
@total_with_fewer_statuses ||= Status.find_by_sql([<<~SQL.squish, { comparison: statuses_created, min_id: year_as_snowflake_range.first, max_id: year_as_snowflake_range.last }]).first.total
WITH tmp0 AS (
SELECT statuses.account_id
FROM statuses
INNER JOIN accounts ON accounts.id = statuses.account_id
WHERE statuses.id BETWEEN :min_id AND :max_id
AND accounts.domain IS NULL
GROUP BY statuses.account_id
HAVING count(*) < :comparison
)
SELECT count(*) AS total
FROM tmp0
SQL
end
def total_with_any_followers
@total_with_any_followers ||= Follow.where("date_part('year', follows.created_at) = ?", @year).joins(:target_account).merge(Account.local).count('distinct follows.target_account_id')
end
def total_with_any_statuses
@total_with_any_statuses ||= Status.where(id: year_as_snowflake_range).joins(:account).merge(Account.local).count('distinct statuses.account_id')
end
end

View file

@ -0,0 +1,16 @@
# frozen_string_literal: true
class AnnualReport::Source
attr_reader :account, :year
def initialize(account, year)
@account = account
@year = year
end
protected
def year_as_snowflake_range
(Mastodon::Snowflake.id_at(DateTime.new(year, 1, 1))..Mastodon::Snowflake.id_at(DateTime.new(year, 12, 31)))
end
end

View file

@ -0,0 +1,30 @@
# frozen_string_literal: true
class AnnualReport::TimeSeries < AnnualReport::Source
def generate
{
time_series: (1..12).map do |month|
{
month: month,
statuses: statuses_per_month[month] || 0,
following: following_per_month[month] || 0,
followers: followers_per_month[month] || 0,
}
end,
}
end
private
def statuses_per_month
@statuses_per_month ||= @account.statuses.reorder(nil).where(id: year_as_snowflake_range).group(:period).pluck(Arel.sql("date_part('month', created_at)::int AS period, count(*)")).to_h
end
def following_per_month
@following_per_month ||= @account.active_relationships.where("date_part('year', created_at) = ?", @year).group(:period).pluck(Arel.sql("date_part('month', created_at)::int AS period, count(*)")).to_h
end
def followers_per_month
@followers_per_month ||= @account.passive_relationships.where("date_part('year', created_at) = ?", @year).group(:period).pluck(Arel.sql("date_part('month', created_at)::int AS period, count(*)")).to_h
end
end

View file

@ -0,0 +1,22 @@
# frozen_string_literal: true
class AnnualReport::TopHashtags < AnnualReport::Source
SET_SIZE = 40
def generate
{
top_hashtags: top_hashtags.map do |(name, count)|
{
name: name,
count: count,
}
end,
}
end
private
def top_hashtags
Tag.joins(:statuses).where(statuses: { id: @account.statuses.where(id: year_as_snowflake_range).reorder(nil).select(:id) }).group(:id).having('count(*) > 1').order(total: :desc).limit(SET_SIZE).pluck(Arel.sql('COALESCE(tags.display_name, tags.name), count(*) AS total'))
end
end

View file

@ -0,0 +1,21 @@
# frozen_string_literal: true
class AnnualReport::TopStatuses < AnnualReport::Source
def generate
top_reblogs = base_scope.order(reblogs_count: :desc).first&.id
top_favourites = base_scope.where.not(id: top_reblogs).order(favourites_count: :desc).first&.id
top_replies = base_scope.where.not(id: [top_reblogs, top_favourites]).order(replies_count: :desc).first&.id
{
top_statuses: {
by_reblogs: top_reblogs,
by_favourites: top_favourites,
by_replies: top_replies,
},
}
end
def base_scope
@account.statuses.with_public_visibility.joins(:status_stat).where(id: year_as_snowflake_range).reorder(nil)
end
end

View file

@ -0,0 +1,20 @@
# frozen_string_literal: true
class AnnualReport::TypeDistribution < AnnualReport::Source
def generate
{
type_distribution: {
total: base_scope.count,
reblogs: base_scope.where.not(reblog_of_id: nil).count,
replies: base_scope.where.not(in_reply_to_id: nil).where.not(in_reply_to_account_id: @account.id).count,
standalone: base_scope.without_replies.without_reblogs.count,
},
}
end
private
def base_scope
@account.statuses.where(id: year_as_snowflake_range)
end
end