Add Fetch All Replies Part 1: Backend (#32615)

Signed-off-by: sneakers-the-rat <sneakers-the-rat@protonmail.com>
Co-authored-by: jonny <j@nny.fyi>
Co-authored-by: Claire <claire.github-309c@sitedethib.com>
Co-authored-by: Kouhai <66407198+kouhaidev@users.noreply.github.com>
This commit is contained in:
Jonny Saunders 2025-03-12 02:03:01 -07:00 committed by GitHub
parent 2fe7172002
commit 46e13dd81c
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
18 changed files with 874 additions and 25 deletions

View file

@ -0,0 +1,132 @@
# frozen_string_literal: true
require 'rails_helper'
# Specs for the query scopes and predicate exercised below on Status:
# `should_fetch_replies`, `should_not_fetch_replies`, `unsubscribed` and
# `unsubscribed?`.
RSpec.describe Status::FetchRepliesConcern do
  let!(:alice) { Fabricate(:account, username: 'alice') }
  let!(:bob) { Fabricate(:account, username: 'bob', domain: 'other.com') }

  # Author of the fixture statuses below. Defaults to the account without a
  # domain (alice); the "remote status" contexts override it with bob.
  let!(:account) { alice }

  # Four fixtures covering every combination the scopes care about.
  let!(:status_old) { Fabricate(:status, account: account, fetched_replies_at: 1.year.ago, created_at: 1.year.ago) }
  let!(:status_fetched_recently) { Fabricate(:status, account: account, fetched_replies_at: 1.second.ago, created_at: 1.year.ago) }
  let!(:status_created_recently) { Fabricate(:status, account: account, created_at: 1.second.ago) }
  let!(:status_never_fetched) { Fabricate(:status, account: account, created_at: 1.year.ago) }

  describe 'should_fetch_replies' do
    let!(:statuses) { Status.should_fetch_replies.all }

    context 'with a local status' do
      it 'never fetches local replies' do
        expect(statuses).to eq([])
      end
    end

    context 'with a remote status' do
      let(:account) { bob }

      it 'fetches old statuses' do
        expect(statuses).to include(status_old)
      end

      it 'fetches statuses that have never been fetched and weren\'t created recently' do
        expect(statuses).to include(status_never_fetched)
      end

      it 'does not fetch statuses that were fetched recently' do
        expect(statuses).to_not include(status_fetched_recently)
      end

      it 'does not fetch statuses that were created recently' do
        expect(statuses).to_not include(status_created_recently)
      end
    end
  end

  # Mirror image of the group above: every expectation is inverted.
  describe 'should_not_fetch_replies' do
    let!(:statuses) { Status.should_not_fetch_replies.all }

    context 'with a local status' do
      it 'does not fetch local statuses' do
        # All four local fixtures must be excluded from fetching.
        expect(statuses).to include(status_old, status_never_fetched, status_fetched_recently, status_created_recently)
      end
    end

    context 'with a remote status' do
      let(:account) { bob }

      it 'fetches old statuses' do
        expect(statuses).to_not include(status_old)
      end

      it 'fetches statuses that have never been fetched and weren\'t created recently' do
        expect(statuses).to_not include(status_never_fetched)
      end

      it 'does not fetch statuses that were fetched recently' do
        expect(statuses).to include(status_fetched_recently)
      end

      it 'does not fetch statuses that were created recently' do
        expect(statuses).to include(status_created_recently)
      end
    end
  end

  describe 'unsubscribed' do
    let!(:spike) { Fabricate(:account, username: 'spike', domain: 'other.com') }
    # Status authored by bob and last updated one day ago; the follow fixtures
    # below are created either after (now) or before (2 days ago) that update.
    let!(:status) { Fabricate(:status, account: bob, updated_at: 1.day.ago) }

    context 'when the status is from an account with only remote followers after last update' do
      before do
        Fabricate(:follow, account: spike, target_account: bob)
      end

      it 'shows the status as unsubscribed' do
        expect(Status.unsubscribed).to eq([status])
        expect(status.unsubscribed?).to be(true)
      end
    end

    context 'when the status is from an account with only remote followers before last update' do
      before do
        Fabricate(:follow, account: spike, target_account: bob, created_at: 2.days.ago)
      end

      it 'shows the status as unsubscribed' do
        expect(Status.unsubscribed).to eq([status])
        expect(status.unsubscribed?).to be(true)
      end
    end

    context 'when status is from account with local followers after last update' do
      before do
        Fabricate(:follow, account: alice, target_account: bob)
      end

      it 'shows the status as unsubscribed' do
        expect(Status.unsubscribed).to eq([status])
        expect(status.unsubscribed?).to be(true)
      end
    end

    # The only case in this group where the status counts as subscribed.
    context 'when status is from account with local followers before last update' do
      before do
        Fabricate(:follow, account: alice, target_account: bob, created_at: 2.days.ago)
      end

      it 'does not show the status as unsubscribed' do
        expect(Status.unsubscribed).to eq([])
        expect(status.unsubscribed?).to be(false)
      end
    end

    context 'when the status has no followers' do
      it 'shows the status as unsubscribed' do
        expect(Status.unsubscribed).to eq([status])
        expect(status.unsubscribed?).to be(true)
      end
    end
  end
end

View file

@ -0,0 +1,90 @@
# frozen_string_literal: true
require 'rails_helper'
# Specs for ActivityPub::FetchAllRepliesService#call: which reply URIs from a
# replies collection are handed to FetchReplyWorker.push_bulk.
RSpec.describe ActivityPub::FetchAllRepliesService do
  subject { described_class.new }

  let(:actor) { Fabricate(:account, domain: 'example.com', uri: 'http://example.com/account') }
  let(:status) { Fabricate(:status, account: actor) }
  let(:collection_uri) { 'http://example.com/replies/1' }

  # Nine reply URIs across two domains, in the order the collection lists them.
  let(:items) do
    %w(
      http://example.com/self-reply-1
      http://example.com/self-reply-2
      http://example.com/self-reply-3
      http://other.com/other-reply-1
      http://other.com/other-reply-2
      http://other.com/other-reply-3
      http://example.com/self-reply-4
      http://example.com/self-reply-5
      http://example.com/self-reply-6
    )
  end

  let(:payload) do
    {
      '@context': 'https://www.w3.org/ns/activitystreams',
      type: 'Collection',
      id: collection_uri,
      items: items,
    }.with_indifferent_access
  end

  describe '#call' do
    # Every example only inspects what would have been enqueued.
    before { allow(FetchReplyWorker).to receive(:push_bulk) }

    it 'fetches more than the default maximum and from multiple domains' do
      subject.call(payload, status.uri)

      expect(FetchReplyWorker).to have_received(:push_bulk).with(items)
    end

    context 'with a recent status' do
      let(:recently_fetched_uri) { 'http://example.com/self-reply-2' }

      before do
        Fabricate(:status, uri: recently_fetched_uri, fetched_replies_at: 1.second.ago, local: false)
      end

      it 'skips statuses that have been updated recently' do
        subject.call(payload, status.uri)

        expect(FetchReplyWorker).to have_received(:push_bulk).with(items - [recently_fetched_uri])
      end
    end

    context 'with an old status' do
      let(:stale_uri) { 'http://other.com/other-reply-1' }

      before do
        Fabricate(:status, uri: stale_uri, fetched_replies_at: 1.year.ago, created_at: 1.year.ago, account: actor)
      end

      it 'updates the time that fetched statuses were last fetched' do
        subject.call(payload, status.uri)

        refreshed = Status.find_by(uri: stale_uri)
        expect(refreshed.fetched_replies_at).to be >= 1.minute.ago
      end
    end

    context 'with unsubscribed replies' do
      let(:unsubscribed_uri) { 'http://other.com/account/unsubscribed' }

      before do
        remote_actor = Fabricate(:account, domain: 'other.com', uri: 'http://other.com/account')
        # reply not in the collection from the remote instance, but we know about anyway without anyone following the account
        Fabricate(:status, account: remote_actor, in_reply_to_id: status.id, uri: unsubscribed_uri, fetched_replies_at: 1.year.ago, created_at: 1.year.ago)
      end

      it 'updates the unsubscribed replies' do
        subject.call(payload, status.uri)

        # The locally known reply is appended after the collection's own items.
        expect(FetchReplyWorker).to have_received(:push_bulk).with(items + [unsubscribed_uri])
      end
    end
  end
end

View file

@ -9,6 +9,9 @@ RSpec.describe ActivityPub::FetchRemoteStatusService do
let!(:sender) { Fabricate(:account, domain: 'foo.bar', uri: 'https://foo.bar') }
let(:follower) { Fabricate(:account, username: 'alice') }
let(:follow) { nil }
let(:response) { { body: Oj.dump(object), headers: { 'content-type': 'application/activity+json' } } }
let(:existing_status) { nil }
let(:note) do
@ -23,13 +26,14 @@ RSpec.describe ActivityPub::FetchRemoteStatusService do
before do
stub_request(:get, 'https://foo.bar/watch?v=12345').to_return(status: 404, body: '')
stub_request(:get, object[:id]).to_return(body: Oj.dump(object))
stub_request(:get, object[:id]).to_return(**response)
end
describe '#call' do
before do
follow
existing_status
subject.call(object[:id], prefetched_body: Oj.dump(object))
subject.call(object[:id])
end
context 'with Note object' do
@ -254,6 +258,51 @@ RSpec.describe ActivityPub::FetchRemoteStatusService do
expect(existing_status.text).to eq 'Lorem ipsum'
expect(existing_status.edits).to_not be_empty
end
# Covers what happens to an already-stored status when re-fetching its URI
# returns a 404. The surrounding spec stubs the object URL with `response`
# and invokes the service from a `before` hook, so each example below only
# inspects the resulting database state.
context 'when the status appears to have been deleted at source' do
# Make the stubbed remote fetch answer "not found" with an empty body.
let(:response) { { status: 404, body: '' } }
# Asserts the stored status still exists and is unmodified.
# NOTE(review): relies on `existing_status` being a persisted status with
# text 'Foo' defined by an enclosing context not visible here — confirm.
shared_examples 'no delete' do
it 'does not delete the status' do
existing_status.reload
expect(existing_status.text).to eq 'Foo'
expect(existing_status.edits).to be_empty
end
end
# No follow fixture is overridden here, so the inherited `follow` applies.
context 'when the status is orphaned/unsubscribed' do
it 'deletes the orphaned status' do
expect { existing_status.reload }.to raise_error(ActiveRecord::RecordNotFound)
end
end
context 'when the status is from an account with only remote followers' do
# The follower lives on a remote domain and followed two days ago.
let(:follower) { Fabricate(:account, username: 'alice', domain: 'foo.bar') }
let(:follow) { Fabricate(:follow, account: follower, target_account: sender, created_at: 2.days.ago) }
it 'deletes the orphaned status' do
expect { existing_status.reload }.to raise_error(ActiveRecord::RecordNotFound)
end
# Private and direct statuses are expected to survive the 404.
context 'when the status is private' do
let(:existing_status) { Fabricate(:status, account: sender, text: 'Foo', uri: note[:id], visibility: :private) }
it_behaves_like 'no delete'
end
context 'when the status is direct' do
let(:existing_status) { Fabricate(:status, account: sender, text: 'Foo', uri: note[:id], visibility: :direct) }
it_behaves_like 'no delete'
end
end
# Uses the inherited `follower`, with a follow created two days ago.
context 'when the status is from an account with local followers' do
let(:follow) { Fabricate(:follow, account: follower, target_account: sender, created_at: 2.days.ago) }
it_behaves_like 'no delete'
end
end
end
context 'with a Create activity' do

View file

@ -0,0 +1,280 @@
# frozen_string_literal: true
require 'rails_helper'
# Specs for ActivityPub::FetchAllRepliesWorker#perform: recursive collection
# of reply URIs from a status' replies collection, in its inlined, by-URI and
# paginated forms, and the MAX_REPLIES / MAX_PAGES limits.
RSpec.describe ActivityPub::FetchAllRepliesWorker do
  subject { described_class.new }

  let(:top_items) do
    [
      'http://example.com/self-reply-1',
      'http://other.com/other-reply-2',
      'http://example.com/self-reply-3',
    ]
  end

  let(:top_items_paged) do
    [
      'http://example.com/self-reply-4',
      'http://other.com/other-reply-5',
      'http://example.com/self-reply-6',
    ]
  end

  let(:nested_items) do
    [
      'http://example.com/nested-self-reply-1',
      'http://other.com/nested-other-reply-2',
      'http://example.com/nested-self-reply-3',
    ]
  end

  let(:nested_items_paged) do
    [
      'http://example.com/nested-self-reply-4',
      'http://other.com/nested-other-reply-5',
      'http://example.com/nested-self-reply-6',
    ]
  end

  let(:all_items) do
    top_items + top_items_paged + nested_items + nested_items_paged
  end

  let(:top_note_uri) do
    'http://example.com/top-post'
  end

  let(:top_collection_uri) do
    'http://example.com/top-post/replies'
  end

  # The reply uri that has the nested replies under it
  let(:reply_note_uri) do
    'http://other.com/other-reply-2'
  end

  # The collection uri of nested replies
  let(:reply_collection_uri) do
    'http://other.com/other-reply-2/replies'
  end

  let(:replies_top) do
    {
      '@context': 'https://www.w3.org/ns/activitystreams',
      id: top_collection_uri,
      type: 'Collection',
      items: top_items + top_items_paged,
    }
  end

  let(:replies_nested) do
    {
      '@context': 'https://www.w3.org/ns/activitystreams',
      id: reply_collection_uri,
      type: 'Collection',
      items: nested_items + nested_items_paged,
    }
  end

  # The status resource for the top post
  let(:top_object) do
    {
      '@context': 'https://www.w3.org/ns/activitystreams',
      id: top_note_uri,
      type: 'Note',
      content: 'Lorem ipsum',
      replies: replies_top,
      attributedTo: 'https://example.com',
    }
  end

  # The status resource that has the uri to the replies collection
  let(:reply_object) do
    {
      '@context': 'https://www.w3.org/ns/activitystreams',
      id: reply_note_uri,
      type: 'Note',
      content: 'Lorem ipsum',
      replies: replies_nested,
      attributedTo: 'https://other.com',
    }
  end

  # Served for every reply URI that has no replies of its own
  let(:empty_object) do
    {
      '@context': 'https://www.w3.org/ns/activitystreams',
      id: 'https://example.com/empty',
      type: 'Note',
      content: 'Lorem ipsum',
      replies: [],
      attributedTo: 'https://example.com',
    }
  end

  let(:account) { Fabricate(:account, domain: 'example.com') }

  # Created one day plus the initial wait before now.
  let(:status) do
    Fabricate(
      :status,
      account: account,
      uri: top_note_uri,
      created_at: 1.day.ago - Status::FetchRepliesConcern::FETCH_REPLIES_INITIAL_WAIT_MINUTES
    )
  end

  # Stubs a GET for `uri` that answers 200 with `object` serialized as
  # ActivityPub JSON.
  def stub_activity_json(uri, object)
    stub_request(:get, uri).to_return(status: 200, body: Oj.dump(object), headers: { 'Content-Type': 'application/activity+json' })
  end

  before do
    allow(FetchReplyWorker).to receive(:push_bulk)

    # Every reply resolves to an empty note, except the two URIs that get
    # their own dedicated objects right below.
    (all_items - [top_note_uri, reply_note_uri]).each do |item|
      stub_activity_json(item, empty_object)
    end

    stub_activity_json(top_note_uri, top_object)
    stub_activity_json(reply_note_uri, reply_object)
  end

  shared_examples 'fetches all replies' do
    it 'fetches statuses recursively' do
      got_uris = subject.perform(status.id)
      expect(got_uris).to match_array(all_items)
    end

    it 'respects the maximum limits set by not recursing after the max is reached' do
      stub_const('ActivityPub::FetchAllRepliesWorker::MAX_REPLIES', 5)
      got_uris = subject.perform(status.id)
      expect(got_uris).to match_array(top_items + top_items_paged)
    end
  end

  describe 'perform' do
    context 'when the payload is a Note with replies as a Collection of inlined replies' do
      it_behaves_like 'fetches all replies'
    end

    context 'when the payload is a Note with replies as a URI to a Collection' do
      let(:top_object) do
        {
          '@context': 'https://www.w3.org/ns/activitystreams',
          id: top_note_uri,
          type: 'Note',
          content: 'Lorem ipsum',
          replies: top_collection_uri,
          attributedTo: 'https://example.com',
        }
      end

      let(:reply_object) do
        {
          '@context': 'https://www.w3.org/ns/activitystreams',
          id: reply_note_uri,
          type: 'Note',
          content: 'Lorem ipsum',
          replies: reply_collection_uri,
          attributedTo: 'https://other.com',
        }
      end

      before do
        stub_activity_json(top_collection_uri, replies_top)
        stub_activity_json(reply_collection_uri, replies_nested)
      end

      it_behaves_like 'fetches all replies'
    end

    context 'when the payload is a Note with replies as a paginated collection' do
      let(:top_page_2_uri) do
        "#{top_collection_uri}/2"
      end

      let(:reply_page_2_uri) do
        "#{reply_collection_uri}/2"
      end

      let(:top_object) do
        {
          '@context': 'https://www.w3.org/ns/activitystreams',
          id: top_note_uri,
          type: 'Note',
          content: 'Lorem ipsum',
          replies: {
            type: 'Collection',
            id: top_collection_uri,
            first: {
              type: 'CollectionPage',
              partOf: top_collection_uri,
              items: top_items,
              next: top_page_2_uri,
            },
          },
          attributedTo: 'https://example.com',
        }
      end

      let(:reply_object) do
        {
          '@context': 'https://www.w3.org/ns/activitystreams',
          id: reply_note_uri,
          type: 'Note',
          content: 'Lorem ipsum',
          replies: {
            type: 'Collection',
            id: reply_collection_uri,
            first: {
              type: 'CollectionPage',
              partOf: reply_collection_uri,
              items: nested_items,
              next: reply_page_2_uri,
            },
          },
          attributedTo: 'https://other.com',
        }
      end

      let(:top_page_two) do
        {
          type: 'CollectionPage',
          id: top_page_2_uri,
          partOf: top_collection_uri,
          items: top_items_paged,
        }
      end

      let(:reply_page_two) do
        {
          type: 'CollectionPage',
          id: reply_page_2_uri,
          partOf: reply_collection_uri,
          items: nested_items_paged,
        }
      end

      before do
        stub_activity_json(top_page_2_uri, top_page_two)
        stub_activity_json(reply_page_2_uri, reply_page_two)
      end

      it_behaves_like 'fetches all replies'

      it 'limits by max pages' do
        stub_const('ActivityPub::FetchAllRepliesWorker::MAX_PAGES', 3)
        got_uris = subject.perform(status.id)
        # With the cap at 3, the second page of nested replies is never reached.
        expect(got_uris).to match_array(top_items + top_items_paged + nested_items)
      end
    end

    context 'when replies should not be fetched' do
      # ensure that we should not fetch by setting the status to be created in the debounce window
      let(:status) { Fabricate(:status, account: account, uri: top_note_uri, created_at: Time.current) }

      before do
        stub_const('Status::FetchRepliesConcern::FETCH_REPLIES_INITIAL_WAIT_MINUTES', 1.week)
      end

      it 'returns nil without fetching' do
        got_uris = subject.perform(status.id)

        expect(got_uris).to be_nil
        expect(a_request(:get, top_note_uri)).to_not have_been_made
      end
    end
  end
end