From bea82f2279ec57089922900e284e74af467e487f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?KMY=EF=BC=88=E9=9B=AA=E3=81=82=E3=81=99=E3=81=8B=EF=BC=89?= Date: Tue, 12 Dec 2023 09:52:08 +0900 Subject: [PATCH] =?UTF-8?q?Fix:=20#284=20`FetchInstanceInfoWorker`?= =?UTF-8?q?=E3=81=8C=E5=8E=9F=E5=9B=A0=E3=81=A7Sidekiq=E3=81=AEJob?= =?UTF-8?q?=E3=81=8C=E8=A9=B0=E3=81=BE=E3=82=8B=E5=95=8F=E9=A1=8C=20(#342)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Fix: #284 `FetchInstanceInfoWorker`が原因でSidekiqのJobが詰まる問題 * Fix: InstanceInfoを取得するタイミング * Fix test * Fix test * Fix: HTTPコード * 調整 --- .../activitypub/process_account_service.rb | 5 +-- .../activitypub/fetch_instance_info_worker.rb | 34 +++++++++---------- .../update_instance_info_scheduler.rb | 15 -------- config/sidekiq.yml | 4 --- spec/lib/activitypub/activity/update_spec.rb | 1 + .../process_account_service_spec.rb | 4 +++ .../fetch_instance_info_worker_spec.rb | 20 +++++++++++ .../update_instance_info_scheduler_spec.rb | 19 ----------- 8 files changed, 44 insertions(+), 58 deletions(-) delete mode 100644 app/workers/scheduler/update_instance_info_scheduler.rb delete mode 100644 spec/workers/scheduler/update_instance_info_scheduler_spec.rb diff --git a/app/services/activitypub/process_account_service.rb b/app/services/activitypub/process_account_service.rb index f3b1c84615..a1a07eae04 100644 --- a/app/services/activitypub/process_account_service.rb +++ b/app/services/activitypub/process_account_service.rb @@ -46,7 +46,6 @@ class ActivityPub::ProcessAccountService < BaseService end create_account - fetch_instance_info end update_account @@ -66,6 +65,8 @@ class ActivityPub::ProcessAccountService < BaseService check_links! if @account.fields.any?(&:requires_verification?) end + fetch_instance_info + @account rescue Oj::ParseError nil @@ -210,7 +211,7 @@ class ActivityPub::ProcessAccountService < BaseService end def fetch_instance_info - ActivityPub::FetchInstanceInfoWorker.perform_async(@account.domain) unless InstanceInfo.exists?(domain: @account.domain) + ActivityPub::FetchInstanceInfoWorker.perform_async(@account.domain) unless Rails.cache.exist?("fetch_instance_info:#{@account.domain}", expires_in: 1.day) end def actor_type diff --git a/app/workers/activitypub/fetch_instance_info_worker.rb b/app/workers/activitypub/fetch_instance_info_worker.rb index 57cbd97d10..bc9a1a4815 100644 --- a/app/workers/activitypub/fetch_instance_info_worker.rb +++ b/app/workers/activitypub/fetch_instance_info_worker.rb @@ -8,28 +8,32 @@ class ActivityPub::FetchInstanceInfoWorker sidekiq_options queue: 'push', retry: 2 - class Error < StandardError; end - class RequestError < Error; end - class DeadError < Error; end - SUPPORTED_NOTEINFO_RELS = ['http://nodeinfo.diaspora.software/ns/schema/2.0', 'http://nodeinfo.diaspora.software/ns/schema/2.1'].freeze def perform(domain) @instance = Instance.find_by(domain: domain) return if !@instance || @instance.unavailable_domain.present? - with_redis_lock("instance_info:#{domain}") do - link = nodeinfo_link - return if link.nil? - - update_info!(link) + Rails.cache.fetch("fetch_instance_info:#{@instance.domain}", expires_in: 1.day, race_condition_ttl: 1.hour) do + fetch! end - rescue ActivityPub::FetchInstanceInfoWorker::DeadError + true end private + def fetch! + link = nodeinfo_link + return if link.nil? + + update_info!(link) + + true + rescue Mastodon::UnexpectedResponseError + true + end + def nodeinfo_link nodeinfo = fetch_json("https://#{@instance.domain}/.well-known/nodeinfo") return nil if nodeinfo.nil? || !nodeinfo.key?('links') @@ -63,15 +67,9 @@ class ActivityPub::FetchInstanceInfoWorker def fetch_json(url) build_request(url).perform do |response| - if [200, 203].include?(response.code) - raise Mastodon::UnexpectedResponseError, response unless response_successful?(response) || response_error_unsalvageable?(response) + raise Mastodon::UnexpectedResponseError, response unless response_successful?(response) || response_error_unsalvageable?(response) - body_to_json(response.body_with_limit) - elsif [400, 401, 403, 404, 410].include?(response.code) - raise ActivityPub::FetchInstanceInfoWorker::DeadError, "Request for #{@instance.domain} returned HTTP #{response.code}" - else - raise ActivityPub::FetchInstanceInfoWorker::RequestError, "Request for #{@instance.domain} returned HTTP #{response.code}" - end + body_to_json(response.body_with_limit) end end diff --git a/app/workers/scheduler/update_instance_info_scheduler.rb b/app/workers/scheduler/update_instance_info_scheduler.rb deleted file mode 100644 index f5b2852859..0000000000 --- a/app/workers/scheduler/update_instance_info_scheduler.rb +++ /dev/null @@ -1,15 +0,0 @@ -# frozen_string_literal: true - -class Scheduler::UpdateInstanceInfoScheduler - include Sidekiq::Worker - - sidekiq_options retry: 0, lock: :until_executed, lock_ttl: 1.day.to_i - - def perform - Instance.select(:domain).reorder(nil).find_in_batches do |instances| - ActivityPub::FetchInstanceInfoWorker.push_bulk(instances) do |instance| - [instance.domain] - end - end - end -end diff --git a/config/sidekiq.yml b/config/sidekiq.yml index 6102dcaeaf..538987c9ac 100644 --- a/config/sidekiq.yml +++ b/config/sidekiq.yml @@ -63,10 +63,6 @@ interval: 30 seconds class: Scheduler::SidekiqHealthScheduler queue: scheduler - update_instance_info_scheduler: - cron: '0 0 * * *' - class: Scheduler::UpdateInstanceInfoScheduler - queue: scheduler software_update_check_scheduler: interval: 30 minutes class: Scheduler::SoftwareUpdateCheckScheduler diff --git a/spec/lib/activitypub/activity/update_spec.rb b/spec/lib/activitypub/activity/update_spec.rb index 87e96d2d1b..6c84c5836a 100644 --- a/spec/lib/activitypub/activity/update_spec.rb +++ b/spec/lib/activitypub/activity/update_spec.rb @@ -55,6 +55,7 @@ RSpec.describe ActivityPub::Activity::Update do stub_request(:get, actor_json[:following]).to_return(status: 404) stub_request(:get, actor_json[:featured]).to_return(status: 404) stub_request(:get, actor_json[:featuredTags]).to_return(status: 404) + stub_request(:get, 'https://example.com/.well-known/nodeinfo').to_return(status: 404) subject.perform end diff --git a/spec/services/activitypub/process_account_service_spec.rb b/spec/services/activitypub/process_account_service_spec.rb index f0885ff672..6dc3ced27e 100644 --- a/spec/services/activitypub/process_account_service_spec.rb +++ b/spec/services/activitypub/process_account_service_spec.rb @@ -5,6 +5,10 @@ require 'rails_helper' RSpec.describe ActivityPub::ProcessAccountService, type: :service do subject { described_class.new } + before do + stub_request(:get, 'https://example.com/.well-known/nodeinfo').to_return(status: 404) + end + context 'with searchability' do subject { described_class.new.call('alice', 'example.com', payload) } diff --git a/spec/workers/activitypub/fetch_instance_info_worker_spec.rb b/spec/workers/activitypub/fetch_instance_info_worker_spec.rb index f6dacff5fc..9dc9594041 100644 --- a/spec/workers/activitypub/fetch_instance_info_worker_spec.rb +++ b/spec/workers/activitypub/fetch_instance_info_worker_spec.rb @@ -67,9 +67,22 @@ describe ActivityPub::FetchInstanceInfoWorker do Instance.refresh end + it 'does not update immediately' do + stub_request(:get, 'https://example.com/nodeinfo/2.0').to_return(status: 200, body: nodeinfo_json) + subject.perform('example.com') + stub_request(:get, 'https://example.com/nodeinfo/2.0').to_return(status: 200, body: new_nodeinfo_json) + subject.perform('example.com') + + info = InstanceInfo.find_by(domain: 'example.com') + expect(info).to_not be_nil + expect(info.software).to eq 'mastodon' + expect(info.version).to eq '4.2.0-beta1' + end + it 'performs a mastodon instance' do stub_request(:get, 'https://example.com/nodeinfo/2.0').to_return(status: 200, body: nodeinfo_json) subject.perform('example.com') + Rails.cache.delete('fetch_instance_info:example.com') stub_request(:get, 'https://example.com/nodeinfo/2.0').to_return(status: 200, body: new_nodeinfo_json) subject.perform('example.com') @@ -93,5 +106,12 @@ describe ActivityPub::FetchInstanceInfoWorker do info = InstanceInfo.find_by(domain: 'example.com') expect(info).to be_nil end + + it 'does not fetch again immediately' do + expect(subject.perform('example.com')).to be true + expect(subject.perform('example.com')).to be true + + expect(a_request(:get, 'https://example.com/.well-known/nodeinfo')).to have_been_made.once + end end end diff --git a/spec/workers/scheduler/update_instance_info_scheduler_spec.rb b/spec/workers/scheduler/update_instance_info_scheduler_spec.rb deleted file mode 100644 index f3a190417f..0000000000 --- a/spec/workers/scheduler/update_instance_info_scheduler_spec.rb +++ /dev/null @@ -1,19 +0,0 @@ -# frozen_string_literal: true - -require 'rails_helper' - -describe Scheduler::UpdateInstanceInfoScheduler do - let(:worker) { described_class.new } - - before do - stub_request(:get, 'https://example.com/.well-known/nodeinfo').to_return(status: 200, body: '{}') - Fabricate(:account, domain: 'example.com') - Instance.refresh - end - - describe 'perform' do - it 'runs without error' do - expect { worker.perform }.to_not raise_error - end - end -end