From ec059317fad4fc5f1ed45a72cbae48edafcb586d Mon Sep 17 00:00:00 2001 From: Claire Date: Thu, 21 Oct 2021 20:39:35 +0200 Subject: [PATCH] Fix some link previews being incorrectly generated from other prior links (#16885) * Add tests * Fix some link previews being incorrectly generated from different prior links PR #12403 added a cache to avoid redundant queries when the OEmbed endpoint can be guessed from the URL. This caching mechanism is not perfectly correct as there is no guarantee that all pages from a given domain share the same OEmbed provider endpoint. This PR prevents the FetchOEmbedService from caching OEmbed endpoint that cannot be generalized by replacing a fully-qualified URL from the endpoint's parameters, greatly reducing the number of incorrect cached generalizations. --- app/services/fetch_oembed_service.rb | 5 ++- spec/fixtures/requests/oembed_youtube.html | 7 ++++ spec/services/fetch_oembed_service_spec.rb | 41 ++++++++++++++++++++++ 3 files changed, 52 insertions(+), 1 deletion(-) create mode 100644 spec/fixtures/requests/oembed_youtube.html diff --git a/app/services/fetch_oembed_service.rb b/app/services/fetch_oembed_service.rb index 60be9b9dc09..4cbaa04c623 100644 --- a/app/services/fetch_oembed_service.rb +++ b/app/services/fetch_oembed_service.rb @@ -2,6 +2,7 @@ class FetchOEmbedService ENDPOINT_CACHE_EXPIRES_IN = 24.hours.freeze + URL_REGEX = /(=(http[s]?(%3A|:)(\/\/|%2F%2F)))([^&]*)/i.freeze attr_reader :url, :options, :format, :endpoint_url @@ -65,10 +66,12 @@ class FetchOEmbedService end def cache_endpoint! + return unless URL_REGEX.match?(@endpoint_url) + url_domain = Addressable::URI.parse(@url).normalized_host endpoint_hash = { - endpoint: @endpoint_url.gsub(/(=(http[s]?(%3A|:)(\/\/|%2F%2F)))([^&]*)/i, '={url}'), + endpoint: @endpoint_url.gsub(URL_REGEX, '={url}'), format: @format, } diff --git a/spec/fixtures/requests/oembed_youtube.html b/spec/fixtures/requests/oembed_youtube.html new file mode 100644 index 00000000000..1508e4dd959 --- /dev/null +++ b/spec/fixtures/requests/oembed_youtube.html @@ -0,0 +1,7 @@ + + + + + + + diff --git a/spec/services/fetch_oembed_service_spec.rb b/spec/services/fetch_oembed_service_spec.rb index a4262b04085..88f0113eddf 100644 --- a/spec/services/fetch_oembed_service_spec.rb +++ b/spec/services/fetch_oembed_service_spec.rb @@ -13,6 +13,32 @@ describe FetchOEmbedService, type: :service do describe 'discover_provider' do context 'when status code is 200 and MIME type is text/html' do + context 'when OEmbed endpoint contains URL as parameter' do + before do + stub_request(:get, 'https://www.youtube.com/watch?v=IPSbNdBmWKE').to_return( + status: 200, + headers: { 'Content-Type': 'text/html' }, + body: request_fixture('oembed_youtube.html'), + ) + stub_request(:get, 'https://www.youtube.com/oembed?format=json&url=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DIPSbNdBmWKE').to_return( + status: 200, + headers: { 'Content-Type': 'text/html' }, + body: request_fixture('oembed_json_empty.html') + ) + end + + it 'returns new OEmbed::Provider for JSON provider' do + subject.call('https://www.youtube.com/watch?v=IPSbNdBmWKE') + expect(subject.endpoint_url).to eq 'https://www.youtube.com/oembed?format=json&url=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DIPSbNdBmWKE' + expect(subject.format).to eq :json + end + + it 'stores URL template' do + subject.call('https://www.youtube.com/watch?v=IPSbNdBmWKE') + expect(Rails.cache.read('oembed_endpoint:www.youtube.com')[:endpoint]).to eq 'https://www.youtube.com/oembed?format=json&url={url}' + end + end + context 'Both of JSON and XML provider are discoverable' do before do stub_request(:get, 'https://host.test/oembed.html').to_return( @@ -33,6 +59,11 @@ describe FetchOEmbedService, type: :service do expect(subject.endpoint_url).to eq 'https://host.test/provider.xml' expect(subject.format).to eq :xml end + + it 'does not cache OEmbed endpoint' do + subject.call('https://host.test/oembed.html', format: :xml) + expect(Rails.cache.exist?('oembed_endpoint:host.test')).to eq false + end end context 'JSON provider is discoverable while XML provider is not' do @@ -49,6 +80,11 @@ describe FetchOEmbedService, type: :service do expect(subject.endpoint_url).to eq 'https://host.test/provider.json' expect(subject.format).to eq :json end + + it 'does not cache OEmbed endpoint' do + subject.call('https://host.test/oembed.html') + expect(Rails.cache.exist?('oembed_endpoint:host.test')).to eq false + end end context 'XML provider is discoverable while JSON provider is not' do @@ -65,6 +101,11 @@ describe FetchOEmbedService, type: :service do expect(subject.endpoint_url).to eq 'https://host.test/provider.xml' expect(subject.format).to eq :xml end + + it 'does not cache OEmbed endpoint' do + subject.call('https://host.test/oembed.html') + expect(Rails.cache.exist?('oembed_endpoint:host.test')).to eq false + end end context 'Invalid XML provider is discoverable while JSON provider is not' do