From 1efda1c4535d453b3f9e71c61a225d7cc0d7e75f Mon Sep 17 00:00:00 2001 From: Eugen Rochko Date: Wed, 17 May 2017 00:41:15 +0200 Subject: [PATCH] Fix #2572 - Resolve preview cards for remote statuses as well as local ones (#3088) --- app/services/fetch_link_card_service.rb | 26 +++++++++++++++-- app/services/process_feed_service.rb | 4 +++ spec/services/fetch_link_card_service_spec.rb | 28 ++++++++++++++++--- spec/services/process_feed_service_spec.rb | 2 ++ 4 files changed, 54 insertions(+), 6 deletions(-) diff --git a/app/services/fetch_link_card_service.rb b/app/services/fetch_link_card_service.rb index 27d7c2a7083..dec4aabd624 100644 --- a/app/services/fetch_link_card_service.rb +++ b/app/services/fetch_link_card_service.rb @@ -7,11 +7,11 @@ class FetchLinkCardService < BaseService def call(status) # Get first http/https URL that isn't local - url = status.text.match(URL_PATTERN).to_a.reject { |uri| TagManager.instance.local_url?(uri) }.first + url = parse_urls(status) return if url.nil? - url = Addressable::URI.parse(url).normalize.to_s + url = url.to_s card = PreviewCard.where(status: status).first_or_initialize(status: status, url: url) res = http_client.head(url) @@ -22,6 +22,28 @@ class FetchLinkCardService < BaseService private + def parse_urls(status) + if status.local? + urls = status.text.match(URL_PATTERN).to_a.map { |uri| Addressable::URI.parse(uri).normalize } + else + html = Nokogiri::HTML(status.text) + links = html.css('a') + urls = links.map { |a| Addressable::URI.parse(a['href']).normalize unless skip_link?(a) }.compact + end + + urls.reject { |uri| bad_url?(uri) }.first + end + + def bad_url?(uri) + # Avoid local instance URLs and invalid URLs + TagManager.instance.local_url?(uri.to_s) || !%w(http https).include?(uri.scheme) || uri.host.blank? + end + + def skip_link?(a) + # Avoid links for hashtags and mentions (microformats) + a['rel']&.include?('tag') || a['class']&.include?('u-url') + end + def attempt_oembed(card, url) response = OEmbed::Providers.get(url) diff --git a/app/services/process_feed_service.rb b/app/services/process_feed_service.rb index c573c0490cd..04d6a100f6e 100644 --- a/app/services/process_feed_service.rb +++ b/app/services/process_feed_service.rb @@ -69,8 +69,12 @@ class ProcessFeedService < BaseService notify_about_mentions!(status) unless status.reblog? notify_about_reblog!(status) if status.reblog? && status.reblog.account.local? + Rails.logger.debug "Queuing remote status #{status.id} (#{id}) for distribution" + + LinkCrawlWorker.perform_async(status.id) unless status.spoiler_text.present? DistributionWorker.perform_async(status.id) + status end diff --git a/spec/services/fetch_link_card_service_spec.rb b/spec/services/fetch_link_card_service_spec.rb index 681c99c1149..9df41cf559d 100644 --- a/spec/services/fetch_link_card_service_spec.rb +++ b/spec/services/fetch_link_card_service_spec.rb @@ -1,15 +1,35 @@ require 'rails_helper' RSpec.describe FetchLinkCardService do + subject { FetchLinkCardService.new } + before do stub_request(:head, 'http://example.xn--fiqs8s/').to_return(status: 200, headers: { 'Content-Type' => 'text/html' }) stub_request(:get, 'http://example.xn--fiqs8s/').to_return(request_fixture('idn.txt')) + stub_request(:head, 'https://github.com/qbi/WannaCry').to_return(status: 404) + + subject.call(status) end - it 'works with IDN URLs' do - status = Fabricate(:status, text: 'Check out http://example.中国') + context 'in a local status' do + context do + let(:status) { Fabricate(:status, text: 'Check out http://example.中国') } - FetchLinkCardService.new.call(status) - expect(a_request(:get, 'http://example.xn--fiqs8s/')).to have_been_made.at_least_once + it 'works with IDN URLs' do + expect(a_request(:get, 'http://example.xn--fiqs8s/')).to have_been_made.at_least_once + end + end + end + + context 'in a remote status' do + let(:status) { Fabricate(:status, uri: 'abc', text: 'Habt ihr ein paar gute Links zu #Wannacry herumfliegen? Ich will mal unter
https://github.com/qbi/WannaCry was sammeln. !security ') } + + it 'parses out URLs' do + expect(a_request(:head, 'https://github.com/qbi/WannaCry')).to have_been_made.at_least_once + end + + it 'ignores URLs to hashtags' do + expect(a_request(:head, 'https://quitter.se/tag/wannacry')).to_not have_been_made + end end end diff --git a/spec/services/process_feed_service_spec.rb b/spec/services/process_feed_service_spec.rb index 9ac22cbba30..d85ee3b567a 100644 --- a/spec/services/process_feed_service_spec.rb +++ b/spec/services/process_feed_service_spec.rb @@ -9,6 +9,8 @@ RSpec.describe ProcessFeedService do before do stub_request(:post, "https://pubsubhubbub.superfeedr.com/").to_return(:status => 200, :body => "", :headers => {}) + stub_request(:head, "http://kickass.zone/media/2").to_return(:status => 404) + stub_request(:head, "http://kickass.zone/media/3").to_return(:status => 404) stub_request(:get, "http://kickass.zone/system/accounts/avatars/000/000/001/large/eris.png").to_return(request_fixture('avatar.txt')) stub_request(:get, "http://kickass.zone/system/media_attachments/files/000/000/002/original/morpheus_linux.jpg?1476059910").to_return(request_fixture('attachment1.txt')) stub_request(:get, "http://kickass.zone/system/media_attachments/files/000/000/003/original/gizmo.jpg?1476060065").to_return(request_fixture('attachment2.txt'))