From cb4ef24ac9d48e70648135f106fdc275dedf14fc Mon Sep 17 00:00:00 2001 From: David Yip Date: Wed, 15 Nov 2017 17:26:29 -0600 Subject: [PATCH] Match keyword mute filter on hashtags. #208. It is reasonable to expect someone to enter #foo to mute hashtag #foo. However, tags are recorded on statuses without the preceding #. To adjust for this, we build a separate tag matcher and use Tag::HASHTAG_RE to extract a hashtag from the hashtag syntax. --- app/lib/feed_manager.rb | 21 ++++--- app/models/glitch/keyword_mute.rb | 76 ++++++++++++++++++------- spec/models/glitch/keyword_mute_spec.rb | 4 +- 3 files changed, 69 insertions(+), 32 deletions(-) diff --git a/app/lib/feed_manager.rb b/app/lib/feed_manager.rb index 414632a8a94..e12a5c01632 100644 --- a/app/lib/feed_manager.rb +++ b/app/lib/feed_manager.rb @@ -141,7 +141,7 @@ class FeedManager return false if receiver_id == status.account_id return true if status.reply? && (status.in_reply_to_id.nil? || status.in_reply_to_account_id.nil?) - return true if keyword_filter?(status, Glitch::KeywordMute.matcher_for(receiver_id)) + return true if keyword_filter?(status, receiver_id) check_for_mutes = [status.account_id] check_for_mutes.concat(status.mentions.pluck(:account_id)) @@ -170,17 +170,20 @@ class FeedManager false end - def keyword_filter?(status, matcher) - should_filter = matcher =~ status.text - should_filter ||= matcher =~ status.spoiler_text - should_filter ||= status.tags.find_each.any? { |t| matcher =~ t.name } + def keyword_filter?(status, receiver_id) + text_matcher = Glitch::KeywordMute.text_matcher_for(receiver_id) + tag_matcher = Glitch::KeywordMute.tag_matcher_for(receiver_id) + + should_filter = text_matcher =~ status.text + should_filter ||= text_matcher =~ status.spoiler_text + should_filter ||= tag_matcher =~ status.tags if status.reblog? reblog = status.reblog - should_filter ||= matcher =~ reblog.text - should_filter ||= matcher =~ reblog.spoiler_text - should_filter ||= reblog.tags.find_each.any? 
{ |t| matcher =~ t.name } + should_filter ||= text_matcher =~ reblog.text + should_filter ||= text_matcher =~ reblog.spoiler_text + should_filter ||= tag_matcher =~ reblog.tags end !!should_filter @@ -195,7 +198,7 @@ class FeedManager should_filter = Block.where(account_id: receiver_id, target_account_id: check_for_blocks).any? # Filter if it's from someone I blocked, in reply to someone I blocked, or mentioning someone I blocked should_filter ||= (status.account.silenced? && !Follow.where(account_id: receiver_id, target_account_id: status.account_id).exists?) # of if the account is silenced and I'm not following them - should_filter ||= keyword_filter?(status, Glitch::KeywordMute.matcher_for(receiver_id)) # or if the mention contains a muted keyword + should_filter ||= keyword_filter?(status, receiver_id) # or if the mention contains a muted keyword should_filter end diff --git a/app/models/glitch/keyword_mute.rb b/app/models/glitch/keyword_mute.rb index 009de1880b8..733dd0bc80f 100644 --- a/app/models/glitch/keyword_mute.rb +++ b/app/models/glitch/keyword_mute.rb @@ -16,44 +16,37 @@ class Glitch::KeywordMute < ApplicationRecord validates_presence_of :keyword - after_commit :invalidate_cached_matcher + after_commit :invalidate_cached_matchers - def self.matcher_for(account_id) - Matcher.new(account_id) + def self.text_matcher_for(account_id) + TextMatcher.new(account_id) + end + + def self.tag_matcher_for(account_id) + TagMatcher.new(account_id) end private - def invalidate_cached_matcher - Rails.cache.delete("keyword_mutes:regex:#{account_id}") + def invalidate_cached_matchers + Rails.cache.delete(TextMatcher.cache_key(account_id)) + Rails.cache.delete(TagMatcher.cache_key(account_id)) end - class Matcher + class RegexpMatcher attr_reader :account_id attr_reader :regex def initialize(account_id) @account_id = account_id - regex_text = Rails.cache.fetch("keyword_mutes:regex:#{account_id}") { regex_text_for_account } + regex_text = 
Rails.cache.fetch(self.class.cache_key(account_id)) { make_regex_text } @regex = /#{regex_text}/ end - def =~(str) - regex =~ str - end - - private + protected def keywords - Glitch::KeywordMute.where(account_id: account_id).select(:keyword, :id, :whole_word) - end - - def regex_text_for_account - kws = keywords.find_each.with_object([]) do |kw, a| - a << (kw.whole_word ? boundary_regex_for_keyword(kw.keyword) : kw.keyword) - end - - Regexp.union(kws).source + Glitch::KeywordMute.where(account_id: account_id).pluck(:whole_word, :keyword) end def boundary_regex_for_keyword(keyword) @@ -63,4 +56,45 @@ class Glitch::KeywordMute < ApplicationRecord /(?mix:#{sb}#{Regexp.escape(keyword)}#{eb})/ end end + + class TextMatcher < RegexpMatcher + def self.cache_key(account_id) + format('keyword_mutes:regex:%s', account_id) + end + + def =~(str) + regex =~ str + end + + private + + def make_regex_text + kws = keywords.map! do |whole_word, keyword| + whole_word ? boundary_regex_for_keyword(keyword) : keyword + end + + Regexp.union(kws).source + end + end + + class TagMatcher < RegexpMatcher + def self.cache_key(account_id) + format('keyword_mutes:tag:%s', account_id) + end + + def =~(tags) + tags.pluck(:name).detect { |n| regex =~ n } + end + + private + + def make_regex_text + kws = keywords.map! do |whole_word, keyword| + term = (Tag::HASHTAG_RE =~ keyword) ? $1 : keyword + whole_word ? boundary_regex_for_keyword(term) : term + end + + Regexp.union(kws).source + end + end end diff --git a/spec/models/glitch/keyword_mute_spec.rb b/spec/models/glitch/keyword_mute_spec.rb index 9685c649385..e14af0e6a48 100644 --- a/spec/models/glitch/keyword_mute_spec.rb +++ b/spec/models/glitch/keyword_mute_spec.rb @@ -4,8 +4,8 @@ RSpec.describe Glitch::KeywordMute, type: :model do let(:alice) { Fabricate(:account, username: 'alice').tap(&:save!) } let(:bob) { Fabricate(:account, username: 'bob').tap(&:save!) 
} - describe '.matcher_for' do - let(:matcher) { Glitch::KeywordMute.matcher_for(alice) } + describe '.text_matcher_for' do + let(:matcher) { Glitch::KeywordMute.text_matcher_for(alice.id) } describe 'with no mutes' do before do