Change how hashtags are normalized (#18795)

* Change how hashtags are normalized

* Fix tests
rebase/4.0.0rc1
Eugen Rochko 2022-07-13 15:03:28 +02:00 committed by GitHub
parent 12ed2d793b
commit e7aa2be828
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
29 changed files with 193 additions and 51 deletions

View File

@ -16,6 +16,8 @@ module Admin
if @tag.update(tag_params.merge(reviewed_at: Time.now.utc)) if @tag.update(tag_params.merge(reviewed_at: Time.now.utc))
redirect_to admin_tag_path(@tag.id), notice: I18n.t('admin.tags.updated_msg') redirect_to admin_tag_path(@tag.id), notice: I18n.t('admin.tags.updated_msg')
else else
@time_period = (6.days.ago.to_date...Time.now.utc.to_date)
render :show render :show
end end
end end
@ -27,7 +29,7 @@ module Admin
end end
def tag_params def tag_params
params.require(:tag).permit(:name, :trendable, :usable, :listable) params.require(:tag).permit(:name, :display_name, :trendable, :usable, :listable)
end end
end end
end end

View File

@ -13,9 +13,7 @@ class Api::V1::FeaturedTagsController < Api::BaseController
end end
def create def create
@featured_tag = current_account.featured_tags.new(featured_tag_params) @featured_tag = current_account.featured_tags.create!(featured_tag_params)
@featured_tag.reset_data
@featured_tag.save!
render json: @featured_tag, serializer: REST::FeaturedTagSerializer render json: @featured_tag, serializer: REST::FeaturedTagSerializer
end end

View File

@ -11,7 +11,6 @@ class Settings::FeaturedTagsController < Settings::BaseController
def create def create
@featured_tag = current_account.featured_tags.new(featured_tag_params) @featured_tag = current_account.featured_tags.new(featured_tag_params)
@featured_tag.reset_data
if @featured_tag.save if @featured_tag.save
redirect_to settings_featured_tags_path redirect_to settings_featured_tags_path

View File

@ -606,7 +606,20 @@ function insertIntoTagHistory(recognizedTags, text) {
const state = getState(); const state = getState();
const oldHistory = state.getIn(['compose', 'tagHistory']); const oldHistory = state.getIn(['compose', 'tagHistory']);
const me = state.getIn(['meta', 'me']); const me = state.getIn(['meta', 'me']);
const names = recognizedTags.map(tag => text.match(new RegExp(`#${tag.name}`, 'i'))[0].slice(1));
// FIXME: Matching input hashtags with recognized hashtags has become more
// complicated because of the new normalization rules; it is no longer just
// a case-sensitivity issue
const names = recognizedTags.map(tag => {
const matches = text.match(new RegExp(`#${tag.name}`, 'i'));
if (matches && matches.length > 0) {
return matches[0].slice(1);
} else {
return tag.name;
}
});
const intersectedOldHistory = oldHistory.filter(name => names.findIndex(newName => newName.toLowerCase() === name.toLowerCase()) === -1); const intersectedOldHistory = oldHistory.filter(name => names.findIndex(newName => newName.toLowerCase() === name.toLowerCase()) === -1);
names.push(...intersectedOldHistory.toJS()); names.push(...intersectedOldHistory.toJS());

10
app/lib/ascii_folding.rb Normal file
View File

@ -0,0 +1,10 @@
# frozen_string_literal: true
# Replaces accented / modified Latin letters with their closest plain ASCII
# letter (for example "Å" becomes "A" and "ž" becomes "z").
#
# The two tables below are positional: the character at index i of
# NON_ASCII_CHARS is translated to the character at index i of
# EQUIVALENT_ASCII_CHARS. They MUST therefore contain exactly the same number
# of characters, and every entry of NON_ASCII_CHARS must be a single code
# point. A multi-character entry shifts every later mapping by one and would
# cause plain ASCII letters to be rewritten.
class ASCIIFolding
  NON_ASCII_CHARS        = 'ÀÁÂÃÄÅàáâãäåĀāĂăĄąÇçĆćĈĉĊċČčÐðĎďĐđÈÉÊËèéêëĒēĔĕĖėĘęĚěĜĝĞğĠġĢģĤĥĦħÌÍÎÏìíîïĨĩĪīĬĭĮįİıĴĵĶķĸĹĺĻļĽľĿŀŁłÑñŃńŅņŇňŉŊŋÒÓÔÕÖØòóôõöøŌōŎŏŐőŔŕŖŗŘřŚśŜŝŞşŠšſŢţŤťŦŧÙÚÛÜùúûüŨũŪūŬŭŮůŰűŲųŴŵÝýÿŶŷŸŹźŻżŽž'.freeze
  EQUIVALENT_ASCII_CHARS = 'AAAAAAaaaaaaAaAaAaCcCcCcCcCcDdDdDdEEEEeeeeEeEeEeEeEeGgGgGgGgHhHhIIIIiiiiIiIiIiIiIiJjKkkLlLlLlLlLlNnNnNnNnnNnOOOOOOooooooOoOoOoRrRrRrSsSsSsSssTtTtTtUUUUuuuuUuUuUuUuUuUuWwYyyYyYZzZzZz'.freeze

  # @param str [String] text that may contain accented Latin letters
  # @return [String] a new string with every character listed in
  #   NON_ASCII_CHARS replaced by its counterpart in EQUIVALENT_ASCII_CHARS;
  #   all other characters are left untouched
  def fold(str)
    str.tr(NON_ASCII_CHARS, EQUIVALENT_ASCII_CHARS)
  end
end

View File

@ -0,0 +1,25 @@
# frozen_string_literal: true
# Turns a user-supplied hashtag into the form used for lookups.
#
# The steps run in this order: Unicode NFKC normalization, lowercasing,
# ASCII folding via ASCIIFolding, and finally removal of every character
# that is neither alphanumeric nor one of Tag::HASHTAG_SEPARATORS.
class HashtagNormalizer
  # @param str [String] raw hashtag text
  # @return [String] the normalized hashtag
  def normalize(str)
    widened = cjk_width(str)
    lowered = lowercase(widened)
    folded  = ascii_folding(lowered)

    remove_invalid_characters(folded)
  end

  private

  # Applies Unicode NFKC normalization.
  def cjk_width(str)
    str.unicode_normalize(:nfkc)
  end

  # Downcases through the multibyte wrapper and returns a plain String.
  def lowercase(str)
    str.mb_chars.downcase.to_s
  end

  # Replaces the characters listed by ASCIIFolding with their ASCII letters.
  def ascii_folding(str)
    folder = ASCIIFolding.new
    folder.fold(str)
  end

  # Drops everything that is not alphanumeric or a hashtag separator.
  def remove_invalid_characters(str)
    str.gsub(/[^[:alnum:]#{Tag::HASHTAG_SEPARATORS}]/, '')
  end
end

View File

@ -62,7 +62,7 @@ class Account < ApplicationRecord
) )
USERNAME_RE = /[a-z0-9_]+([a-z0-9_\.-]+[a-z0-9_]+)?/i USERNAME_RE = /[a-z0-9_]+([a-z0-9_\.-]+[a-z0-9_]+)?/i
MENTION_RE = /(?<=^|[^\/[:word:]])@((#{USERNAME_RE})(?:@[[:word:]\.\-]+[[:word:]]+)?)/i MENTION_RE = /(?<=^|[^\/[:word:]])@((#{USERNAME_RE})(?:@[[:alnum:]\.\-]+[[:alnum:]]+)?)/i
URL_PREFIX_RE = /\Ahttp(s?):\/\/[^\/]+/ URL_PREFIX_RE = /\Ahttp(s?):\/\/[^\/]+/
include Attachmentable include Attachmentable

View File

@ -3,14 +3,14 @@
# #
# Table name: custom_filters # Table name: custom_filters
# #
# id :bigint not null, primary key # id :bigint(8) not null, primary key
# account_id :bigint # account_id :bigint(8)
# expires_at :datetime # expires_at :datetime
# phrase :text default(""), not null # phrase :text default(""), not null
# context :string default([]), not null, is an Array # context :string default([]), not null, is an Array
# created_at :datetime not null # created_at :datetime not null
# updated_at :datetime not null # updated_at :datetime not null
# action :integer default(0), not null # action :integer default("warn"), not null
# #
class CustomFilter < ApplicationRecord class CustomFilter < ApplicationRecord

View File

@ -3,8 +3,8 @@
# #
# Table name: custom_filter_keywords # Table name: custom_filter_keywords
# #
# id :bigint not null, primary key # id :bigint(8) not null, primary key
# custom_filter_id :bigint not null # custom_filter_id :bigint(8) not null
# keyword :text default(""), not null # keyword :text default(""), not null
# whole_word :boolean default(TRUE), not null # whole_word :boolean default(TRUE), not null
# created_at :datetime not null # created_at :datetime not null

View File

@ -13,17 +13,19 @@
# #
class FeaturedTag < ApplicationRecord class FeaturedTag < ApplicationRecord
belongs_to :account, inverse_of: :featured_tags, required: true belongs_to :account, inverse_of: :featured_tags
belongs_to :tag, inverse_of: :featured_tags, required: true belongs_to :tag, inverse_of: :featured_tags, optional: true # Set after validation
delegate :name, to: :tag, allow_nil: true validate :validate_tag_name, on: :create
validates_associated :tag, on: :create
validates :name, presence: true, on: :create
validate :validate_featured_tags_limit, on: :create validate :validate_featured_tags_limit, on: :create
def name=(str) before_create :set_tag
self.tag = Tag.find_or_create_by_names(str.strip)&.first before_create :reset_data
attr_writer :name
def name
tag_id.present? ? tag.name : @name
end end
def increment(timestamp) def increment(timestamp)
@ -34,14 +36,23 @@ class FeaturedTag < ApplicationRecord
update(statuses_count: [0, statuses_count - 1].max, last_status_at: account.statuses.where(visibility: %i(public unlisted)).tagged_with(tag).where.not(id: deleted_status_id).select(:created_at).first&.created_at) update(statuses_count: [0, statuses_count - 1].max, last_status_at: account.statuses.where(visibility: %i(public unlisted)).tagged_with(tag).where.not(id: deleted_status_id).select(:created_at).first&.created_at)
end end
private
# Assigns the tag association from the name given through the writer:
# asks Tag.find_or_create_by_names for @name and takes the first result
# (nil when that call returns nil).
def set_tag
  resolved = Tag.find_or_create_by_names(@name)
  self.tag = resolved&.first
end
def reset_data def reset_data
self.statuses_count = account.statuses.where(visibility: %i(public unlisted)).tagged_with(tag).count self.statuses_count = account.statuses.where(visibility: %i(public unlisted)).tagged_with(tag).count
self.last_status_at = account.statuses.where(visibility: %i(public unlisted)).tagged_with(tag).select(:created_at).first&.created_at self.last_status_at = account.statuses.where(visibility: %i(public unlisted)).tagged_with(tag).select(:created_at).first&.created_at
end end
private
def validate_featured_tags_limit def validate_featured_tags_limit
errors.add(:base, I18n.t('featured_tags.errors.limit')) if account.featured_tags.count >= 10 errors.add(:base, I18n.t('featured_tags.errors.limit')) if account.featured_tags.count >= 10
end end
# Validates the name supplied through the `name=` writer.
#
# Adds a :blank error on :name when @name is blank, and an :invalid error
# when it does not fully match Tag::HASHTAG_NAME_RE.
def validate_tag_name
  errors.add(:name, :blank) if @name.blank?
  # @name is nil when no name was submitted at all; `nil.match?` would raise
  # NoMethodError, so coerce to a String first. A nil name is then reported
  # exactly like an empty one instead of crashing the validation.
  errors.add(:name, :invalid) unless @name.to_s.match?(/\A(#{Tag::HASHTAG_NAME_RE})\z/i)
end
end end

View File

@ -15,6 +15,7 @@
# last_status_at :datetime # last_status_at :datetime
# max_score :float # max_score :float
# max_score_at :datetime # max_score_at :datetime
# display_name :string
# #
class Tag < ApplicationRecord class Tag < ApplicationRecord
@ -24,11 +25,12 @@ class Tag < ApplicationRecord
has_many :featured_tags, dependent: :destroy, inverse_of: :tag has_many :featured_tags, dependent: :destroy, inverse_of: :tag
HASHTAG_SEPARATORS = "_\u00B7\u200c" HASHTAG_SEPARATORS = "_\u00B7\u200c"
HASHTAG_NAME_RE = "([[:word:]_][[:word:]#{HASHTAG_SEPARATORS}]*[[:alpha:]#{HASHTAG_SEPARATORS}][[:word:]#{HASHTAG_SEPARATORS}]*[[:word:]_])|([[:word:]_]*[[:alpha:]][[:word:]_]*)" HASHTAG_NAME_RE = "([[:alnum:]_][[:alnum:]#{HASHTAG_SEPARATORS}]*[[:alpha:]#{HASHTAG_SEPARATORS}][[:alnum:]#{HASHTAG_SEPARATORS}]*[[:alnum:]_])|([[:alnum:]_]*[[:alpha:]][[:alnum:]_]*)"
HASHTAG_RE = /(?:^|[^\/\)\w])#(#{HASHTAG_NAME_RE})/i HASHTAG_RE = /(?:^|[^\/\)\w])#(#{HASHTAG_NAME_RE})/i
validates :name, presence: true, format: { with: /\A(#{HASHTAG_NAME_RE})\z/i } validates :name, presence: true, format: { with: /\A(#{HASHTAG_NAME_RE})\z/i }
validate :validate_name_change, if: -> { !new_record? && name_changed? } validate :validate_name_change, if: -> { !new_record? && name_changed? }
validate :validate_display_name_change, if: -> { !new_record? && display_name_changed? }
scope :reviewed, -> { where.not(reviewed_at: nil) } scope :reviewed, -> { where.not(reviewed_at: nil) }
scope :unreviewed, -> { where(reviewed_at: nil) } scope :unreviewed, -> { where(reviewed_at: nil) }
@ -46,6 +48,10 @@ class Tag < ApplicationRecord
name name
end end
# Returns the stored display_name attribute, falling back to `name` when
# the stored value is nil (or otherwise falsy).
def display_name
  stored = attributes['display_name']
  stored || name
end
def usable def usable
boolean_with_default('usable', true) boolean_with_default('usable', true)
end end
@ -90,8 +96,10 @@ class Tag < ApplicationRecord
class << self class << self
def find_or_create_by_names(name_or_names) def find_or_create_by_names(name_or_names)
Array(name_or_names).map(&method(:normalize)).uniq { |str| str.mb_chars.downcase.to_s }.map do |normalized_name| names = Array(name_or_names).map { |str| [normalize(str), str] }.uniq(&:first)
tag = matching_name(normalized_name).first || create(name: normalized_name)
names.map do |(normalized_name, display_name)|
tag = matching_name(normalized_name).first || create(name: normalized_name, display_name: display_name)
yield tag if block_given? yield tag if block_given?
@ -129,7 +137,7 @@ class Tag < ApplicationRecord
end end
def normalize(str) def normalize(str)
str.gsub(/\A#/, '') HashtagNormalizer.new.normalize(str)
end end
end end
@ -138,4 +146,8 @@ class Tag < ApplicationRecord
def validate_name_change def validate_name_change
errors.add(:name, I18n.t('tags.does_not_match_previous_name')) unless name_was.mb_chars.casecmp(name.mb_chars).zero? errors.add(:name, I18n.t('tags.does_not_match_previous_name')) unless name_was.mb_chars.casecmp(name.mb_chars).zero?
end end
def validate_display_name_change
errors.add(:display_name, I18n.t('tags.does_not_match_previous_name')) unless HashtagNormalizer.new.normalize(display_name).casecmp(name.mb_chars).zero?
end
end end

View File

@ -10,11 +10,11 @@ class ActivityPub::HashtagSerializer < ActivityPub::Serializer
end end
def name def name
"##{object.name}" "##{object.display_name}"
end end
def href def href
if object.class.name == 'FeaturedTag' if object.instance_of?(FeaturedTag)
short_account_tag_url(object.account, object.tag) short_account_tag_url(object.account, object.tag)
else else
tag_url(object) tag_url(object)

View File

@ -12,4 +12,8 @@ class REST::FeaturedTagSerializer < ActiveModel::Serializer
def url def url
short_account_tag_url(object.account, object.tag) short_account_tag_url(object.account, object.tag)
end end
def name
object.display_name
end
end end

View File

@ -8,4 +8,8 @@ class REST::TagSerializer < ActiveModel::Serializer
def url def url
tag_url(object) tag_url(object)
end end
def name
object.display_name
end
end end

View File

@ -75,7 +75,7 @@
= link_to short_account_tag_path(@account, featured_tag.tag) do = link_to short_account_tag_path(@account, featured_tag.tag) do
%h4 %h4
= fa_icon 'hashtag' = fa_icon 'hashtag'
= featured_tag.name = featured_tag.display_name
%small %small
- if featured_tag.last_status_at.nil? - if featured_tag.last_status_at.nil?
= t('accounts.nothing_here') = t('accounts.nothing_here')

View File

@ -28,7 +28,7 @@ RSS::Builder.build do |doc|
end end
status.tags.each do |tag| status.tags.each do |tag|
item.category(tag.name) item.category(tag.display_name)
end end
end end
end end

View File

@ -2,7 +2,7 @@
= javascript_pack_tag 'admin', async: true, crossorigin: 'anonymous' = javascript_pack_tag 'admin', async: true, crossorigin: 'anonymous'
- content_for :page_title do - content_for :page_title do
= "##{@tag.name}" = "##{@tag.display_name}"
- if current_user.can?(:view_dashboard) - if current_user.can?(:view_dashboard)
- content_for :heading_actions do - content_for :heading_actions do
@ -53,7 +53,7 @@
= render 'shared/error_messages', object: @tag = render 'shared/error_messages', object: @tag
.fields-group .fields-group
= f.input :name, wrapper: :with_block_label = f.input :display_name, wrapper: :with_block_label
.fields-group .fields-group
= f.input :usable, as: :boolean, wrapper: :with_label = f.input :usable, as: :boolean, wrapper: :with_label

View File

@ -6,7 +6,7 @@
.pending-account__header .pending-account__header
= link_to admin_tag_path(tag.id) do = link_to admin_tag_path(tag.id) do
= fa_icon 'hashtag' = fa_icon 'hashtag'
= tag.name = tag.display_name
%br/ %br/

View File

@ -1,12 +1,12 @@
<%= raw t('admin_mailer.new_trends.new_trending_tags.title') %> <%= raw t('admin_mailer.new_trends.new_trending_tags.title') %>
<% @tags.each do |tag| %> <% @tags.each do |tag| %>
- #<%= tag.name %> - #<%= tag.display_name %>
<%= raw t('admin.trends.tags.usage_comparison', today: tag.history.get(Time.now.utc).accounts, yesterday: tag.history.get(Time.now.utc - 1.day).accounts) %> • <%= t('admin.trends.tags.current_score', score: Trends.tags.score(tag.id).round(2)) %> <%= raw t('admin.trends.tags.usage_comparison', today: tag.history.get(Time.now.utc).accounts, yesterday: tag.history.get(Time.now.utc - 1.day).accounts) %> • <%= t('admin.trends.tags.current_score', score: Trends.tags.score(tag.id).round(2)) %>
<% end %> <% end %>
<% if @lowest_trending_tag %> <% if @lowest_trending_tag %>
<%= raw t('admin_mailer.new_trends.new_trending_tags.requirements', lowest_tag_name: @lowest_trending_tag.name, lowest_tag_score: Trends.tags.score(@lowest_trending_tag.id).round(2), rank: Trends.tags.options[:review_threshold]) %> <%= raw t('admin_mailer.new_trends.new_trending_tags.requirements', lowest_tag_name: @lowest_trending_tag.display_name, lowest_tag_score: Trends.tags.score(@lowest_trending_tag.id).round(2), rank: Trends.tags.options[:review_threshold]) %>
<% else %> <% else %>
<%= raw t('admin_mailer.new_trends.new_trending_tags.no_approved_tags') %> <%= raw t('admin_mailer.new_trends.new_trending_tags.no_approved_tags') %>
<% end %> <% end %>

View File

@ -9,7 +9,7 @@
= render 'shared/error_messages', object: @featured_tag = render 'shared/error_messages', object: @featured_tag
.fields-group .fields-group
= f.input :name, wrapper: :with_block_label, hint: safe_join([t('simple_form.hints.featured_tag.name'), safe_join(@recently_used_tags.map { |tag| link_to("##{tag.name}", settings_featured_tags_path(featured_tag: { name: tag.name }), method: :post) }, ', ')], ' ') = f.input :name, wrapper: :with_block_label, hint: safe_join([t('simple_form.hints.featured_tag.name'), safe_join(@recently_used_tags.map { |tag| link_to("##{tag.display_name}", settings_featured_tags_path(featured_tag: { name: tag.name }), method: :post) }, ', ')], ' ')
.actions .actions
= f.button :button, t('featured_tags.add_new'), type: :submit = f.button :button, t('featured_tags.add_new'), type: :submit

View File

@ -1,6 +1,6 @@
= opengraph 'og:site_name', t('about.hosted_on', domain: site_hostname) = opengraph 'og:site_name', t('about.hosted_on', domain: site_hostname)
= opengraph 'og:url', tag_url(@tag) = opengraph 'og:url', tag_url(@tag)
= opengraph 'og:type', 'website' = opengraph 'og:type', 'website'
= opengraph 'og:title', "##{@tag.name}" = opengraph 'og:title', "##{@tag.display_name}"
= opengraph 'og:description', strip_tags(t('about.about_hashtag_html', hashtag: @tag.name)) = opengraph 'og:description', strip_tags(t('about.about_hashtag_html', hashtag: @tag.display_name))
= opengraph 'twitter:card', 'summary' = opengraph 'twitter:card', 'summary'

View File

@ -1,5 +1,5 @@
- content_for :page_title do - content_for :page_title do
= "##{@tag.name}" = "##{@tag.display_name}"
- content_for :header_tags do - content_for :header_tags do
%meta{ name: 'robots', content: 'noindex' }/ %meta{ name: 'robots', content: 'noindex' }/
@ -9,8 +9,8 @@
= render 'og' = render 'og'
.page-header .page-header
%h1= "##{@tag.name}" %h1= "##{@tag.display_name}"
%p= t('about.about_hashtag_html', hashtag: @tag.name) %p= t('about.about_hashtag_html', hashtag: @tag.display_name)
#mastodon-timeline{ data: { props: Oj.dump(default_props.merge(hashtag: @tag.name, local: @local)) }} #mastodon-timeline{ data: { props: Oj.dump(default_props.merge(hashtag: @tag.name, local: @local)) }}
.notranslate#modal-container .notranslate#modal-container

View File

@ -1,6 +1,6 @@
RSS::Builder.build do |doc| RSS::Builder.build do |doc|
doc.title("##{@tag.name}") doc.title("##{@tag.display_name}")
doc.description(I18n.t('rss.descriptions.tag', hashtag: @tag.name)) doc.description(I18n.t('rss.descriptions.tag', hashtag: @tag.display_name))
doc.link(tag_url(@tag)) doc.link(tag_url(@tag))
doc.last_build_date(@statuses.first.created_at) if @statuses.any? doc.last_build_date(@statuses.first.created_at) if @statuses.any?
doc.generator("Mastodon v#{Mastodon::Version.to_s}") doc.generator("Mastodon v#{Mastodon::Version.to_s}")
@ -26,7 +26,7 @@ RSS::Builder.build do |doc|
end end
status.tags.each do |tag| status.tags.each do |tag|
item.category(tag.name) item.category(tag.display_name)
end end
end end
end end

View File

@ -24,6 +24,7 @@ ActiveSupport::Inflector.inflections(:en) do |inflect|
inflect.acronym 'RSS' inflect.acronym 'RSS'
inflect.acronym 'REST' inflect.acronym 'REST'
inflect.acronym 'URL' inflect.acronym 'URL'
inflect.acronym 'ASCII'
inflect.singular 'data', 'data' inflect.singular 'data', 'data'
end end

View File

@ -0,0 +1,5 @@
# Adds a `display_name` string column to the `tags` table. No default value
# or NOT NULL constraint is specified.
class AddDisplayNameToTags < ActiveRecord::Migration[6.1]
  def change
    add_column(:tags, :display_name, :string)
  end
end

View File

@ -10,7 +10,7 @@
# #
# It's strongly recommended that you check this file into your version control system. # It's strongly recommended that you check this file into your version control system.
ActiveRecord::Schema.define(version: 2022_07_04_024901) do ActiveRecord::Schema.define(version: 2022_07_10_102457) do
# These are extensions that must be enabled in order to support this database # These are extensions that must be enabled in order to support this database
enable_extension "plpgsql" enable_extension "plpgsql"
@ -940,6 +940,7 @@ ActiveRecord::Schema.define(version: 2022_07_04_024901) do
t.datetime "last_status_at" t.datetime "last_status_at"
t.float "max_score" t.float "max_score"
t.datetime "max_score_at" t.datetime "max_score_at"
t.string "display_name"
t.index "lower((name)::text) text_pattern_ops", name: "index_tags_on_name_lower_btree", unique: true t.index "lower((name)::text) text_pattern_ops", name: "index_tags_on_name_lower_btree", unique: true
end end

View File

@ -0,0 +1,29 @@
# frozen_string_literal: true
require 'rails_helper'
describe HashtagNormalizer do
  subject { described_class.new }

  describe '#normalize' do
    it 'converts full-width Latin characters into basic Latin characters' do
      # Full-width "Synthwave" (U+FF33 U+FF59 ...), written as escapes so the
      # input cannot be silently normalized or stripped by editors and tools.
      full_width = "\uFF33\uFF59\uFF4E\uFF54\uFF48\uFF57\uFF41\uFF56\uFF45"

      expect(subject.normalize(full_width)).to eq 'synthwave'
    end

    it 'converts half-width Katakana into Kana characters' do
      # Half-width Katakana spelling (including the separate voiced sound
      # mark U+FF9E), written as escapes for the same reason as above.
      half_width = "\uFF7C\uFF70\uFF7B\uFF72\uFF84\uFF9E\uFF97\uFF72\uFF85\uFF70"

      expect(subject.normalize(half_width)).to eq 'シーサイドライナー'
    end

    it 'converts modified Latin characters into basic Latin characters' do
      expect(subject.normalize('BLÅHAJ')).to eq 'blahaj'
    end

    it 'strips out invalid characters' do
      expect(subject.normalize('#foo')).to eq 'foo'
    end

    it 'keeps valid characters' do
      expect(subject.normalize('a·b')).to eq 'a·b'
    end
  end
end

View File

@ -91,7 +91,7 @@ RSpec.describe Tag, type: :model do
upcase_string = 'abcABCやゆよ' upcase_string = 'abcABCやゆよ'
downcase_string = 'abcabcやゆよ'; downcase_string = 'abcabcやゆよ';
tag = Fabricate(:tag, name: downcase_string) tag = Fabricate(:tag, name: HashtagNormalizer.new.normalize(downcase_string))
expect(Tag.find_normalized(upcase_string)).to eq tag expect(Tag.find_normalized(upcase_string)).to eq tag
end end
end end
@ -101,12 +101,12 @@ RSpec.describe Tag, type: :model do
upcase_string = 'abcABCやゆよ' upcase_string = 'abcABCやゆよ'
downcase_string = 'abcabcやゆよ'; downcase_string = 'abcabcやゆよ';
tag = Fabricate(:tag, name: downcase_string) tag = Fabricate(:tag, name: HashtagNormalizer.new.normalize(downcase_string))
expect(Tag.matches_name(upcase_string)).to eq [tag] expect(Tag.matches_name(upcase_string)).to eq [tag]
end end
it 'uses the LIKE operator' do it 'uses the LIKE operator' do
expect(Tag.matches_name('100%abc').to_sql).to eq %q[SELECT "tags".* FROM "tags" WHERE LOWER("tags"."name") LIKE LOWER('100\\%abc%')] expect(Tag.matches_name('100%abc').to_sql).to eq %q[SELECT "tags".* FROM "tags" WHERE LOWER("tags"."name") LIKE LOWER('100abc%')]
end end
end end
@ -115,7 +115,7 @@ RSpec.describe Tag, type: :model do
upcase_string = 'abcABCやゆよ' upcase_string = 'abcABCやゆよ'
downcase_string = 'abcabcやゆよ'; downcase_string = 'abcabcやゆよ';
tag = Fabricate(:tag, name: downcase_string) tag = Fabricate(:tag, name: HashtagNormalizer.new.normalize(downcase_string))
expect(Tag.matching_name(upcase_string)).to eq [tag] expect(Tag.matching_name(upcase_string)).to eq [tag]
end end
end end

View File

@ -892,6 +892,34 @@ const startWorker = async (workerId) => {
return arr; return arr;
}; };
/**
 * See app/lib/ascii_folding.rb for the canonical definitions
 * of these constants.
 *
 * The two strings are positional lookup tables: the character at index i of
 * NON_ASCII_CHARS maps to the character at index i of
 * EQUIVALENT_ASCII_CHARS, so both must have exactly the same length and
 * every entry must be a single UTF-16 code unit.
 */
const NON_ASCII_CHARS = 'ÀÁÂÃÄÅàáâãäåĀāĂăĄąÇçĆćĈĉĊċČčÐðĎďĐđÈÉÊËèéêëĒēĔĕĖėĘęĚěĜĝĞğĠġĢģĤĥĦħÌÍÎÏìíîïĨĩĪīĬĭĮįİıĴĵĶķĸĹĺĻļĽľĿŀŁłÑñŃńŅņŇňŉŊŋÒÓÔÕÖØòóôõöøŌōŎŏŐőŔŕŖŗŘřŚśŜŝŞşŠšſŢţŤťŦŧÙÚÛÜùúûüŨũŪūŬŭŮůŰűŲųŴŵÝýÿŶŷŸŹźŻżŽž';
const EQUIVALENT_ASCII_CHARS = 'AAAAAAaaaaaaAaAaAaCcCcCcCcCcDdDdDdEEEEeeeeEeEeEeEeEeGgGgGgGgHhHhIIIIiiiiIiIiIiIiIiJjKkkLlLlLlLlLlNnNnNnNnnNnOOOOOOooooooOoOoOoRrRrRrSsSsSsSssTtTtTtUUUUuuuuUuUuUuUuUuUuWwYyyYyYZzZzZz';
/**
 * Replaces every character of `str` that is listed in NON_ASCII_CHARS with
 * the character at the same index of EQUIVALENT_ASCII_CHARS.
 * @param {string} str
 * @return {string}
 */
const foldToASCII = str => {
  // A character class over the table matches the same single characters as
  // an alternation of each of them.
  const foldable = new RegExp(`[${NON_ASCII_CHARS}]`, 'g');

  return str.replace(foldable, ch => EQUIVALENT_ASCII_CHARS[NON_ASCII_CHARS.indexOf(ch)]);
};
/**
 * Normalizes a hashtag: NFKC normalization, lowercasing, ASCII folding via
 * foldToASCII, then removal of every character that is not a letter, a
 * number, `_`, U+00B7 or U+200C.
 * @param {string} str
 * @return {string}
 */
const normalizeHashtag = str => {
  const lowered = str.normalize('NFKC').toLowerCase();
  const folded = foldToASCII(lowered);

  return folded.replace(/[^\p{L}\p{N}_\u00b7\u200c]/gu, '');
};
/** /**
* @param {any} req * @param {any} req
* @param {string} name * @param {string} name
@ -968,7 +996,7 @@ const startWorker = async (workerId) => {
reject('No tag for stream provided'); reject('No tag for stream provided');
} else { } else {
resolve({ resolve({
channelIds: [`timeline:hashtag:${params.tag.toLowerCase()}`], channelIds: [`timeline:hashtag:${normalizeHashtag(params.tag)}`],
options: { needsFiltering: true }, options: { needsFiltering: true },
}); });
} }
@ -979,7 +1007,7 @@ const startWorker = async (workerId) => {
reject('No tag for stream provided'); reject('No tag for stream provided');
} else { } else {
resolve({ resolve({
channelIds: [`timeline:hashtag:${params.tag.toLowerCase()}:local`], channelIds: [`timeline:hashtag:${normalizeHashtag(params.tag)}:local`],
options: { needsFiltering: true }, options: { needsFiltering: true },
}); });
} }