forked from treehouse/mastodon
Bump chewy from 5.2.0 to 7.2.3 (supports Elasticsearch 7.x) (#16915)
* Bump chewy from 5.2.0 to 7.2.2 * fix style (codeclimate) * fix style * fix style * Bump chewy from 7.2.2 to 7.2.3
rebase/4.0.0rc2
parent
2b6a25c609
commit
3419d3ec84
2
Gemfile
2
Gemfile
|
@ -30,7 +30,7 @@ gem 'bootsnap', '~> 1.9.1', require: false
|
|||
gem 'browser'
|
||||
gem 'charlock_holmes', '~> 0.7.7'
|
||||
gem 'iso-639'
|
||||
gem 'chewy', '~> 5.2'
|
||||
gem 'chewy', '~> 7.2'
|
||||
gem 'cld3', '~> 3.4.2'
|
||||
gem 'devise', '~> 4.8'
|
||||
gem 'devise-two-factor', '~> 4.0'
|
||||
|
|
38
Gemfile.lock
38
Gemfile.lock
|
@ -147,9 +147,9 @@ GEM
|
|||
activesupport
|
||||
cbor (0.5.9.6)
|
||||
charlock_holmes (0.7.7)
|
||||
chewy (5.2.0)
|
||||
chewy (7.2.3)
|
||||
activesupport (>= 5.2)
|
||||
elasticsearch (>= 2.0.0)
|
||||
elasticsearch (>= 7.12.0, < 7.14.0)
|
||||
elasticsearch-dsl
|
||||
chunky_png (1.4.0)
|
||||
cld3 (3.4.2)
|
||||
|
@ -197,13 +197,13 @@ GEM
|
|||
railties (>= 3.2)
|
||||
e2mmap (0.1.0)
|
||||
ed25519 (1.2.4)
|
||||
elasticsearch (7.10.1)
|
||||
elasticsearch-api (= 7.10.1)
|
||||
elasticsearch-transport (= 7.10.1)
|
||||
elasticsearch-api (7.10.1)
|
||||
elasticsearch (7.13.3)
|
||||
elasticsearch-api (= 7.13.3)
|
||||
elasticsearch-transport (= 7.13.3)
|
||||
elasticsearch-api (7.13.3)
|
||||
multi_json
|
||||
elasticsearch-dsl (0.1.9)
|
||||
elasticsearch-transport (7.10.1)
|
||||
elasticsearch-dsl (0.1.10)
|
||||
elasticsearch-transport (7.13.3)
|
||||
faraday (~> 1)
|
||||
multi_json
|
||||
encryptor (3.0.0)
|
||||
|
@ -214,11 +214,25 @@ GEM
|
|||
fabrication (2.22.0)
|
||||
faker (2.19.0)
|
||||
i18n (>= 1.6, < 2)
|
||||
faraday (1.3.0)
|
||||
faraday (1.8.0)
|
||||
faraday-em_http (~> 1.0)
|
||||
faraday-em_synchrony (~> 1.0)
|
||||
faraday-excon (~> 1.1)
|
||||
faraday-httpclient (~> 1.0.1)
|
||||
faraday-net_http (~> 1.0)
|
||||
faraday-net_http_persistent (~> 1.1)
|
||||
faraday-patron (~> 1.0)
|
||||
faraday-rack (~> 1.0)
|
||||
multipart-post (>= 1.2, < 3)
|
||||
ruby2_keywords
|
||||
ruby2_keywords (>= 0.0.4)
|
||||
faraday-em_http (1.0.0)
|
||||
faraday-em_synchrony (1.0.0)
|
||||
faraday-excon (1.1.0)
|
||||
faraday-httpclient (1.0.1)
|
||||
faraday-net_http (1.0.1)
|
||||
faraday-net_http_persistent (1.2.0)
|
||||
faraday-patron (1.0.0)
|
||||
faraday-rack (1.0.0)
|
||||
fast_blank (1.0.1)
|
||||
fastimage (2.2.5)
|
||||
ffi (1.15.4)
|
||||
|
@ -539,7 +553,7 @@ GEM
|
|||
ruby-saml (1.13.0)
|
||||
nokogiri (>= 1.10.5)
|
||||
rexml
|
||||
ruby2_keywords (0.0.4)
|
||||
ruby2_keywords (0.0.5)
|
||||
rufus-scheduler (3.7.0)
|
||||
fugit (~> 1.1, >= 1.1.6)
|
||||
safety_net_attestation (0.4.0)
|
||||
|
@ -686,7 +700,7 @@ DEPENDENCIES
|
|||
capistrano-yarn (~> 2.0)
|
||||
capybara (~> 3.36)
|
||||
charlock_holmes (~> 0.7.7)
|
||||
chewy (~> 5.2)
|
||||
chewy (~> 7.2)
|
||||
cld3 (~> 3.4.2)
|
||||
climate_control (~> 0.2)
|
||||
color_diff (~> 0.1)
|
||||
|
|
|
@ -23,21 +23,21 @@ class AccountsIndex < Chewy::Index
|
|||
},
|
||||
}
|
||||
|
||||
define_type ::Account.searchable.includes(:account_stat), delete_if: ->(account) { account.destroyed? || !account.searchable? } do
|
||||
root date_detection: false do
|
||||
field :id, type: 'long'
|
||||
index_scope ::Account.searchable.includes(:account_stat), delete_if: ->(account) { account.destroyed? || !account.searchable? }
|
||||
|
||||
field :display_name, type: 'text', analyzer: 'content' do
|
||||
field :edge_ngram, type: 'text', analyzer: 'edge_ngram', search_analyzer: 'content'
|
||||
end
|
||||
root date_detection: false do
|
||||
field :id, type: 'long'
|
||||
|
||||
field :acct, type: 'text', analyzer: 'content', value: ->(account) { [account.username, account.domain].compact.join('@') } do
|
||||
field :edge_ngram, type: 'text', analyzer: 'edge_ngram', search_analyzer: 'content'
|
||||
end
|
||||
|
||||
field :following_count, type: 'long', value: ->(account) { account.following.local.count }
|
||||
field :followers_count, type: 'long', value: ->(account) { account.followers.local.count }
|
||||
field :last_status_at, type: 'date', value: ->(account) { account.last_status_at || account.created_at }
|
||||
field :display_name, type: 'text', analyzer: 'content' do
|
||||
field :edge_ngram, type: 'text', analyzer: 'edge_ngram', search_analyzer: 'content'
|
||||
end
|
||||
|
||||
field :acct, type: 'text', analyzer: 'content', value: ->(account) { [account.username, account.domain].compact.join('@') } do
|
||||
field :edge_ngram, type: 'text', analyzer: 'edge_ngram', search_analyzer: 'content'
|
||||
end
|
||||
|
||||
field :following_count, type: 'long', value: ->(account) { account.following.local.count }
|
||||
field :followers_count, type: 'long', value: ->(account) { account.followers.local.count }
|
||||
field :last_status_at, type: 'date', value: ->(account) { account.last_status_at || account.created_at }
|
||||
end
|
||||
end
|
||||
|
|
|
@ -31,36 +31,36 @@ class StatusesIndex < Chewy::Index
|
|||
},
|
||||
}
|
||||
|
||||
define_type ::Status.unscoped.kept.without_reblogs.includes(:media_attachments, :preloadable_poll) do
|
||||
crutch :mentions do |collection|
|
||||
data = ::Mention.where(status_id: collection.map(&:id)).where(account: Account.local, silent: false).pluck(:status_id, :account_id)
|
||||
data.each.with_object({}) { |(id, name), result| (result[id] ||= []).push(name) }
|
||||
index_scope ::Status.unscoped.kept.without_reblogs.includes(:media_attachments, :preloadable_poll)
|
||||
|
||||
crutch :mentions do |collection|
|
||||
data = ::Mention.where(status_id: collection.map(&:id)).where(account: Account.local, silent: false).pluck(:status_id, :account_id)
|
||||
data.each.with_object({}) { |(id, name), result| (result[id] ||= []).push(name) }
|
||||
end
|
||||
|
||||
crutch :favourites do |collection|
|
||||
data = ::Favourite.where(status_id: collection.map(&:id)).where(account: Account.local).pluck(:status_id, :account_id)
|
||||
data.each.with_object({}) { |(id, name), result| (result[id] ||= []).push(name) }
|
||||
end
|
||||
|
||||
crutch :reblogs do |collection|
|
||||
data = ::Status.where(reblog_of_id: collection.map(&:id)).where(account: Account.local).pluck(:reblog_of_id, :account_id)
|
||||
data.each.with_object({}) { |(id, name), result| (result[id] ||= []).push(name) }
|
||||
end
|
||||
|
||||
crutch :bookmarks do |collection|
|
||||
data = ::Bookmark.where(status_id: collection.map(&:id)).where(account: Account.local).pluck(:status_id, :account_id)
|
||||
data.each.with_object({}) { |(id, name), result| (result[id] ||= []).push(name) }
|
||||
end
|
||||
|
||||
root date_detection: false do
|
||||
field :id, type: 'long'
|
||||
field :account_id, type: 'long'
|
||||
|
||||
field :text, type: 'text', value: ->(status) { [status.spoiler_text, Formatter.instance.plaintext(status)].concat(status.media_attachments.map(&:description)).concat(status.preloadable_poll ? status.preloadable_poll.options : []).join("\n\n") } do
|
||||
field :stemmed, type: 'text', analyzer: 'content'
|
||||
end
|
||||
|
||||
crutch :favourites do |collection|
|
||||
data = ::Favourite.where(status_id: collection.map(&:id)).where(account: Account.local).pluck(:status_id, :account_id)
|
||||
data.each.with_object({}) { |(id, name), result| (result[id] ||= []).push(name) }
|
||||
end
|
||||
|
||||
crutch :reblogs do |collection|
|
||||
data = ::Status.where(reblog_of_id: collection.map(&:id)).where(account: Account.local).pluck(:reblog_of_id, :account_id)
|
||||
data.each.with_object({}) { |(id, name), result| (result[id] ||= []).push(name) }
|
||||
end
|
||||
|
||||
crutch :bookmarks do |collection|
|
||||
data = ::Bookmark.where(status_id: collection.map(&:id)).where(account: Account.local).pluck(:status_id, :account_id)
|
||||
data.each.with_object({}) { |(id, name), result| (result[id] ||= []).push(name) }
|
||||
end
|
||||
|
||||
root date_detection: false do
|
||||
field :id, type: 'long'
|
||||
field :account_id, type: 'long'
|
||||
|
||||
field :text, type: 'text', value: ->(status) { [status.spoiler_text, Formatter.instance.plaintext(status)].concat(status.media_attachments.map(&:description)).concat(status.preloadable_poll ? status.preloadable_poll.options : []).join("\n\n") } do
|
||||
field :stemmed, type: 'text', analyzer: 'content'
|
||||
end
|
||||
|
||||
field :searchable_by, type: 'long', value: ->(status, crutches) { status.searchable_by(crutches) }
|
||||
end
|
||||
field :searchable_by, type: 'long', value: ->(status, crutches) { status.searchable_by(crutches) }
|
||||
end
|
||||
end
|
||||
|
|
|
@ -23,15 +23,15 @@ class TagsIndex < Chewy::Index
|
|||
},
|
||||
}
|
||||
|
||||
define_type ::Tag.listable, delete_if: ->(tag) { tag.destroyed? || !tag.listable? } do
|
||||
root date_detection: false do
|
||||
field :name, type: 'text', analyzer: 'content' do
|
||||
field :edge_ngram, type: 'text', analyzer: 'edge_ngram', search_analyzer: 'content'
|
||||
end
|
||||
index_scope ::Tag.listable, delete_if: ->(tag) { tag.destroyed? || !tag.listable? }
|
||||
|
||||
field :reviewed, type: 'boolean', value: ->(tag) { tag.reviewed? }
|
||||
field :usage, type: 'long', value: ->(tag) { tag.history.reduce(0) { |total, day| total + day[:accounts].to_i } }
|
||||
field :last_status_at, type: 'date', value: ->(tag) { tag.last_status_at || tag.created_at }
|
||||
root date_detection: false do
|
||||
field :name, type: 'text', analyzer: 'content' do
|
||||
field :edge_ngram, type: 'text', analyzer: 'edge_ngram', search_analyzer: 'content'
|
||||
end
|
||||
|
||||
field :reviewed, type: 'boolean', value: ->(tag) { tag.reviewed? }
|
||||
field :usage, type: 'long', value: ->(tag) { tag.history.reduce(0) { |total, day| total + day[:accounts].to_i } }
|
||||
field :last_status_at, type: 'date', value: ->(tag) { tag.last_status_at || tag.created_at }
|
||||
end
|
||||
end
|
||||
|
|
|
@ -143,7 +143,7 @@ class Account < ApplicationRecord
|
|||
|
||||
delegate :chosen_languages, to: :user, prefix: false, allow_nil: true
|
||||
|
||||
update_index('accounts#account', :self)
|
||||
update_index('accounts', :self)
|
||||
|
||||
def local?
|
||||
domain.nil?
|
||||
|
|
|
@ -19,5 +19,5 @@ class AccountStat < ApplicationRecord
|
|||
|
||||
belongs_to :account, inverse_of: :account_stat
|
||||
|
||||
update_index('accounts#account', :account)
|
||||
update_index('accounts', :account)
|
||||
end
|
||||
|
|
|
@ -13,7 +13,7 @@
|
|||
class Bookmark < ApplicationRecord
|
||||
include Paginable
|
||||
|
||||
update_index('statuses#status', :status) if Chewy.enabled?
|
||||
update_index('statuses', :status) if Chewy.enabled?
|
||||
|
||||
belongs_to :account, inverse_of: :bookmarks
|
||||
belongs_to :status, inverse_of: :bookmarks
|
||||
|
|
|
@ -13,7 +13,7 @@
|
|||
class Favourite < ApplicationRecord
|
||||
include Paginable
|
||||
|
||||
update_index('statuses#status', :status)
|
||||
update_index('statuses', :status)
|
||||
|
||||
belongs_to :account, inverse_of: :favourites
|
||||
belongs_to :status, inverse_of: :favourites
|
||||
|
|
|
@ -42,7 +42,7 @@ class Status < ApplicationRecord
|
|||
# will be based on current time instead of `created_at`
|
||||
attr_accessor :override_timestamps
|
||||
|
||||
update_index('statuses#status', :proper)
|
||||
update_index('statuses', :proper)
|
||||
|
||||
enum visibility: [:public, :unlisted, :private, :direct, :limited], _suffix: :visibility
|
||||
|
||||
|
|
|
@ -39,7 +39,7 @@ class Tag < ApplicationRecord
|
|||
scope :recently_used, ->(account) { joins(:statuses).where(statuses: { id: account.statuses.select(:id).limit(1000) }).group(:id).order(Arel.sql('count(*) desc')) }
|
||||
scope :matches_name, ->(term) { where(arel_table[:name].lower.matches(arel_table.lower("#{sanitize_sql_like(Tag.normalize(term))}%"), nil, true)) } # Search with case-sensitive to use B-tree index
|
||||
|
||||
update_index('tags#tag', :self)
|
||||
update_index('tags', :self)
|
||||
|
||||
def to_param
|
||||
name
|
||||
|
|
|
@ -31,7 +31,7 @@ class BatchedRemoveStatusService < BaseService
|
|||
|
||||
# Since we skipped all callbacks, we also need to manually
|
||||
# deindex the statuses
|
||||
Chewy.strategy.current.update(StatusesIndex::Status, statuses_and_reblogs) if Chewy.enabled?
|
||||
Chewy.strategy.current.update(StatusesIndex, statuses_and_reblogs) if Chewy.enabled?
|
||||
|
||||
return if options[:skip_side_effects]
|
||||
|
||||
|
|
|
@ -189,7 +189,7 @@ class DeleteAccountService < BaseService
|
|||
@account.favourites.in_batches do |favourites|
|
||||
ids = favourites.pluck(:status_id)
|
||||
StatusStat.where(status_id: ids).update_all('favourites_count = GREATEST(0, favourites_count - 1)')
|
||||
Chewy.strategy.current.update(StatusesIndex::Status, ids) if Chewy.enabled?
|
||||
Chewy.strategy.current.update(StatusesIndex, ids) if Chewy.enabled?
|
||||
Rails.cache.delete_multi(ids.map { |id| "statuses/#{id}" })
|
||||
favourites.delete_all
|
||||
end
|
||||
|
@ -197,7 +197,7 @@ class DeleteAccountService < BaseService
|
|||
|
||||
def purge_bookmarks!
|
||||
@account.bookmarks.in_batches do |bookmarks|
|
||||
Chewy.strategy.current.update(StatusesIndex::Status, bookmarks.pluck(:status_id)) if Chewy.enabled?
|
||||
Chewy.strategy.current.update(StatusesIndex, bookmarks.pluck(:status_id)) if Chewy.enabled?
|
||||
bookmarks.delete_all
|
||||
end
|
||||
end
|
||||
|
|
|
@ -37,23 +37,3 @@ end
|
|||
# Mastodon is run with hidden services enabled, because
|
||||
# ElasticSearch is *not* supposed to be accessed through a proxy
|
||||
Faraday.ignore_env_proxy = true
|
||||
|
||||
# Elasticsearch 7.x workaround
|
||||
Elasticsearch::Transport::Client.prepend Module.new {
|
||||
def search(arguments = {})
|
||||
arguments[:rest_total_hits_as_int] = true
|
||||
super arguments
|
||||
end
|
||||
}
|
||||
|
||||
Elasticsearch::API::Indices::IndicesClient.prepend Module.new {
|
||||
def create(arguments = {})
|
||||
arguments[:include_type_name] = true
|
||||
super arguments
|
||||
end
|
||||
|
||||
def put_mapping(arguments = {})
|
||||
arguments[:include_type_name] = true
|
||||
super arguments
|
||||
end
|
||||
}
|
||||
|
|
|
@ -64,11 +64,7 @@ module Mastodon
|
|||
progress.title = 'Estimating workload '
|
||||
|
||||
# Estimate the amount of data that has to be imported first
|
||||
indices.each do |index|
|
||||
index.types.each do |type|
|
||||
progress.total = (progress.total || 0) + type.adapter.default_scope.count
|
||||
end
|
||||
end
|
||||
progress.total = indices.sum { |index| index.adapter.default_scope.count }
|
||||
|
||||
# Now import all the actual data. Mind that unlike chewy:sync, we don't
|
||||
# fetch and compare all record IDs from the database and the index to
|
||||
|
@ -80,67 +76,68 @@ module Mastodon
|
|||
batch_size = 1_000
|
||||
slice_size = (batch_size / options[:concurrency]).ceil
|
||||
|
||||
index.types.each do |type|
|
||||
type.adapter.default_scope.reorder(nil).find_in_batches(batch_size: batch_size) do |batch|
|
||||
futures = []
|
||||
index.adapter.default_scope.reorder(nil).find_in_batches(batch_size: batch_size) do |batch|
|
||||
futures = []
|
||||
|
||||
batch.each_slice(slice_size) do |records|
|
||||
futures << Concurrent::Future.execute(executor: pool) do
|
||||
begin
|
||||
if !progress.total.nil? && progress.progress + records.size > progress.total
|
||||
# The number of items has changed between start and now,
|
||||
# since there is no good way to predict the final count from
|
||||
# here, just change the progress bar to an indeterminate one
|
||||
batch.each_slice(slice_size) do |records|
|
||||
futures << Concurrent::Future.execute(executor: pool) do
|
||||
begin
|
||||
if !progress.total.nil? && progress.progress + records.size > progress.total
|
||||
# The number of items has changed between start and now,
|
||||
# since there is no good way to predict the final count from
|
||||
# here, just change the progress bar to an indeterminate one
|
||||
|
||||
progress.total = nil
|
||||
progress.total = nil
|
||||
end
|
||||
|
||||
grouped_records = nil
|
||||
bulk_body = nil
|
||||
index_count = 0
|
||||
delete_count = 0
|
||||
|
||||
ActiveRecord::Base.connection_pool.with_connection do
|
||||
grouped_records = records.to_a.group_by do |record|
|
||||
index.adapter.send(:delete_from_index?, record) ? :delete : :to_index
|
||||
end
|
||||
|
||||
grouped_records = nil
|
||||
bulk_body = nil
|
||||
index_count = 0
|
||||
delete_count = 0
|
||||
bulk_body = Chewy::Index::Import::BulkBuilder.new(index, **grouped_records).bulk_body
|
||||
end
|
||||
|
||||
ActiveRecord::Base.connection_pool.with_connection do
|
||||
grouped_records = type.adapter.send(:grouped_objects, records)
|
||||
bulk_body = Chewy::Type::Import::BulkBuilder.new(type, **grouped_records).bulk_body
|
||||
end
|
||||
index_count = grouped_records[:to_index].size if grouped_records.key?(:to_index)
|
||||
delete_count = grouped_records[:delete].size if grouped_records.key?(:delete)
|
||||
|
||||
index_count = grouped_records[:index].size if grouped_records.key?(:index)
|
||||
delete_count = grouped_records[:delete].size if grouped_records.key?(:delete)
|
||||
# The following is an optimization for statuses specifically, since
|
||||
# we want to de-index statuses that cannot be searched by anybody,
|
||||
# but can't use Chewy's delete_if logic because it doesn't use
|
||||
# crutches and our searchable_by logic depends on them
|
||||
if index == StatusesIndex
|
||||
bulk_body.map! do |entry|
|
||||
if entry[:to_index] && entry.dig(:to_index, :data, 'searchable_by').blank?
|
||||
index_count -= 1
|
||||
delete_count += 1
|
||||
|
||||
# The following is an optimization for statuses specifically, since
|
||||
# we want to de-index statuses that cannot be searched by anybody,
|
||||
# but can't use Chewy's delete_if logic because it doesn't use
|
||||
# crutches and our searchable_by logic depends on them
|
||||
if type == StatusesIndex::Status
|
||||
bulk_body.map! do |entry|
|
||||
if entry[:index] && entry.dig(:index, :data, 'searchable_by').blank?
|
||||
index_count -= 1
|
||||
delete_count += 1
|
||||
|
||||
{ delete: entry[:index].except(:data) }
|
||||
else
|
||||
entry
|
||||
end
|
||||
{ delete: entry[:to_index].except(:data) }
|
||||
else
|
||||
entry
|
||||
end
|
||||
end
|
||||
|
||||
Chewy::Type::Import::BulkRequest.new(type).perform(bulk_body)
|
||||
|
||||
progress.progress += records.size
|
||||
|
||||
added.increment(index_count)
|
||||
removed.increment(delete_count)
|
||||
|
||||
sleep 1
|
||||
rescue => e
|
||||
progress.log pastel.red("Error importing #{index}: #{e}")
|
||||
end
|
||||
|
||||
Chewy::Index::Import::BulkRequest.new(index).perform(bulk_body)
|
||||
|
||||
progress.progress += records.size
|
||||
|
||||
added.increment(index_count)
|
||||
removed.increment(delete_count)
|
||||
|
||||
sleep 1
|
||||
rescue => e
|
||||
progress.log pastel.red("Error importing #{index}: #{e}")
|
||||
end
|
||||
end
|
||||
|
||||
futures.map(&:value)
|
||||
end
|
||||
|
||||
futures.map(&:value)
|
||||
end
|
||||
end
|
||||
|
||||
|
|
Loading…
Reference in New Issue