Change domain block CSV parsing to be more robust and handle more lists (#21470)

* Change domain block CSV parsing to be more robust and handle more lists

* Add some tests

* Improve domain block import validation and reporting
pull/2087/head
Claire 2023-01-18 16:20:52 +01:00 committed by GitHub
parent 302fcb9788
commit fcc4c9b34a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 90 additions and 39 deletions

View File

@ -23,9 +23,7 @@ module Admin
@import = Admin::Import.new(import_params)
return render :new unless @import.validate
parse_import_data!(export_headers)
@data.take(Admin::Import::ROWS_PROCESSING_LIMIT).each do |row|
@import.csv_rows.each do |row|
domain = row['#domain'].strip
next if DomainAllow.allowed?(domain)

View File

@ -23,24 +23,30 @@ module Admin
@import = Admin::Import.new(import_params)
return render :new unless @import.validate
parse_import_data!(export_headers)
@global_private_comment = I18n.t('admin.export_domain_blocks.import.private_comment_template', source: @import.data_file_name, date: I18n.l(Time.now.utc))
@form = Form::DomainBlockBatch.new
@domain_blocks = @data.take(Admin::Import::ROWS_PROCESSING_LIMIT).filter_map do |row|
@domain_blocks = @import.csv_rows.filter_map do |row|
domain = row['#domain'].strip
next if DomainBlock.rule_for(domain).present?
domain_block = DomainBlock.new(domain: domain,
severity: row['#severity'].strip,
reject_media: row['#reject_media'].strip,
reject_reports: row['#reject_reports'].strip,
severity: row.fetch('#severity', :suspend),
reject_media: row.fetch('#reject_media', false),
reject_reports: row.fetch('#reject_reports', false),
private_comment: @global_private_comment,
public_comment: row['#public_comment']&.strip,
obfuscate: row['#obfuscate'].strip)
public_comment: row['#public_comment'],
obfuscate: row.fetch('#obfuscate', false))
domain_block if domain_block.valid?
if domain_block.invalid?
flash.now[:alert] = I18n.t('admin.export_domain_blocks.invalid_domain_block', error: domain_block.errors.full_messages.join(', '))
next
end
domain_block
rescue ArgumentError => e
flash.now[:alert] = I18n.t('admin.export_domain_blocks.invalid_domain_block', error: e.message)
next
end
@warning_domains = Instance.where(domain: @domain_blocks.map(&:domain)).where('EXISTS (SELECT 1 FROM follows JOIN accounts ON follows.account_id = accounts.id OR follows.target_account_id = accounts.id WHERE accounts.domain = instances.domain)').pluck(:domain)

View File

@ -26,14 +26,4 @@ module AdminExportControllerConcern
def import_params
params.require(:admin_import).permit(:data)
end
def import_data_path
params[:admin_import][:data].path
end
def parse_import_data!(default_headers)
data = CSV.read(import_data_path, headers: true, encoding: 'UTF-8')
data = CSV.read(import_data_path, headers: default_headers, encoding: 'UTF-8') unless data.headers&.first&.strip&.include?(default_headers[0])
@data = data.reject(&:blank?)
end
end

View File

@ -1,5 +1,7 @@
# frozen_string_literal: true
require 'csv'
# A non-activerecord helper class for csv upload
class Admin::Import
include ActiveModel::Model
@ -15,17 +17,46 @@ class Admin::Import
data.original_filename
end
def csv_rows
csv_data.rewind
csv_data.take(ROWS_PROCESSING_LIMIT + 1)
end
private
def csv_data
return @csv_data if defined?(@csv_data)
csv_converter = lambda do |field, field_info|
case field_info.header
when '#domain', '#public_comment'
field&.strip
when '#severity'
field&.strip&.to_sym
when '#reject_media', '#reject_reports', '#obfuscate'
ActiveModel::Type::Boolean.new.cast(field)
else
field
end
end
@csv_data = CSV.open(data.path, encoding: 'UTF-8', skip_blanks: true, headers: true, converters: csv_converter)
@csv_data.take(1) # Ensure the headers are read
@csv_data = CSV.open(data.path, encoding: 'UTF-8', skip_blanks: true, headers: ['#domain'], converters: csv_converter) unless @csv_data.headers&.first == '#domain'
@csv_data
end
def csv_row_count
return @csv_row_count if defined?(@csv_row_count)
csv_data.rewind
@csv_row_count = csv_data.take(ROWS_PROCESSING_LIMIT + 2).count
end
def validate_data
return if data.blank?
csv_data = CSV.read(data.path, encoding: 'UTF-8')
row_count = csv_data.size
row_count -= 1 if csv_data.first&.first == '#domain'
errors.add(:data, I18n.t('imports.errors.over_rows_processing_limit', count: ROWS_PROCESSING_LIMIT)) if row_count > ROWS_PROCESSING_LIMIT
return if data.nil?
errors.add(:data, I18n.t('imports.errors.over_rows_processing_limit', count: ROWS_PROCESSING_LIMIT)) if csv_row_count > ROWS_PROCESSING_LIMIT
rescue CSV::MalformedCSVError => e
errors.add(:data, I18n.t('imports.errors.invalid_csv_file', error: e.message))
end

View File

@ -441,6 +441,7 @@ en:
private_comment_description_html: 'To help you track where imported blocks come from, imported blocks will be created with the following private comment: <q>%{comment}</q>'
private_comment_template: Imported from %{source} on %{date}
title: Import domain blocks
invalid_domain_block: 'One or more domain blocks were skipped because of the following error(s): %{error}'
new:
title: Import domain blocks
no_file: No file selected

View File

@ -9,9 +9,9 @@ RSpec.describe Admin::ExportDomainBlocksController, type: :controller do
describe 'GET #export' do
it 'renders instances' do
Fabricate(:domain_block, domain: 'bad.domain', severity: 'silence', public_comment: 'bad')
Fabricate(:domain_block, domain: 'worse.domain', severity: 'suspend', reject_media: true, reject_reports: true, public_comment: 'worse', obfuscate: true)
Fabricate(:domain_block, domain: 'reject.media', severity: 'noop', reject_media: true, public_comment: 'reject media')
Fabricate(:domain_block, domain: 'bad.domain', severity: 'silence', public_comment: 'bad server')
Fabricate(:domain_block, domain: 'worse.domain', severity: 'suspend', reject_media: true, reject_reports: true, public_comment: 'worse server', obfuscate: true)
Fabricate(:domain_block, domain: 'reject.media', severity: 'noop', reject_media: true, public_comment: 'reject media and test unicode characters ♥')
Fabricate(:domain_block, domain: 'no.op', severity: 'noop', public_comment: 'noop')
get :export, params: { format: :csv }
@ -21,10 +21,32 @@ RSpec.describe Admin::ExportDomainBlocksController, type: :controller do
end
describe 'POST #import' do
it 'blocks imported domains' do
post :import, params: { admin_import: { data: fixture_file_upload('domain_blocks.csv') } }
context 'with complete domain blocks CSV' do
before do
post :import, params: { admin_import: { data: fixture_file_upload('domain_blocks.csv') } }
end
expect(assigns(:domain_blocks).map(&:domain)).to match_array ['bad.domain', 'worse.domain', 'reject.media']
it 'renders page with expected domain blocks' do
expect(assigns(:domain_blocks).map { |block| [block.domain, block.severity.to_sym] }).to match_array [['bad.domain', :silence], ['worse.domain', :suspend], ['reject.media', :noop]]
end
it 'returns http success' do
expect(response).to have_http_status(200)
end
end
context 'with a list of only domains' do
before do
post :import, params: { admin_import: { data: fixture_file_upload('domain_blocks_list.txt') } }
end
it 'renders page with expected domain blocks' do
expect(assigns(:domain_blocks).map { |block| [block.domain, block.severity.to_sym] }).to match_array [['bad.domain', :suspend], ['worse.domain', :suspend], ['reject.media', :suspend]]
end
it 'returns http success' do
expect(response).to have_http_status(200)
end
end
end

View File

@ -1,4 +1,4 @@
#domain,#severity,#reject_media,#reject_reports,#public_comment,#obfuscate
bad.domain,silence,false,false,bad,false
worse.domain,suspend,true,true,worse,true
reject.media,noop,true,false,reject media,false
bad.domain,silence,false,false,bad server,false
worse.domain,suspend,true,true,worse server,true
reject.media,noop,true,false,reject media and test unicode characters ♥,false

1 #domain #severity #reject_media #reject_reports #public_comment #obfuscate
2 bad.domain silence false false bad bad server false
3 worse.domain suspend true true worse worse server true
4 reject.media noop true false reject media reject media and test unicode characters ♥ false

View File

@ -0,0 +1,3 @@
bad.domain
worse.domain
reject.media