From b9fbcbfe4e0a15fcf8a457ce17ea080f0eb939fc Mon Sep 17 00:00:00 2001
From: Eugen Rochko
Date: Sat, 27 Jul 2019 04:42:08 +0200
Subject: [PATCH] Add search syntax for operators and phrases (#11411)

---
 Gemfile                              |  1 +
 Gemfile.lock                         |  2 +
 app/lib/search_query_parser.rb       | 16 +++++
 app/lib/search_query_transformer.rb  | 92 ++++++++++++++++++++++++++++
 app/services/search_service.rb       |  9 ++-
 spec/services/search_service_spec.rb |  6 +-
 6 files changed, 120 insertions(+), 6 deletions(-)
 create mode 100644 app/lib/search_query_parser.rb
 create mode 100644 app/lib/search_query_transformer.rb

diff --git a/Gemfile b/Gemfile
index e969c4ae0cb..96eb44af79c 100644
--- a/Gemfile
+++ b/Gemfile
@@ -64,6 +64,7 @@ gem 'nsa', '~> 0.2'
 gem 'oj', '~> 3.8'
 gem 'ostatus2', '~> 2.0'
 gem 'ox', '~> 2.11'
+gem 'parslet'
 gem 'posix-spawn', git: 'https://github.com/rtomayko/posix-spawn', ref: '58465d2e213991f8afb13b984854a49fcdcc980c'
 gem 'pundit', '~> 2.0'
 gem 'premailer-rails'
diff --git a/Gemfile.lock b/Gemfile.lock
index f435b3a3537..5adae6f9ce0 100644
--- a/Gemfile.lock
+++ b/Gemfile.lock
@@ -404,6 +404,7 @@ GEM
       parallel
     parser (2.6.3.0)
       ast (~> 2.4.0)
+    parslet (1.8.2)
     pastel (0.7.2)
       equatable (~> 0.5.0)
       tty-color (~> 0.4.0)
@@ -724,6 +725,7 @@ DEPENDENCIES
   paperclip (~> 6.0)
   paperclip-av-transcoder (~> 0.6)
   parallel_tests (~> 2.29)
+  parslet
   pg (~> 1.1)
   pghero (~> 2.2)
   pkg-config (~> 1.3)
diff --git a/app/lib/search_query_parser.rb b/app/lib/search_query_parser.rb
new file mode 100644
index 00000000000..405ad15b899
--- /dev/null
+++ b/app/lib/search_query_parser.rb
@@ -0,0 +1,16 @@
+# frozen_string_literal: true
+
+# Grammar for search queries. A query is a run of clauses; each clause is an
+# optional `prefix:`, an optional `+`/`-` operator, then a "quoted phrase" or a bare term.
+class SearchQueryParser < Parslet::Parser
+  rule(:term) { match('[^\s":]').repeat(1).as(:term) }
+  rule(:quote) { str('"') }
+  rule(:colon) { str(':') }
+  rule(:space) { match('\s').repeat(1) }
+  rule(:operator) { (str('+') | str('-')).as(:operator) }
+  rule(:prefix) { (term >> colon).as(:prefix) }
+  rule(:phrase) { (quote >> (term >> space.maybe).repeat >> quote).as(:phrase) }
+  rule(:clause) { (prefix.maybe >> operator.maybe >> (phrase | term)).as(:clause) }
+  rule(:query) { (clause >> space.maybe).repeat.as(:query) }
+  root(:query)
+end
diff --git a/app/lib/search_query_transformer.rb b/app/lib/search_query_transformer.rb
new file mode 100644
index 00000000000..2c4144790ba
--- /dev/null
+++ b/app/lib/search_query_transformer.rb
@@ -0,0 +1,92 @@
+# frozen_string_literal: true
+
+# Turns the tree produced by SearchQueryParser into a Query object.
+class SearchQueryTransformer < Parslet::Transform
+  # Holds the parsed clauses, bucketed by boolean operator, and applies
+  # them to a search definition.
+  class Query
+    attr_reader :should_clauses, :must_not_clauses, :must_clauses
+
+    # @param clauses [Array<TermClause, PhraseClause>] clauses in query order
+    def initialize(clauses)
+      # group_by (not chunk + to_h) so that clauses sharing an operator are all
+      # kept even when they are not adjacent in the query, e.g. `foo +bar baz`.
+      grouped = clauses.group_by(&:operator)
+      @should_clauses = grouped.fetch(:should, [])
+      @must_not_clauses = grouped.fetch(:must_not, [])
+      @must_clauses = grouped.fetch(:must, [])
+    end
+
+    def apply(search)
+      should_clauses.each { |clause| search = search.query.should(clause_to_query(clause)) }
+      must_clauses.each { |clause| search = search.query.must(clause_to_query(clause)) }
+      must_not_clauses.each { |clause| search = search.query.must_not(clause_to_query(clause)) }
+      search.query.minimum_should_match(1)
+    end
+
+    private
+
+    def clause_to_query(clause)
+      case clause
+      when TermClause
+        { multi_match: { type: 'most_fields', query: clause.term, fields: ['text', 'text.stemmed'] } }
+      when PhraseClause
+        { match_phrase: { text: { query: clause.phrase } } }
+      else
+        raise "Unexpected clause type: #{clause}"
+      end
+    end
+  end
+
+  class Operator
+    class << self
+      def symbol(str)
+        case str
+        when '+'
+          :must
+        when '-'
+          :must_not
+        when nil
+          :should
+        else
+          raise "Unknown operator: #{str}"
+        end
+      end
+    end
+  end
+
+  class TermClause
+    attr_reader :prefix, :operator, :term
+
+    def initialize(prefix, operator, term)
+      @prefix = prefix
+      @operator = Operator.symbol(operator)
+      @term = term
+    end
+  end
+
+  class PhraseClause
+    attr_reader :prefix, :operator, :phrase
+
+    def initialize(prefix, operator, phrase)
+      @prefix = prefix
+      @operator = Operator.symbol(operator)
+      @phrase = phrase
+    end
+  end
+
+  rule(clause: subtree(:clause)) do
+    prefix = clause[:prefix][:term].to_s if clause[:prefix]
+    operator = clause[:operator]&.to_s
+
+    if clause[:term]
+      TermClause.new(prefix, operator, clause[:term].to_s)
+    elsif clause[:phrase]
+      PhraseClause.new(prefix, operator, clause[:phrase].map { |p| p[:term].to_s }.join(' '))
+    else
+      raise "Unexpected clause type: #{clause}"
+    end
+  end
+
+  rule(query: sequence(:clauses)) { Query.new(clauses) }
+end
diff --git a/app/services/search_service.rb b/app/services/search_service.rb
index e0da61dac5b..769d1ac7a41 100644
--- a/app/services/search_service.rb
+++ b/app/services/search_service.rb
@@ -33,8 +33,7 @@ class SearchService < BaseService
   end
 
   def perform_statuses_search!
-    definition = StatusesIndex.filter(term: { searchable_by: @account.id })
-                              .query(multi_match: { type: 'most_fields', query: @query, operator: 'and', fields: %w(text text.stemmed) })
+    definition = parsed_query.apply(StatusesIndex.filter(term: { searchable_by: @account.id }))
 
     if @options[:account_id].present?
       definition = definition.filter(term: { account_id: @options[:account_id] })
@@ -70,7 +69,7 @@ class SearchService < BaseService
   end
 
   def url_query?
-    @options[:type].blank? && @query =~ /\Ahttps?:\/\//
+    @resolve && @options[:type].blank? && @query =~ /\Ahttps?:\/\//
   end
 
   def url_resource_results
@@ -120,4 +119,8 @@ class SearchService < BaseService
       domain_blocking_by_domain: Account.domain_blocking_map_by_domain(domains, account.id),
     }
   end
+
+  def parsed_query
+    SearchQueryTransformer.new.apply(SearchQueryParser.new.parse(@query))
+  end
 end
diff --git a/spec/services/search_service_spec.rb b/spec/services/search_service_spec.rb
index d064cd9b85d..ade306ed289 100644
--- a/spec/services/search_service_spec.rb
+++ b/spec/services/search_service_spec.rb
@@ -27,7 +27,7 @@ describe SearchService, type: :service do
         it 'returns the empty results' do
           service = double(call: nil)
           allow(ResolveURLService).to receive(:new).and_return(service)
-          results = subject.call(@query, nil, 10)
+          results = subject.call(@query, nil, 10, resolve: true)
 
           expect(service).to have_received(:call).with(@query, on_behalf_of: nil)
           expect(results).to eq empty_results
@@ -40,7 +40,7 @@ describe SearchService, type: :service do
           service = double(call: account)
           allow(ResolveURLService).to receive(:new).and_return(service)
 
-          results = subject.call(@query, nil, 10)
+          results = subject.call(@query, nil, 10, resolve: true)
           expect(service).to have_received(:call).with(@query, on_behalf_of: nil)
           expect(results).to eq empty_results.merge(accounts: [account])
         end
@@ -52,7 +52,7 @@ describe SearchService, type: :service do
           service = double(call: status)
           allow(ResolveURLService).to receive(:new).and_return(service)
 
-          results = subject.call(@query, nil, 10)
+          results = subject.call(@query, nil, 10, resolve: true)
           expect(service).to have_received(:call).with(@query, on_behalf_of: nil)
           expect(results).to eq empty_results.merge(statuses: [status])
         end