From 923629e5ca25594a8d3048c07d6789d44318a180 Mon Sep 17 00:00:00 2001 From: Syphax Bouazzouni Date: Thu, 27 Jun 2024 05:10:01 +0200 Subject: [PATCH 1/2] implement display search results in multiple languages --- helpers/search_helper.rb | 162 ++++++++++++++------- test/controllers/test_search_controller.rb | 2 +- 2 files changed, 108 insertions(+), 56 deletions(-) diff --git a/helpers/search_helper.rb b/helpers/search_helper.rb index 06e6a78f..8b679986 100644 --- a/helpers/search_helper.rb +++ b/helpers/search_helper.rb @@ -30,51 +30,51 @@ module SearchHelper MATCH_TYPE_LABELGENERATED = "labelGenerated" MATCH_TYPE_MAP = { - "resource_id" => "id", - MATCH_TYPE_PREFLABEL => MATCH_TYPE_PREFLABEL, - "prefLabelExact" => MATCH_TYPE_PREFLABEL, - "prefLabelSuggestEdge" => MATCH_TYPE_PREFLABEL, - "prefLabelSuggestNgram" => MATCH_TYPE_PREFLABEL, - MATCH_TYPE_SYNONYM => MATCH_TYPE_SYNONYM, - "synonymExact" => MATCH_TYPE_SYNONYM, - "synonymSuggestEdge" => MATCH_TYPE_SYNONYM, - "synonymSuggestNgram" => MATCH_TYPE_SYNONYM, - MATCH_TYPE_PROPERTY => MATCH_TYPE_PROPERTY, - MATCH_TYPE_LABEL => MATCH_TYPE_LABEL, - "labelExact" => MATCH_TYPE_LABEL, - "labelSuggestEdge" => MATCH_TYPE_LABEL, - "labelSuggestNgram" => MATCH_TYPE_LABEL, - MATCH_TYPE_LABELGENERATED => MATCH_TYPE_LABELGENERATED, - "labelGeneratedExact" => MATCH_TYPE_LABELGENERATED, - "labellabelGeneratedSuggestEdge" => MATCH_TYPE_LABELGENERATED, - "labellabelGeneratedSuggestNgram" => MATCH_TYPE_LABELGENERATED, - "notation" => "notation", - "cui" => "cui", - "semanticType" => "semanticType" + "resource_id" => "id", + MATCH_TYPE_PREFLABEL => MATCH_TYPE_PREFLABEL, + "prefLabelExact" => MATCH_TYPE_PREFLABEL, + "prefLabelSuggestEdge" => MATCH_TYPE_PREFLABEL, + "prefLabelSuggestNgram" => MATCH_TYPE_PREFLABEL, + MATCH_TYPE_SYNONYM => MATCH_TYPE_SYNONYM, + "synonymExact" => MATCH_TYPE_SYNONYM, + "synonymSuggestEdge" => MATCH_TYPE_SYNONYM, + "synonymSuggestNgram" => MATCH_TYPE_SYNONYM, + MATCH_TYPE_PROPERTY => MATCH_TYPE_PROPERTY, + MATCH_TYPE_LABEL => MATCH_TYPE_LABEL, + "labelExact" => MATCH_TYPE_LABEL, + "labelSuggestEdge" => MATCH_TYPE_LABEL, + "labelSuggestNgram" => MATCH_TYPE_LABEL, + MATCH_TYPE_LABELGENERATED => MATCH_TYPE_LABELGENERATED, + "labelGeneratedExact" => MATCH_TYPE_LABELGENERATED, + "labellabelGeneratedSuggestEdge" => MATCH_TYPE_LABELGENERATED, + "labellabelGeneratedSuggestNgram" => MATCH_TYPE_LABELGENERATED, + "notation" => "notation", + "cui" => "cui", + "semanticType" => "semanticType" } # list of fields that allow empty query text QUERYLESS_FIELDS_PARAMS = { - "ontologies" => nil, - "notation" => "notation", - "cui" => "cui", - "semantic_types" => "semanticType", - ONTOLOGY_TYPES_PARAM => "ontologyType", - ALSO_SEARCH_PROVISIONAL_PARAM => nil, - SUBTREE_ID_PARAM => nil + "ontologies" => nil, + "notation" => "notation", + "cui" => "cui", + "semantic_types" => "semanticType", + ONTOLOGY_TYPES_PARAM => "ontologyType", + ALSO_SEARCH_PROVISIONAL_PARAM => nil, + SUBTREE_ID_PARAM => nil } QUERYLESS_FIELDS_STR = QUERYLESS_FIELDS_PARAMS.values.compact.join(" ") - def get_term_search_query(text, params={}) + def get_term_search_query(text, params = {}) validate_params_solr_population(ALLOWED_INCLUDES_PARAMS) sort = params.delete('sort') # raise error if text is empty AND (none of the QUERYLESS_FIELDS_PARAMS has been passed # OR either an exact match OR suggest search is being executed) if text.nil? || text.strip.empty? - if !QUERYLESS_FIELDS_PARAMS.keys.any? {|k| params.key?(k)} || - params[EXACT_MATCH_PARAM] == "true" || - params[SUGGEST_PARAM] == "true" + if !QUERYLESS_FIELDS_PARAMS.keys.any? { |k| params.key?(k) } || + params[EXACT_MATCH_PARAM] == "true" || + params[SUGGEST_PARAM] == "true" raise error 400, "The search query must be provided via /search?q=[&page=&pagesize=]" else text = '' @@ -82,10 +82,6 @@ def get_term_search_query(text, params={}) end end - lang = params["lang"] || params["language"] - lang_suffix = lang && !lang.eql?("all") ? "_#{lang}" : "" - - query = "" params["defType"] = "edismax" params["stopwords"] = "true" params["lowercaseOperators"] = "true" @@ -97,19 +93,33 @@ def get_term_search_query(text, params={}) params["hl.simple.pre"] = MATCH_HTML_PRE params["hl.simple.post"] = MATCH_HTML_POST - # text.gsub!(/\*+$/, '') - if params[EXACT_MATCH_PARAM] == "true" query = "\"#{solr_escape(text)}\"" - params["qf"] = "resource_id^20 prefLabel#{lang_suffix}^10 synonymExact#{lang_suffix} #{QUERYLESS_FIELDS_STR}" - params["hl.fl"] = "resource_id prefLabelExact#{lang_suffix} synonymExact#{lang_suffix} #{QUERYLESS_FIELDS_STR}" + params["qf"] = "resource_id^20 #{add_lang_suffix('prefLabel', '^10')} #{add_lang_suffix('synonymExact')} #{QUERYLESS_FIELDS_STR}" + params["hl.fl"] = "resource_id #{add_lang_suffix('prefLabelExact')} #{add_lang_suffix('synonymExact')} #{QUERYLESS_FIELDS_STR}" elsif params[SUGGEST_PARAM] == "true" || text[-1] == '*' text.gsub!(/\*+$/, '') query = "\"#{solr_escape(text)}\"" params["qt"] = "/suggest_ncbo" - params["qf"] = " prefLabelExact#{lang_suffix}^100 prefLabelSuggestEdge#{lang_suffix}^50 synonym#{lang_suffix}SuggestEdge^10 prefLabel#{lang_suffix}SuggestNgram synonym#{lang_suffix}SuggestNgram resource_id #{QUERYLESS_FIELDS_STR}" - params["pf"] = "prefLabelSuggest^50" - params["hl.fl"] = "prefLabelExact#{lang_suffix} prefLabelSuggestEdge#{lang_suffix} synonymSuggestEdge#{lang_suffix} prefLabelSuggestNgram#{lang_suffix} synonymSuggestNgram#{lang_suffix} resource_id #{QUERYLESS_FIELDS_STR}" + params["qf"] = [ + add_lang_suffix('prefLabelExact', '^100'), + add_lang_suffix('prefLabelSuggestEdge', '^50'), + add_lang_suffix('synonymSuggestEdge', '^10'), + add_lang_suffix('prefLabelSuggestNgram'), + add_lang_suffix('synonymSuggestNgram'), + "resource_id #{QUERYLESS_FIELDS_STR}" + ].join(' ') + + params["pf"] = add_lang_suffix('prefLabelSuggest', '^50') + + params["hl.fl"] = [ + add_lang_suffix('prefLabelExact'), + add_lang_suffix('prefLabelSuggestEdge'), + add_lang_suffix('synonymSuggestEdge'), + add_lang_suffix('prefLabelSuggestNgram'), + add_lang_suffix('synonymSuggestNgram'), + "resource_id #{QUERYLESS_FIELDS_STR}" + ].join(' ') else if text.strip.empty? query = '*' @@ -117,9 +127,19 @@ def get_term_search_query(text, params={}) query = solr_escape(text) end - params["qf"] = "resource_id^100 prefLabelExact#{lang_suffix}^90 prefLabel#{lang_suffix}^70 synonymExact#{lang_suffix}^50 synonym#{lang_suffix }^10 #{QUERYLESS_FIELDS_STR}" + params["qf"] = [ + "resource_id^100", + add_lang_suffix('prefLabelExact', '^90'), + add_lang_suffix('prefLabel', '^70'), + add_lang_suffix('synonymExact', '^50'), + add_lang_suffix('synonym', '^10'), + QUERYLESS_FIELDS_STR + ].join(' ') + params["qf"] << " property" if params[INCLUDE_PROPERTIES_PARAM] == "true" - params["hl.fl"] = "resource_id prefLabelExact#{lang_suffix} prefLabel#{lang_suffix } synonymExact#{lang_suffix} synonym#{lang_suffix } #{QUERYLESS_FIELDS_STR}" + + params["hl.fl"] = "resource_id #{add_lang_suffix('prefLabelExact')} #{ add_lang_suffix('prefLabel')} #{add_lang_suffix('synonymExact')} #{add_lang_suffix('synonym')} #{QUERYLESS_FIELDS_STR}" + params["hl.fl"] = "#{params["hl.fl"]} property" if params[INCLUDE_PROPERTIES_PARAM] == "true" end @@ -225,29 +245,61 @@ def portal_language Goo.main_languages.first end - def request_language - params['lang'] || params['languages'] || portal_language + def request_languages + lang = params['lang'] || params['languages'] + + return [portal_language] if lang.blank? + + lang.split(',') end + def request_multiple_languages? + request_languages.size > 1 || request_all_languages? + end + + def request_languages? + !(params['lang'] || params['language']).blank? + end + + def request_all_languages? + request_languages.first.eql?('all') + end + + def add_lang_suffix(attr, rank = "") + if request_languages? && !request_all_languages? + languages = request_languages + languages.map { |lang| "#{attr}_#{lang}#{rank} " }.join + else + "#{attr}#{rank}" + end + end def filter_attrs_by_language(doc) lang_values = {} doc.each do |k, v| attr, lang = k.to_s.split('_') - next unless lang + next if [:ontology_rank, :resource_id, :resource_model].include?(k) + next if lang.blank? || attr.blank? + next if !(request_languages + %w[none]).include?(lang) && !request_all_languages? - if lang.eql?('none') || request_language.eql?(lang) - lang_values[attr.to_sym] ||= [] - lang_values[attr.to_sym] = lang.eql?('none') ? lang_values[attr.to_sym] + v : v + lang_values[attr.to_sym] - end + lang_values[attr.to_sym] ||= {} + lang_values[attr.to_sym][lang] ||= [] + lang_values[attr.to_sym][lang] += v end - lang_values.each do |k, v| - doc[k] = v unless v.empty? + if request_multiple_languages? + lang_values.each do |k, lang_vals| + doc[k] = lang_vals + end + else + lang_values.each do |k, lang_vals| + doc[k] = lang_vals.map { |l, v| l.eql?('none') ? nil : v }.compact.flatten + Array(lang_vals['none']) + end + + doc[:prefLabel] = Array(doc["prefLabel_#{request_languages.first}".to_sym]).first || Array(doc[:prefLabel]).first end - doc[:prefLabel] = doc["prefLabel_#{request_language}".to_sym]&.first || doc[:prefLabel]&.first doc end diff --git a/test/controllers/test_search_controller.rb b/test/controllers/test_search_controller.rb index 459df9aa..9667606c 100644 --- a/test/controllers/test_search_controller.rb +++ b/test/controllers/test_search_controller.rb @@ -92,7 +92,7 @@ def test_search_ontology_filter assert last_response.ok? results = MultiJson.load(last_response.body) doc = results["collection"][0] - assert_equal "cell line", doc["prefLabel"].first + assert_equal "cell line", doc["prefLabel"] assert doc["links"]["ontology"].include? acronym results["collection"].each do |doc| acr = doc["links"]["ontology"].split('/')[-1] From 6abcaaa521251c96519f99d29f86731b05b1fb7c Mon Sep 17 00:00:00 2001 From: Syphax Bouazzouni Date: Fri, 28 Jun 2024 00:21:44 +0200 Subject: [PATCH 2/2] fix annotator prefLabel language selection --- helpers/search_helper.rb | 8 ++++++-- test/controllers/test_annotator_controller.rb | 8 ++++---- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/helpers/search_helper.rb b/helpers/search_helper.rb index 8b679986..3805e650 100644 --- a/helpers/search_helper.rb +++ b/helpers/search_helper.rb @@ -274,6 +274,10 @@ def add_lang_suffix(attr, rank = "") end end + def pref_label_by_language(doc) + Array(doc["prefLabel_#{request_languages.first}".to_sym]).first || Array(doc["prefLabel_none".to_sym]).first || Array(doc[:prefLabel]).first + end + def filter_attrs_by_language(doc) lang_values = {} doc.each do |k, v| @@ -297,7 +301,7 @@ def filter_attrs_by_language(doc) doc[k] = lang_vals.map { |l, v| l.eql?('none') ? nil : v }.compact.flatten + Array(lang_vals['none']) end - doc[:prefLabel] = Array(doc["prefLabel_#{request_languages.first}".to_sym]).first || Array(doc[:prefLabel]).first + doc[:prefLabel] = pref_label_by_language(doc) end doc @@ -431,7 +435,7 @@ def populate_classes_from_search(classes, ontology_acronyms=nil) doc[:submission] = old_class.submission doc[:properties] = MultiJson.load(doc.delete(:propertyRaw)) if include_param_contains?(:properties) instance = LinkedData::Models::Class.read_only(doc) - instance.prefLabel = instance.prefLabel.first if instance.prefLabel.is_a?(Array) + instance.prefLabel = pref_label_by_language(doc) classes_hash[ont_uri_class_uri] = instance end diff --git a/test/controllers/test_annotator_controller.rb b/test/controllers/test_annotator_controller.rb index 572c8750..947d474e 100644 --- a/test/controllers/test_annotator_controller.rb +++ b/test/controllers/test_annotator_controller.rb @@ -265,16 +265,16 @@ def test_default_properties_output assert last_response.ok? annotations = MultiJson.load(last_response.body) assert_equal 9, annotations.length - annotations.sort! { |a,b| a["annotatedClass"]["prefLabel"].first.downcase <=> b["annotatedClass"]["prefLabel"].first.downcase } + annotations.sort! { |a,b| a["annotatedClass"]["prefLabel"].downcase <=> b["annotatedClass"]["prefLabel"].downcase } assert_equal "http://bioontology.org/ontologies/BiomedicalResourceOntology.owl#Aggregate_Human_Data", annotations.first["annotatedClass"]["@id"] - assert_equal "Aggregate Human Data", Array(annotations.first["annotatedClass"]["prefLabel"]).first + assert_equal "Aggregate Human Data", annotations.first["annotatedClass"]["prefLabel"] params = {text: text, include: "prefLabel,definition"} get "/annotator", params assert last_response.ok? annotations = MultiJson.load(last_response.body) assert_equal 9, annotations.length - annotations.sort! { |a,b| Array(a["annotatedClass"]["prefLabel"]).first.downcase <=> Array(b["annotatedClass"]["prefLabel"]).first.downcase } + annotations.sort! { |a,b| a["annotatedClass"]["prefLabel"].downcase <=> b["annotatedClass"]["prefLabel"].downcase } assert_equal "http://bioontology.org/ontologies/BiomedicalResourceOntology.owl#Aggregate_Human_Data", annotations.first["annotatedClass"]["@id"] assert_equal ["A resource that provides data from clinical care that comprises combined data from multiple individual human subjects."], annotations.first["annotatedClass"]["definition"] end @@ -354,7 +354,7 @@ def self.mapping_test_set class_id = terms_a[i] ont_acr = onts_a[i] sub = LinkedData::Models::Ontology.find(ont_acr).first.latest_submission(status: :any) - binding.pry if sub.nil? + sub.bring(ontology: [:acronym]) c = LinkedData::Models::Class.find(RDF::URI.new(class_id)) .in(sub)