Skip to content

Commit

Permalink
Merge pull request #82 from ontoportal-lirmm/fix/search-multilingual-…
Browse files Browse the repository at this point in the history
…filter

Fix: Search multilingual values filter
  • Loading branch information
Bilelkihal authored Jul 24, 2024
2 parents 67bc9fb + 6abcaaa commit 4534006
Show file tree
Hide file tree
Showing 4 changed files with 176 additions and 58 deletions.
3 changes: 3 additions & 0 deletions controllers/search_controller.rb
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,9 @@ def process_search(params = nil)
doc[:submission] = submission
doc[:ontology_rank] = (ontology_rank[doc[:submissionAcronym]] && !ontology_rank[doc[:submissionAcronym]].empty?) ? ontology_rank[doc[:submissionAcronym]][:normalizedScore] : 0.0
doc[:properties] = MultiJson.load(doc.delete(:propertyRaw)) if include_param_contains?(:properties)

doc = filter_attrs_by_language(doc)

instance = doc[:provisional] ? LinkedData::Models::ProvisionalClass.read_only(doc) : LinkedData::Models::Class.read_only(doc)
docs.push(instance)
end
Expand Down
179 changes: 133 additions & 46 deletions helpers/search_helper.rb
Original file line number Diff line number Diff line change
Expand Up @@ -30,62 +30,58 @@ module SearchHelper
MATCH_TYPE_LABELGENERATED = "labelGenerated"

MATCH_TYPE_MAP = {
"resource_id" => "id",
MATCH_TYPE_PREFLABEL => MATCH_TYPE_PREFLABEL,
"prefLabelExact" => MATCH_TYPE_PREFLABEL,
"prefLabelSuggestEdge" => MATCH_TYPE_PREFLABEL,
"prefLabelSuggestNgram" => MATCH_TYPE_PREFLABEL,
MATCH_TYPE_SYNONYM => MATCH_TYPE_SYNONYM,
"synonymExact" => MATCH_TYPE_SYNONYM,
"synonymSuggestEdge" => MATCH_TYPE_SYNONYM,
"synonymSuggestNgram" => MATCH_TYPE_SYNONYM,
MATCH_TYPE_PROPERTY => MATCH_TYPE_PROPERTY,
MATCH_TYPE_LABEL => MATCH_TYPE_LABEL,
"labelExact" => MATCH_TYPE_LABEL,
"labelSuggestEdge" => MATCH_TYPE_LABEL,
"labelSuggestNgram" => MATCH_TYPE_LABEL,
MATCH_TYPE_LABELGENERATED => MATCH_TYPE_LABELGENERATED,
"labelGeneratedExact" => MATCH_TYPE_LABELGENERATED,
"labellabelGeneratedSuggestEdge" => MATCH_TYPE_LABELGENERATED,
"labellabelGeneratedSuggestNgram" => MATCH_TYPE_LABELGENERATED,
"notation" => "notation",
"cui" => "cui",
"semanticType" => "semanticType"
"resource_id" => "id",
MATCH_TYPE_PREFLABEL => MATCH_TYPE_PREFLABEL,
"prefLabelExact" => MATCH_TYPE_PREFLABEL,
"prefLabelSuggestEdge" => MATCH_TYPE_PREFLABEL,
"prefLabelSuggestNgram" => MATCH_TYPE_PREFLABEL,
MATCH_TYPE_SYNONYM => MATCH_TYPE_SYNONYM,
"synonymExact" => MATCH_TYPE_SYNONYM,
"synonymSuggestEdge" => MATCH_TYPE_SYNONYM,
"synonymSuggestNgram" => MATCH_TYPE_SYNONYM,
MATCH_TYPE_PROPERTY => MATCH_TYPE_PROPERTY,
MATCH_TYPE_LABEL => MATCH_TYPE_LABEL,
"labelExact" => MATCH_TYPE_LABEL,
"labelSuggestEdge" => MATCH_TYPE_LABEL,
"labelSuggestNgram" => MATCH_TYPE_LABEL,
MATCH_TYPE_LABELGENERATED => MATCH_TYPE_LABELGENERATED,
"labelGeneratedExact" => MATCH_TYPE_LABELGENERATED,
"labellabelGeneratedSuggestEdge" => MATCH_TYPE_LABELGENERATED,
"labellabelGeneratedSuggestNgram" => MATCH_TYPE_LABELGENERATED,
"notation" => "notation",
"cui" => "cui",
"semanticType" => "semanticType"
}

# list of fields that allow empty query text
QUERYLESS_FIELDS_PARAMS = {
"ontologies" => nil,
"notation" => "notation",
"cui" => "cui",
"semantic_types" => "semanticType",
ONTOLOGY_TYPES_PARAM => "ontologyType",
ALSO_SEARCH_PROVISIONAL_PARAM => nil,
SUBTREE_ID_PARAM => nil
"ontologies" => nil,
"notation" => "notation",
"cui" => "cui",
"semantic_types" => "semanticType",
ONTOLOGY_TYPES_PARAM => "ontologyType",
ALSO_SEARCH_PROVISIONAL_PARAM => nil,
SUBTREE_ID_PARAM => nil
}

QUERYLESS_FIELDS_STR = QUERYLESS_FIELDS_PARAMS.values.compact.join(" ")

def get_term_search_query(text, params={})
def get_term_search_query(text, params = {})
validate_params_solr_population(ALLOWED_INCLUDES_PARAMS)
sort = params.delete('sort')
# raise error if text is empty AND (none of the QUERYLESS_FIELDS_PARAMS has been passed
# OR either an exact match OR suggest search is being executed)
if text.nil? || text.strip.empty?
if !QUERYLESS_FIELDS_PARAMS.keys.any? {|k| params.key?(k)} ||
params[EXACT_MATCH_PARAM] == "true" ||
params[SUGGEST_PARAM] == "true"
if !QUERYLESS_FIELDS_PARAMS.keys.any? { |k| params.key?(k) } ||
params[EXACT_MATCH_PARAM] == "true" ||
params[SUGGEST_PARAM] == "true"
raise error 400, "The search query must be provided via /search?q=<query>[&page=<pagenum>&pagesize=<pagesize>]"
else
text = ''
params['sort'] = 'prefLabelExact asc, submissionAcronym asc' if sort == 'prefLabel'
end
end

lang = params["lang"] || params["language"]
lang_suffix = lang && !lang.eql?("all") ? "_#{lang}" : ""

query = ""
params["defType"] = "edismax"
params["stopwords"] = "true"
params["lowercaseOperators"] = "true"
Expand All @@ -97,29 +93,53 @@ def get_term_search_query(text, params={})
params["hl.simple.pre"] = MATCH_HTML_PRE
params["hl.simple.post"] = MATCH_HTML_POST

# text.gsub!(/\*+$/, '')

if params[EXACT_MATCH_PARAM] == "true"
query = "\"#{solr_escape(text)}\""
params["qf"] = "resource_id^20 prefLabel#{lang_suffix}^10 synonymExact#{lang_suffix} #{QUERYLESS_FIELDS_STR}"
params["hl.fl"] = "resource_id prefLabelExact#{lang_suffix} synonymExact#{lang_suffix} #{QUERYLESS_FIELDS_STR}"
params["qf"] = "resource_id^20 #{add_lang_suffix('prefLabel', '^10')} #{add_lang_suffix('synonymExact')} #{QUERYLESS_FIELDS_STR}"
params["hl.fl"] = "resource_id #{add_lang_suffix('prefLabelExact')} #{add_lang_suffix('synonymExact')} #{QUERYLESS_FIELDS_STR}"
elsif params[SUGGEST_PARAM] == "true" || text[-1] == '*'
text.gsub!(/\*+$/, '')
query = "\"#{solr_escape(text)}\""
params["qt"] = "/suggest_ncbo"
params["qf"] = " prefLabelExact#{lang_suffix}^100 prefLabelSuggestEdge#{lang_suffix}^50 synonym#{lang_suffix}SuggestEdge^10 prefLabel#{lang_suffix}SuggestNgram synonym#{lang_suffix}SuggestNgram resource_id #{QUERYLESS_FIELDS_STR}"
params["pf"] = "prefLabelSuggest^50"
params["hl.fl"] = "prefLabelExact#{lang_suffix} prefLabelSuggestEdge#{lang_suffix} synonymSuggestEdge#{lang_suffix} prefLabelSuggestNgram#{lang_suffix} synonymSuggestNgram#{lang_suffix} resource_id #{QUERYLESS_FIELDS_STR}"
params["qf"] = [
add_lang_suffix('prefLabelExact', '^100'),
add_lang_suffix('prefLabelSuggestEdge', '^50'),
add_lang_suffix('synonymSuggestEdge', '^10'),
add_lang_suffix('prefLabelSuggestNgram'),
add_lang_suffix('synonymSuggestNgram'),
"resource_id #{QUERYLESS_FIELDS_STR}"
].join(' ')

params["pf"] = add_lang_suffix('prefLabelSuggest', '^50')

params["hl.fl"] = [
add_lang_suffix('prefLabelExact'),
add_lang_suffix('prefLabelSuggestEdge'),
add_lang_suffix('synonymSuggestEdge'),
add_lang_suffix('prefLabelSuggestNgram'),
add_lang_suffix('synonymSuggestNgram'),
"resource_id #{QUERYLESS_FIELDS_STR}"
].join(' ')
else
if text.strip.empty?
query = '*'
else
query = solr_escape(text)
end

params["qf"] = "resource_id^100 prefLabelExact#{lang_suffix}^90 prefLabel#{lang_suffix}^70 synonymExact#{lang_suffix}^50 synonym#{lang_suffix }^10 #{QUERYLESS_FIELDS_STR}"
params["qf"] = [
"resource_id^100",
add_lang_suffix('prefLabelExact', '^90'),
add_lang_suffix('prefLabel', '^70'),
add_lang_suffix('synonymExact', '^50'),
add_lang_suffix('synonym', '^10'),
QUERYLESS_FIELDS_STR
].join(' ')

params["qf"] << " property" if params[INCLUDE_PROPERTIES_PARAM] == "true"
params["hl.fl"] = "resource_id prefLabelExact#{lang_suffix} prefLabel#{lang_suffix } synonymExact#{lang_suffix} synonym#{lang_suffix } #{QUERYLESS_FIELDS_STR}"

params["hl.fl"] = "resource_id #{add_lang_suffix('prefLabelExact')} #{ add_lang_suffix('prefLabel')} #{add_lang_suffix('synonymExact')} #{add_lang_suffix('synonym')} #{QUERYLESS_FIELDS_STR}"

params["hl.fl"] = "#{params["hl.fl"]} property" if params[INCLUDE_PROPERTIES_PARAM] == "true"
end

Expand Down Expand Up @@ -221,6 +241,73 @@ def add_matched_fields(solr_response, default_match)
solr_response["match_types"] = all_matches
end

# Default language of the portal: the first entry of Goo.main_languages.
#
# @return [Object, nil] whatever Goo.main_languages lists first
def portal_language
  main_languages = Goo.main_languages
  main_languages.first
end

# Languages requested by the client, as a list of language codes.
#
# The value is read from the 'lang' request parameter, then 'languages', then
# 'language' (the key request_languages? checks), and is expected to be a
# comma-separated string such as "en,fr" or the special value "all".
# Whitespace around each entry and empty entries are discarded. When no usable
# entry remains, the portal default language is returned instead.
#
# @return [Array<String>] requested codes, or [portal_language] as fallback
def request_languages
  lang = params['lang'] || params['languages'] || params['language']

  return [portal_language] if lang.blank?

  languages = lang.split(',').map(&:strip).reject(&:empty?)

  # A value such as "," is not blank but contains no language at all.
  languages.empty? ? [portal_language] : languages
end

# Tells whether values for more than one language were asked for: either
# several language codes were requested, or the special value 'all' was.
#
# @return [Boolean]
def request_multiple_languages?
  return true if request_languages.size > 1

  request_all_languages?
end

# Tells whether the client explicitly passed a language parameter.
#
# Checks 'lang', 'language' and 'languages' so that every key honoured by
# request_languages is also recognised here. The first non-nil parameter is
# the one tested for blankness.
#
# @return [Boolean] true when that parameter is present and not blank
def request_languages?
  !(params['lang'] || params['language'] || params['languages']).blank?
end

# Tells whether the first requested language is the special value 'all'.
#
# @return [Boolean]
def request_all_languages?
  first_language = request_languages.first
  first_language.eql?('all')
end

# Builds the field list for +attr+ restricted to the requested languages.
#
# When no language was requested, or 'all' was requested, the bare field name
# followed by +rank+ is returned. Otherwise one "<attr>_<lang><rank> " entry
# is produced per requested language; note that each entry, including the
# last one, ends with a space.
#
# @param attr [String] base field name, e.g. 'prefLabel'
# @param rank [String] text appended after the field name, e.g. '^10'
# @return [String]
def add_lang_suffix(attr, rank = "")
  return "#{attr}#{rank}" if !request_languages? || request_all_languages?

  request_languages.each_with_object(+'') do |lang, fields|
    fields << "#{attr}_#{lang}#{rank} "
  end
end

# Picks a single preferred label out of +doc+.
#
# Lookup order: the label for the first requested language
# (:prefLabel_<lang>), then the language-less label (:prefLabel_none), and
# finally the plain :prefLabel entry. Each entry may be a scalar or an array;
# only its first element is considered.
#
# @param doc [Hash] document with symbol keys
# @return [Object, nil] first label found, if any
def pref_label_by_language(doc)
  preferred_keys = [:"prefLabel_#{request_languages.first}", :prefLabel_none]

  preferred_keys.each do |key|
    label = Array(doc[key]).first
    return label if label
  end

  Array(doc[:prefLabel]).first
end

# Folds the language-suffixed entries of +doc+ (keys shaped "<attr>_<lang>",
# e.g. :prefLabel_en or :prefLabel_none) back into plain <attr> entries,
# keeping only the requested languages plus 'none'.
#
# * Several languages, or 'all', requested: doc[attr] becomes a Hash of
#   language => values.
# * Otherwise: doc[attr] becomes a flat Array with the language values first
#   and the 'none' values last, and doc[:prefLabel] is reduced to one label
#   via pref_label_by_language.
#
# The keys :ontology_rank, :resource_id and :resource_model contain an
# underscore but are not language-suffixed, so they are left untouched.
#
# @param doc [Hash] document with symbol keys; modified in place
# @return [Hash] the same +doc+ object
def filter_attrs_by_language(doc)
  untouched_keys = %i[ontology_rank resource_id resource_model]
  # Neither value depends on the key being visited, so compute them once.
  all_languages = request_all_languages?
  accepted_languages = request_languages + %w[none]

  lang_values = {}
  doc.each do |k, v|
    next if untouched_keys.include?(k)

    attr, lang = k.to_s.split('_')
    next if lang.blank? || attr.blank?
    next unless all_languages || accepted_languages.include?(lang)

    by_lang = (lang_values[attr.to_sym] ||= {})
    # Array(v) lets a scalar or nil value be collected without a TypeError.
    by_lang[lang] = (by_lang[lang] || []) + Array(v)
  end

  if request_multiple_languages?
    lang_values.each { |attr, by_lang| doc[attr] = by_lang }
  else
    lang_values.each do |attr, by_lang|
      translated = by_lang.reject { |lang, _| lang.eql?('none') }.values.flatten
      doc[attr] = translated + Array(by_lang['none'])
    end

    doc[:prefLabel] = pref_label_by_language(doc)
  end

  doc
end


# see https://github.com/rsolr/rsolr/issues/101
# and https://github.com/projecthydra/active_fedora/commit/75b4afb248ee61d9edb56911b2ef51f30f1ce17f
#
Expand Down Expand Up @@ -348,7 +435,7 @@ def populate_classes_from_search(classes, ontology_acronyms=nil)
doc[:submission] = old_class.submission
doc[:properties] = MultiJson.load(doc.delete(:propertyRaw)) if include_param_contains?(:properties)
instance = LinkedData::Models::Class.read_only(doc)
instance.prefLabel = instance.prefLabel.first if instance.prefLabel.is_a?(Array)
instance.prefLabel = pref_label_by_language(doc)
classes_hash[ont_uri_class_uri] = instance
end

Expand Down
8 changes: 4 additions & 4 deletions test/controllers/test_annotator_controller.rb
Original file line number Diff line number Diff line change
Expand Up @@ -265,16 +265,16 @@ def test_default_properties_output
assert last_response.ok?
annotations = MultiJson.load(last_response.body)
assert_equal 9, annotations.length
annotations.sort! { |a,b| a["annotatedClass"]["prefLabel"].first.downcase <=> b["annotatedClass"]["prefLabel"].first.downcase }
annotations.sort! { |a,b| a["annotatedClass"]["prefLabel"].downcase <=> b["annotatedClass"]["prefLabel"].downcase }
assert_equal "http://bioontology.org/ontologies/BiomedicalResourceOntology.owl#Aggregate_Human_Data", annotations.first["annotatedClass"]["@id"]
assert_equal "Aggregate Human Data", Array(annotations.first["annotatedClass"]["prefLabel"]).first
assert_equal "Aggregate Human Data", annotations.first["annotatedClass"]["prefLabel"]

params = {text: text, include: "prefLabel,definition"}
get "/annotator", params
assert last_response.ok?
annotations = MultiJson.load(last_response.body)
assert_equal 9, annotations.length
annotations.sort! { |a,b| Array(a["annotatedClass"]["prefLabel"]).first.downcase <=> Array(b["annotatedClass"]["prefLabel"]).first.downcase }
annotations.sort! { |a,b| a["annotatedClass"]["prefLabel"].downcase <=> b["annotatedClass"]["prefLabel"].downcase }
assert_equal "http://bioontology.org/ontologies/BiomedicalResourceOntology.owl#Aggregate_Human_Data", annotations.first["annotatedClass"]["@id"]
assert_equal ["A resource that provides data from clinical care that comprises combined data from multiple individual human subjects."], annotations.first["annotatedClass"]["definition"]
end
Expand Down Expand Up @@ -354,7 +354,7 @@ def self.mapping_test_set
class_id = terms_a[i]
ont_acr = onts_a[i]
sub = LinkedData::Models::Ontology.find(ont_acr).first.latest_submission(status: :any)
binding.pry if sub.nil?

sub.bring(ontology: [:acronym])
c = LinkedData::Models::Class.find(RDF::URI.new(class_id))
.in(sub)
Expand Down
44 changes: 36 additions & 8 deletions test/controllers/test_search_controller.rb
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ def test_search_ontology_filter
assert last_response.ok?
results = MultiJson.load(last_response.body)
doc = results["collection"][0]
assert_equal "cell line", doc["prefLabel"].first
assert_equal "cell line", doc["prefLabel"]
assert doc["links"]["ontology"].include? acronym
results["collection"].each do |doc|
acr = doc["links"]["ontology"].split('/')[-1]
Expand Down Expand Up @@ -153,7 +153,7 @@ def test_search_other_filters
.join(' ')
.include?("Funding Resource")
end
assert_equal "Funding Resource", results["collection"][0]["prefLabel"].first
assert_equal "Funding Resource", results["collection"][0]["prefLabel"]
assert_equal "T028", results["collection"][0]["semanticType"][0]
assert_equal "X123456", results["collection"][0]["cui"][0]

Expand Down Expand Up @@ -208,7 +208,7 @@ def test_search_provisional_class
assert_includes [10, 6], results["collection"].length # depending if owlapi import SKOS concepts
provisional = results["collection"].select {|res| assert_equal ontology_type, res["ontologyType"]; res["provisional"]}
assert_equal 1, provisional.length
assert_equal @@test_pc_root.label, provisional[0]["prefLabel"].first
assert_equal @@test_pc_root.label, provisional[0]["prefLabel"]

# subtree root with provisional class test
get "search?ontology=#{acronym}&subtree_root_id=#{CGI::escape(@@cls_uri.to_s)}&also_search_provisional=true"
Expand All @@ -217,32 +217,60 @@ def test_search_provisional_class

provisional = results["collection"].select {|res| res["provisional"]}
assert_equal 1, provisional.length
assert_equal @@test_pc_child.label, provisional[0]["prefLabel"].first
assert_equal @@test_pc_child.label, provisional[0]["prefLabel"]
end

def test_multilingual_search
get "/search?q=Activity&ontologies=BROSEARCHTEST-0"
res = MultiJson.load(last_response.body)
res = MultiJson.load(last_response.body)

refute_equal 0, res["totalCount"]

doc = res["collection"].select{|doc| doc["@id"].to_s.eql?('http://bioontology.org/ontologies/Activity.owl#Activity')}.first
refute_nil doc
assert_equal "ActivityEnglish", doc["prefLabel"]

res = LinkedData::Models::Class.search("prefLabel_none:Activity", {:fq => "submissionAcronym:BROSEARCHTEST-0", :start => 0, :rows => 80})
refute_equal 0, res["response"]["numFound"]
refute_nil res["response"]["docs"].select{|doc| doc["resource_id"].eql?('http://bioontology.org/ontologies/Activity.owl#Activity')}.first


get "/search?q=Activit%C3%A9&ontologies=BROSEARCHTEST-0&lang=fr"
res = MultiJson.load(last_response.body)
refute_equal 0, res["totalCount"]
refute_nil res["collection"].select{|doc| doc["@id"].eql?('http://bioontology.org/ontologies/Activity.owl#Activity')}.first

doc = res["collection"].select{|doc| doc["@id"].eql?('http://bioontology.org/ontologies/Activity.owl#Activity')}.first
refute_nil doc
assert_equal "Activité", doc["prefLabel"]


get "/search?q=ActivityEnglish&ontologies=BROSEARCHTEST-0&lang=en"
res = MultiJson.load(last_response.body)
refute_equal 0, res["totalCount"]
refute_nil res["collection"].select{|doc| doc["@id"].eql?('http://bioontology.org/ontologies/Activity.owl#Activity')}.first
doc = res["collection"].select{|doc| doc["@id"].eql?('http://bioontology.org/ontologies/Activity.owl#Activity')}.first
refute_nil doc
assert_equal "ActivityEnglish", doc["prefLabel"]


get "/search?q=ActivityEnglish&ontologies=BROSEARCHTEST-0&lang=fr,es"
res = MultiJson.load(last_response.body)
assert_equal 0, res["totalCount"]

get "/search?q=ActivityEnglish&ontologies=BROSEARCHTEST-0&lang=en,es"
res = MultiJson.load(last_response.body)
refute_equal 0, res["totalCount"]
doc = res["collection"].select{|doc| doc["@id"].eql?('http://bioontology.org/ontologies/Activity.owl#Activity')}.first
refute_nil doc
expected_pref_label = {"none"=>["Activity"], "en"=>["ActivityEnglish"]}
assert_equal expected_pref_label, doc["prefLabel"]

get "/search?q=ActivityEnglish&ontologies=BROSEARCHTEST-0&lang=all"
res = MultiJson.load(last_response.body)
refute_equal 0, res["totalCount"]
doc = res["collection"].select{|doc| doc["@id"].eql?('http://bioontology.org/ontologies/Activity.owl#Activity')}.first
refute_nil doc
expected_pref_label = {"none"=>["Activity"], "en"=>["ActivityEnglish"], "fr"=>["Activité"]}
assert_equal expected_pref_label, doc["prefLabel"]



get "/search?q=ActivityEnglish&ontologies=BROSEARCHTEST-0&lang=fr&require_exact_match=true"
Expand Down

0 comments on commit 4534006

Please sign in to comment.