From 7afc66161ae5d451c05937d9bab22eaa23417be4 Mon Sep 17 00:00:00 2001 From: Syphax bouazzouni Date: Tue, 5 Dec 2023 21:12:50 +0100 Subject: [PATCH] Merge to master: Release 2.3.4 - Multilingual (#42) * Merge pull request #34 from ontoportal-lirmm/feature/paginate-and-filter-ontologies-endpoint Feature: Implement pagination and filters to submissions endpoint * Merge pull request #36 from ontoportal-lirmm/feature/paginate-and-filter-ontologies-endpoint Feature: Add oder by and filters for ontologies endpoint * Merge pull request #32 from ontoportal-lirmm/feature/support-multilingual-read-one-language-from-request-parameter Feature: Support multilingual - Add request_lang middleware * Feature: Add support of multilingual search (#40) * update get_term_search_query to support multilanguages search * rename var * fix search lang suffix to use underscore not @ * add multilangual search test --------- Co-authored-by: Syphax Bouazzouni --------- Co-authored-by: HADDAD Zineddine --- Gemfile.lock | 6 +-- app.rb | 6 +++ helpers/search_helper.rb | 15 ++++--- lib/rack/request_lang.rb | 16 +++++++ .../test_ontology_submissions_controller.rb | 15 +++++++ test/controllers/test_search_controller.rb | 44 +++++++++++++++++++ test/data/ontology_files/BRO_v3.2.owl | 3 ++ test/solr/docker-compose.yml | 13 ++++++ test/solr/generate_ncbo_configsets.sh | 35 ++++++++------- 9 files changed, 129 insertions(+), 24 deletions(-) create mode 100644 lib/rack/request_lang.rb create mode 100644 test/solr/docker-compose.yml diff --git a/Gemfile.lock b/Gemfile.lock index 2612b968..3428c7b2 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -247,7 +247,7 @@ GEM rack (>= 0.4) rack-attack (6.6.1) rack (>= 1.0, < 3) - rack-cache (1.13.0) + rack-cache (1.14.0) rack (>= 0.4) rack-cors (1.0.6) rack (>= 1.6.0) @@ -270,8 +270,8 @@ GEM redis-rack-cache (2.2.1) rack-cache (>= 1.10, < 2) redis-store (>= 1.6, < 2) - redis-store (1.9.1) - redis (>= 4, < 5) + redis-store (1.9.2) + redis (>= 4, < 6) representable (3.2.0) declarative (< 0.1.0) trailblazer-option (>= 0.1.1, < 0.2.0) diff --git a/app.rb b/app.rb index 5360ae4b..46457c86 100644 --- a/app.rb +++ b/app.rb @@ -29,6 +29,7 @@ require_relative 'lib/rack/cube_reporter' require_relative 'lib/rack/param_translator' require_relative 'lib/rack/slice_detection' +require_relative 'lib/rack/request_lang' # Logging setup require_relative "config/logging" @@ -36,6 +37,8 @@ # Inflector setup require_relative "config/inflections" +require 'request_store' + # Protection settings set :protection, :except => :path_traversal @@ -143,6 +146,9 @@ use Rack::PostBodyToParams use Rack::ParamTranslator +use RequestStore::Middleware +use Rack::RequestLang + use LinkedData::Security::Authorization use LinkedData::Security::AccessDenied diff --git a/helpers/search_helper.rb b/helpers/search_helper.rb index 5d37d884..071000d9 100644 --- a/helpers/search_helper.rb +++ b/helpers/search_helper.rb @@ -82,6 +82,9 @@ def get_term_search_query(text, params={}) end end + lang = params["lang"] || params["language"] + lang_suffix = lang && !lang.eql?("all") ? "_#{lang}" : "" + query = "" params["defType"] = "edismax" params["stopwords"] = "true" @@ -98,15 +101,15 @@ def get_term_search_query(text, params={}) if params[EXACT_MATCH_PARAM] == "true" query = "\"#{solr_escape(text)}\"" - params["qf"] = "resource_id^20 prefLabelExact^10 synonymExact #{QUERYLESS_FIELDS_STR}" - params["hl.fl"] = "resource_id prefLabelExact synonymExact #{QUERYLESS_FIELDS_STR}" + params["qf"] = "resource_id^20 prefLabelExact#{lang_suffix }^10 synonymExact#{lang_suffix } #{QUERYLESS_FIELDS_STR}" + params["hl.fl"] = "resource_id prefLabelExact#{lang_suffix } synonymExact#{lang_suffix } #{QUERYLESS_FIELDS_STR}" elsif params[SUGGEST_PARAM] == "true" || text[-1] == '*' text.gsub!(/\*+$/, '') query = "\"#{solr_escape(text)}\"" params["qt"] = "/suggest_ncbo" - params["qf"] = "prefLabelExact^100 prefLabelSuggestEdge^50 synonymSuggestEdge^10 prefLabelSuggestNgram synonymSuggestNgram resource_id #{QUERYLESS_FIELDS_STR}" + params["qf"] = "prefLabelExact#{lang_suffix }^100 prefLabelSuggestEdge^50 synonymSuggestEdge^10 prefLabelSuggestNgram synonymSuggestNgram resource_id #{QUERYLESS_FIELDS_STR}" params["pf"] = "prefLabelSuggest^50" - params["hl.fl"] = "prefLabelExact prefLabelSuggestEdge synonymSuggestEdge prefLabelSuggestNgram synonymSuggestNgram resource_id #{QUERYLESS_FIELDS_STR}" + params["hl.fl"] = "prefLabelExact#{lang_suffix } prefLabelSuggestEdge synonymSuggestEdge prefLabelSuggestNgram synonymSuggestNgram resource_id #{QUERYLESS_FIELDS_STR}" else if text.strip.empty? query = '*' @@ -114,9 +117,9 @@ def get_term_search_query(text, params={}) query = solr_escape(text) end - params["qf"] = "resource_id^100 prefLabelExact^90 prefLabel^70 synonymExact^50 synonym^10 #{QUERYLESS_FIELDS_STR}" + params["qf"] = "resource_id^100 prefLabelExact#{lang_suffix }^90 prefLabel#{lang_suffix }^70 synonymExact#{lang_suffix }^50 synonym#{lang_suffix }^10 #{QUERYLESS_FIELDS_STR}" params["qf"] << " property" if params[INCLUDE_PROPERTIES_PARAM] == "true" - params["hl.fl"] = "resource_id prefLabelExact prefLabel synonymExact synonym #{QUERYLESS_FIELDS_STR}" + params["hl.fl"] = "resource_id prefLabelExact#{lang_suffix } prefLabel#{lang_suffix } synonymExact#{lang_suffix } synonym#{lang_suffix } #{QUERYLESS_FIELDS_STR}" params["hl.fl"] = "#{params["hl.fl"]} property" if params[INCLUDE_PROPERTIES_PARAM] == "true" end diff --git a/lib/rack/request_lang.rb b/lib/rack/request_lang.rb new file mode 100644 index 00000000..b2221041 --- /dev/null +++ b/lib/rack/request_lang.rb @@ -0,0 +1,16 @@ +module Rack + class RequestLang + + def initialize(app = nil, options = {}) + @app = app + end + + def call(env) + r = Rack::Request.new(env) + lang = r.params["lang"] || r.params["language"] + lang = lang.upcase.to_sym if lang + RequestStore.store[:requested_lang] = lang + @app.call(env) + end + end +end \ No newline at end of file diff --git a/test/controllers/test_ontology_submissions_controller.rb b/test/controllers/test_ontology_submissions_controller.rb index 8c4cb098..77b6e6bc 100644 --- a/test/controllers/test_ontology_submissions_controller.rb +++ b/test/controllers/test_ontology_submissions_controller.rb @@ -201,6 +201,21 @@ def test_download_acl_only end end + def test_submissions_pagination + num_onts_created, created_ont_acronyms = create_ontologies_and_submissions(ont_count: 2, submission_count: 2) + + get "/submissions" + assert last_response.ok? + submissions = MultiJson.load(last_response.body) + + assert_equal 2, submissions.length + + + get "/submissions?page=1&pagesize=1" + assert last_response.ok? + submissions = MultiJson.load(last_response.body) + assert_equal 1, submissions["collection"].length + end def test_submissions_default_includes diff --git a/test/controllers/test_search_controller.rb b/test/controllers/test_search_controller.rb index 21a3dd18..74be75d2 100644 --- a/test/controllers/test_search_controller.rb +++ b/test/controllers/test_search_controller.rb @@ -213,4 +213,48 @@ def test_search_provisional_class assert_equal @@test_pc_child.label, provisional[0]["prefLabel"].first end + def test_multilingual_search + get "/search?q=Activity&ontologies=BROSEARCHTEST-0" + res = MultiJson.load(last_response.body) + refute_equal 0, res["totalCount"] + + doc = res["collection"].select{|doc| doc["@id"].to_s.eql?('http://bioontology.org/ontologies/Activity.owl#Activity')}.first + refute_nil doc + + #res = LinkedData::Models::Class.search("prefLabel_none:Activity", {:fq => "submissionAcronym:BROSEARCHTEST-0", :start => 0, :rows => 80}, :main) + #refute_equal 0, res["response"]["numFound"] + #refute_nil res["response"]["docs"].select{|doc| doc["resource_id"].eql?('http://bioontology.org/ontologies/Activity.owl#Activity')}.first + + get "/search?q=Activit%C3%A9&ontologies=BROSEARCHTEST-0&lang=fr" + res = MultiJson.load(last_response.body) + refute_equal 0, res["totalCount"] + refute_nil res["collection"].select{|doc| doc["@id"].eql?('http://bioontology.org/ontologies/Activity.owl#Activity')}.first + + + + get "/search?q=ActivityEnglish&ontologies=BROSEARCHTEST-0&lang=en" + res = MultiJson.load(last_response.body) + refute_equal 0, res["totalCount"] + refute_nil res["collection"].select{|doc| doc["@id"].eql?('http://bioontology.org/ontologies/Activity.owl#Activity')}.first + + + get "/search?q=ActivityEnglish&ontologies=BROSEARCHTEST-0&lang=fr&require_exact_match=true" + res = MultiJson.load(last_response.body) + assert_nil res["collection"].select{|doc| doc["@id"].eql?('http://bioontology.org/ontologies/Activity.owl#Activity')}.first + + get "/search?q=ActivityEnglish&ontologies=BROSEARCHTEST-0&lang=en&require_exact_match=true" + res = MultiJson.load(last_response.body) + refute_nil res["collection"].select{|doc| doc["@id"].eql?('http://bioontology.org/ontologies/Activity.owl#Activity')}.first + + get "/search?q=Activity&ontologies=BROSEARCHTEST-0&lang=en&require_exact_match=true" + res = MultiJson.load(last_response.body) + assert_nil res["collection"].select{|doc| doc["@id"].eql?('http://bioontology.org/ontologies/Activity.owl#Activity')}.first + + get "/search?q=Activit%C3%A9&ontologies=BROSEARCHTEST-0&lang=fr&require_exact_match=true" + res = MultiJson.load(last_response.body) + refute_nil res["collection"].select{|doc| doc["@id"].eql?('http://bioontology.org/ontologies/Activity.owl#Activity')}.first + + + end + end diff --git a/test/data/ontology_files/BRO_v3.2.owl b/test/data/ontology_files/BRO_v3.2.owl index d64075cc..b2aeccf5 100644 --- a/test/data/ontology_files/BRO_v3.2.owl +++ b/test/data/ontology_files/BRO_v3.2.owl @@ -631,6 +631,9 @@ Activity + Activity + ActivityEnglish + Activité Activity of interest that may be related to a BRO:Resource. activities diff --git a/test/solr/docker-compose.yml b/test/solr/docker-compose.yml new file mode 100644 index 00000000..3ddae69c --- /dev/null +++ b/test/solr/docker-compose.yml @@ -0,0 +1,13 @@ +version: '3.8' + +services: + op_solr: + image: solr:8.8 + volumes: + - ./solr_configsets:/configsets:ro + ports: + - "8983:8983" + command: > + bash -c "precreate-core term_search_core1 /configsets/term_search + && precreate-core prop_search_core1 /configsets/property_search + && solr-foreground" diff --git a/test/solr/generate_ncbo_configsets.sh b/test/solr/generate_ncbo_configsets.sh index 893f7f3a..7b4281f7 100755 --- a/test/solr/generate_ncbo_configsets.sh +++ b/test/solr/generate_ncbo_configsets.sh @@ -2,18 +2,23 @@ # generates solr configsets by merging _default configset with config files in config/solr # _default is copied from sorl distribuion solr-8.10.1/server/solr/configsets/_default/ -pushd solr/configsets -ld_config='../../../../ontologies_linked_data/config/solr/' -#ld_config='../../../../config/solr/' -ls -l $ld_config -pwd -[ -d property_search ] && rm -Rf property_search -[ -d term_search ] && rm -Rf property_search -[ -d $ld_config/property_search ] || echo "cant find ontologies_linked_data project" -mkdir -p property_search/conf -mkdir -p term_search/conf -cp -a _default/conf/* property_search/conf/ -cp -a _default/conf/* term_search/conf/ -cp -a $ld_config/property_search/* property_search/conf -cp -a $ld_config/term_search/* term_search/conf -popd +#cd solr/configsets +ld_config='config/solr' +configsets='test/solr/configsets' +[ -d ${configsets}/property_search ] && rm -Rf ${configsets}/property_search +[ -d ${configsets}/term_search ] && rm -Rf ${configsets}/term_search +if [[ ! -d ${ld_config}/property_search ]]; then + echo 'cant find ld solr config sets' + exit 1 +fi +if [[ ! -d ${configsets}/_default/conf ]]; then + echo 'cant find default solr configset' + exit 1 +fi +mkdir -p ${configsets}/property_search/conf +mkdir -p ${configsets}/term_search/conf +cp -a ${configsets}/_default/conf/* ${configsets}/property_search/conf/ +cp -a ${configsets}/_default/conf/* ${configsets}/term_search/conf/ +cp -a $ld_config/property_search/* ${configsets}/property_search/conf +cp -a $ld_config/term_search/* ${configsets}/term_search/conf +