Skip to content

Commit

Permalink
Feature: Add support of multilingual search (#40)
Browse files Browse the repository at this point in the history
* update get_term_search_query to support multilanguages search

* rename var

* fix search lang suffix to use underscore not @

* add multilangual search test

---------

Co-authored-by: Syphax Bouazzouni <[email protected]>
  • Loading branch information
haddadzineddine and syphax-bouazzouni committed Sep 5, 2023
1 parent c0c5e0e commit dcea9e4
Show file tree
Hide file tree
Showing 13 changed files with 198 additions and 72 deletions.
1 change: 1 addition & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,4 @@ tmp/*
# Editor temp files
*.swp
*.swo
test/solr
53 changes: 26 additions & 27 deletions Gemfile.lock
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
GIT
remote: https://github.com/ncbo/ncbo_ontology_recommender.git
revision: d0ac992c88bd417f2f2137ba62934c3c41b6db7c
revision: 83e835de368bc9f19da800a477982e0ad770900d
branch: master
specs:
ncbo_ontology_recommender (0.0.1)
Expand All @@ -11,7 +11,7 @@ GIT

GIT
remote: https://github.com/ontoportal-lirmm/goo.git
revision: b769c165906163e30a026dba511ae1069c4eed3d
revision: ddb95e427950fde3ac715aec340394208c8166fe
branch: development
specs:
goo (0.0.2)
Expand Down Expand Up @@ -53,7 +53,7 @@ GIT

GIT
remote: https://github.com/ontoportal-lirmm/ontologies_linked_data.git
revision: 69756f8a7cff39d283065217559c68820d6d95e3
revision: 4c89c8346766d23e09b24c8e29750bf3a91e6b53
branch: development
specs:
ontologies_linked_data (0.0.1)
Expand Down Expand Up @@ -103,16 +103,16 @@ GEM
activesupport (3.2.22.5)
i18n (~> 0.6, >= 0.6.4)
multi_json (~> 1.0)
addressable (2.8.4)
addressable (2.8.5)
public_suffix (>= 2.0.2, < 6.0)
airbrussh (1.4.1)
airbrussh (1.4.2)
sshkit (>= 1.6.1, != 1.7.0)
backports (3.24.1)
bcrypt (3.1.18)
bcrypt (3.1.19)
bcrypt_pbkdf (1.1.0)
bigdecimal (1.4.2)
builder (3.2.4)
capistrano (3.17.2)
capistrano (3.17.3)
airbrussh (>= 1.0.0)
i18n
rake (>= 10.0.0)
Expand Down Expand Up @@ -162,7 +162,7 @@ GEM
ffi (~> 1.0)
google-apis-analytics_v3 (0.13.0)
google-apis-core (>= 0.11.0, < 2.a)
google-apis-core (0.11.0)
google-apis-core (0.11.1)
addressable (~> 2.5, >= 2.5.1)
googleauth (>= 0.16.2, < 2.a)
httpclient (>= 2.8.1, < 3.a)
Expand All @@ -171,7 +171,7 @@ GEM
retriable (>= 2.0, < 4.a)
rexml
webrick
googleauth (1.5.2)
googleauth (1.7.0)
faraday (>= 0.17.3, < 3.a)
jwt (>= 1.4, < 3.0)
memoist (~> 0.16)
Expand All @@ -191,9 +191,9 @@ GEM
json-schema (2.8.1)
addressable (>= 2.4)
json_pure (2.6.3)
jwt (2.7.0)
jwt (2.7.1)
kgio (2.11.4)
libxml-ruby (4.1.0)
libxml-ruby (4.1.1)
logger (1.5.3)
macaddr (1.7.2)
systemu (~> 2.6.5)
Expand All @@ -204,18 +204,18 @@ GEM
net-smtp
memoist (0.16.2)
method_source (1.0.0)
mime-types (3.4.1)
mime-types (3.5.1)
mime-types-data (~> 3.2015)
mime-types-data (3.2023.0218.1)
mini_mime (1.1.2)
mime-types-data (3.2023.0808)
mini_mime (1.1.5)
minitest (4.7.5)
minitest-stub_any_instance (1.0.3)
mlanett-redis-lock (0.2.7)
redis
multi_json (1.15.0)
multipart-post (2.3.0)
net-http-persistent (2.9.4)
net-imap (0.3.4)
net-imap (0.3.7)
date
net-protocol
net-pop (0.1.2)
Expand All @@ -226,9 +226,9 @@ GEM
net-ssh (>= 2.6.5, < 8.0.0)
net-smtp (0.3.3)
net-protocol
net-ssh (7.0.1)
net-ssh (7.2.0)
netrc (0.11.0)
newrelic_rpm (9.0.0)
newrelic_rpm (9.4.2)
oj (2.18.5)
omni_logger (0.1.4)
logger
Expand All @@ -239,21 +239,21 @@ GEM
pry (0.14.2)
coderay (~> 1.1)
method_source (~> 1.0)
public_suffix (5.0.1)
public_suffix (5.0.3)
rack (1.6.13)
rack-accept (0.4.5)
rack (>= 0.4)
rack-attack (6.6.1)
rack (>= 1.0, < 3)
rack-cache (1.13.0)
rack-cache (1.14.0)
rack (>= 0.4)
rack-cors (1.0.6)
rack (>= 1.6.0)
rack-mini-profiler (3.1.0)
rack-mini-profiler (3.1.1)
rack (>= 1.2.0)
rack-protection (1.5.5)
rack
rack-test (2.0.2)
rack-test (2.1.0)
rack (>= 1.3)
rack-timeout (0.6.3)
raindrops (0.20.1)
Expand Down Expand Up @@ -282,7 +282,7 @@ GEM
mime-types (>= 1.16, < 4.0)
netrc (~> 0.8)
retriable (3.1.2)
rexml (3.2.5)
rexml (3.2.6)
rsolr (2.5.0)
builder (>= 2.1.2)
faraday (>= 0.9, < 3, != 2.0.0)
Expand Down Expand Up @@ -318,13 +318,13 @@ GEM
rack-test
sinatra (~> 1.4.0)
tilt (>= 1.3, < 3)
sshkit (1.21.4)
sshkit (1.21.5)
net-scp (>= 1.1.2)
net-ssh (>= 2.8.0)
systemu (2.6.5)
temple (0.10.0)
tilt (2.1.0)
timeout (0.3.2)
temple (0.10.2)
tilt (2.2.0)
timeout (0.4.0)
trailblazer-option (0.1.2)
tzinfo (2.0.6)
concurrent-ruby (~> 1.0)
Expand All @@ -346,7 +346,6 @@ PLATFORMS
x86_64-darwin-21
x86_64-linux


DEPENDENCIES
activesupport (~> 3.0)
bcrypt_pbkdf (>= 1.0, < 2.0)
Expand Down
16 changes: 15 additions & 1 deletion docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -75,10 +75,14 @@ services:

redis-ut:
image: redis
ports:
- 6379:6379

4store-ut:
image: bde2020/4store
#volume: fourstore:/var/lib/4store
ports:
- 9000:9000
command: >
bash -c "4s-backend-setup --segments 4 ontoportal_kb
&& 4s-backend ontoportal_kb
Expand All @@ -88,10 +92,20 @@ services:


solr-ut:
image: ontoportal/solr-ut:0.1
image: solr:8
volumes:
- ./test/solr/configsets:/configsets:ro
ports:
- "8983:8983"
command: >
bash -c "precreate-core term_search_core1 /configsets/term_search
&& precreate-core prop_search_core1 /configsets/property_search
&& solr-foreground"
mgrep-ut:
image: ontoportal/mgrep-ncbo:0.1
ports:
- "55556:55555"

agraph-ut:
image: franzinc/agraph:v7.3.0
Expand Down
16 changes: 10 additions & 6 deletions helpers/search_helper.rb
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,9 @@ def get_term_search_query(text, params={})
end
end

lang = params["lang"] || params["language"]
lang_suffix = lang && !lang.eql?("all") ? "_#{lang}" : ""

query = ""
params["defType"] = "edismax"
params["stopwords"] = "true"
Expand All @@ -98,25 +101,25 @@ def get_term_search_query(text, params={})

if params[EXACT_MATCH_PARAM] == "true"
query = "\"#{solr_escape(text)}\""
params["qf"] = "resource_id^20 prefLabelExact^10 synonymExact #{QUERYLESS_FIELDS_STR}"
params["hl.fl"] = "resource_id prefLabelExact synonymExact #{QUERYLESS_FIELDS_STR}"
params["qf"] = "resource_id^20 prefLabelExact#{lang_suffix }^10 synonymExact#{lang_suffix } #{QUERYLESS_FIELDS_STR}"
params["hl.fl"] = "resource_id prefLabelExact#{lang_suffix } synonymExact#{lang_suffix } #{QUERYLESS_FIELDS_STR}"
elsif params[SUGGEST_PARAM] == "true" || text[-1] == '*'
text.gsub!(/\*+$/, '')
query = "\"#{solr_escape(text)}\""
params["qt"] = "/suggest_ncbo"
params["qf"] = "prefLabelExact^100 prefLabelSuggestEdge^50 synonymSuggestEdge^10 prefLabelSuggestNgram synonymSuggestNgram resource_id #{QUERYLESS_FIELDS_STR}"
params["qf"] = "prefLabelExact#{lang_suffix }^100 prefLabelSuggestEdge^50 synonymSuggestEdge^10 prefLabelSuggestNgram synonymSuggestNgram resource_id #{QUERYLESS_FIELDS_STR}"
params["pf"] = "prefLabelSuggest^50"
params["hl.fl"] = "prefLabelExact prefLabelSuggestEdge synonymSuggestEdge prefLabelSuggestNgram synonymSuggestNgram resource_id #{QUERYLESS_FIELDS_STR}"
params["hl.fl"] = "prefLabelExact#{lang_suffix } prefLabelSuggestEdge synonymSuggestEdge prefLabelSuggestNgram synonymSuggestNgram resource_id #{QUERYLESS_FIELDS_STR}"
else
if text.strip.empty?
query = '*'
else
query = solr_escape(text)
end

params["qf"] = "resource_id^100 prefLabelExact^90 prefLabel^70 synonymExact^50 synonym^10 #{QUERYLESS_FIELDS_STR}"
params["qf"] = "resource_id^100 prefLabelExact#{lang_suffix }^90 prefLabel#{lang_suffix }^70 synonymExact#{lang_suffix }^50 synonym#{lang_suffix }^10 #{QUERYLESS_FIELDS_STR}"
params["qf"] << " property" if params[INCLUDE_PROPERTIES_PARAM] == "true"
params["hl.fl"] = "resource_id prefLabelExact prefLabel synonymExact synonym #{QUERYLESS_FIELDS_STR}"
params["hl.fl"] = "resource_id prefLabelExact#{lang_suffix } prefLabel#{lang_suffix } synonymExact#{lang_suffix } synonym#{lang_suffix } #{QUERYLESS_FIELDS_STR}"
params["hl.fl"] = "#{params["hl.fl"]} property" if params[INCLUDE_PROPERTIES_PARAM] == "true"
end

Expand Down Expand Up @@ -345,6 +348,7 @@ def populate_classes_from_search(classes, ontology_acronyms=nil)
doc[:submission] = old_class.submission
doc[:properties] = MultiJson.load(doc.delete(:propertyRaw)) if include_param_contains?(:properties)
instance = LinkedData::Models::Class.read_only(doc)
instance.prefLabel = instance.prefLabel.first if instance.prefLabel.is_a?(Array)
classes_hash[ont_uri_class_uri] = instance
end

Expand Down
4 changes: 2 additions & 2 deletions test/controllers/test_ontologies_controller.rb
Original file line number Diff line number Diff line change
Expand Up @@ -217,13 +217,13 @@ def test_download_acl_only
begin
allowed_user = User.new({
username: "allowed",
email: "test@example.org",
email: "test1@example.org",
password: "12345"
})
allowed_user.save
blocked_user = User.new({
username: "blocked",
email: "test@example.org",
email: "test2@example.org",
password: "12345"
})
blocked_user.save
Expand Down
9 changes: 6 additions & 3 deletions test/controllers/test_ontology_submissions_controller.rb
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,10 @@ def self._set_vars
administeredBy: "tim",
"file" => Rack::Test::UploadedFile.new(@@test_file, ""),
released: DateTime.now.to_s,
contact: [{name: "test_name", email: "[email protected]"}]
contact: [{name: "test_name", email: "[email protected]"}],
URI: 'https://test.com/test',
status: 'production',
description: 'ontology description'
}
@@status_uploaded = "UPLOADED"
@@status_rdf = "RDF"
Expand Down Expand Up @@ -156,13 +159,13 @@ def test_download_acl_only
begin
allowed_user = User.new({
username: "allowed",
email: "test@example.org",
email: "test4@example.org",
password: "12345"
})
allowed_user.save
blocked_user = User.new({
username: "blocked",
email: "test@example.org",
email: "test5@example.org",
password: "12345"
})
blocked_user.save
Expand Down
Loading

0 comments on commit dcea9e4

Please sign in to comment.