diff --git a/.dockerignore b/.dockerignore index 907fcb07..53b9b1b0 100644 --- a/.dockerignore +++ b/.dockerignore @@ -1,6 +1,9 @@ -# Git +.bundle .git .gitignore +.github +.DS_Store +vendor/bundle # Logs log/* # Temp files diff --git a/.github/workflows/ruby-unit-tests.yml b/.github/workflows/ruby-unit-tests.yml index 6a30c87b..c4268d19 100644 --- a/.github/workflows/ruby-unit-tests.yml +++ b/.github/workflows/ruby-unit-tests.yml @@ -7,11 +7,25 @@ on: jobs: test: runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + backend: ['ruby', 'ruby-agraph'] # 'ruby' runs tests with the 4store backend; 'ruby-agraph' runs them with the AllegroGraph backend steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: copy config file - run: cp config/config.rb.test config/config.rb + run: cp config/config.test.rb config/config.rb - name: Build docker-compose run: docker-compose build - name: Run unit tests - run: docker-compose up --exit-code-from unit-test + # unit tests are run inside a container + # http://docs.codecov.io/docs/testing-with-docker + run: | + ci_env=`bash <(curl -s https://codecov.io/env)` + docker-compose run $ci_env -e CI ${{ matrix.backend }} bundle exec rake test TESTOPTS='-v' + - name: Upload coverage reports to Codecov + uses: codecov/codecov-action@v3 + with: + flags: unittests + verbose: true + fail_ci_if_error: false # optional (default = false) diff --git a/Dockerfile b/Dockerfile index 4d97982e..5016faa1 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,10 +1,22 @@ -FROM ruby:2.6 +ARG RUBY_VERSION +ARG DISTRO_NAME=bullseye + +FROM ruby:$RUBY_VERSION-$DISTRO_NAME + +RUN apt-get update -yqq && apt-get install -yqq --no-install-recommends \ + openjdk-11-jre-headless \ + raptor2-utils \ + && rm -rf /var/lib/apt/lists/* -RUN apt-get update -yqq && apt-get install -yqq --no-install-recommends openjdk-11-jre-headless raptor2-utils # The Gemfile Caching Trick RUN mkdir -p /srv/ontoportal/ncbo_annotator -COPY Gemfile* /srv/ontoportal/ncbo_annotator/ 
+COPY *.gemspec Gemfile* /srv/ontoportal/ncbo_annotator/ + WORKDIR /srv/ontoportal/ncbo_annotator -RUN gem install bundler -v "$(grep -A 1 "BUNDLED WITH" Gemfile.lock | tail -n 1)" + +# set bundler to v2.4.22 which is the last version supported by ruby 2.7 +RUN gem install bundler -v 2.4.22 +ENV BUNDLE_PATH /bundle RUN bundle install COPY . /srv/ontoportal/ncbo_annotator +CMD ["/bin/bash"] diff --git a/Gemfile b/Gemfile index b7256ccc..eb58cc7a 100644 --- a/Gemfile +++ b/Gemfile @@ -1,19 +1,21 @@ source 'https://rubygems.org' -gem 'cube-ruby', require: 'cube' +gem 'cube-ruby' gem 'faraday', '~> 1.9' gem 'ffi' -gem 'minitest', '~> 4.0' -gem 'oj', '~> 2.0' +gem 'oj', '~> 3.0' gem 'rake', '~> 10.0' gem 'redis' gem 'ruby-xxHash' -# Development -gem 'pry', group: :development +group :development do + gem 'minitest', '~> 4.0' + gem 'pry' + gem 'simplecov' + gem 'simplecov-cobertura' # for codecov.io +end # NCBO gems (can be from a local dev path or from rubygems/git) gem 'goo', github: 'ontoportal-lirmm/goo', branch: 'development' gem 'sparql-client', github: 'ncbo/sparql-client', branch: 'master' gem 'ontologies_linked_data', github: 'ontoportal-lirmm/ontologies_linked_data', branch: 'development' -gem 'ncbo_resource_index', github: 'ncbo/resource_index' \ No newline at end of file diff --git a/Gemfile.lock b/Gemfile.lock index b66bd81b..b0003135 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -1,18 +1,5 @@ GIT - remote: https://github.com/ncbo/resource_index.git - revision: 24a7f14a6da4f4a0eaba1016ca5a378dfccd7441 - specs: - ncbo_resource_index (0.0.1) - elasticsearch (= 2.0.0) - mysql2 (= 0.5.2) - pony - ref - ruby-xxHash - sequel - typhoeus - -GIT - remote: https://github.com/ncbo/sparql-client.git + remote: https://github.com/ontoportal-lirmm/sparql-client.git revision: fb4a89b420f8eb6dda5190a126b6c62e32c4c0c9 branch: master specs: @@ -27,14 +14,13 @@ GIT branch: development specs: goo (0.0.2) - addressable (= 2.3.5) + addressable (~> 2.8) pry rdf (= 1.0.8) redis 
rest-client rsolr sparql-client - systemu uuid GIT @@ -49,7 +35,6 @@ GIT json libxml-ruby multi_json - ncbo_resource_index oj omni_logger pony @@ -67,25 +52,19 @@ GEM multi_json (~> 1.3) thread_safe (~> 0.1) tzinfo (~> 0.3.37) - addressable (2.3.5) - bcrypt (3.1.17) + addressable (2.8.6) + public_suffix (>= 2.0.2, < 6.0) + bcrypt (3.1.20) + bigdecimal (3.1.5) builder (3.2.4) coderay (1.1.3) - concurrent-ruby (1.1.10) + concurrent-ruby (1.2.2) + connection_pool (2.4.1) cube-ruby (0.0.3) - domain_name (0.5.20190701) - unf (>= 0.0.5, < 1.0.0) - elasticsearch (2.0.0) - elasticsearch-api (= 2.0.0) - elasticsearch-transport (= 2.0.0) - elasticsearch-api (2.0.0) - multi_json - elasticsearch-transport (2.0.0) - faraday - multi_json - ethon (0.15.0) - ffi (>= 1.15.0) - faraday (1.10.0) + date (3.3.4) + docile (1.4.0) + domain_name (0.6.20240107) + faraday (1.10.3) faraday-em_http (~> 1.0) faraday-em_synchrony (~> 1.0) faraday-excon (~> 1.1) @@ -101,79 +80,100 @@ GEM faraday-em_synchrony (1.0.0) faraday-excon (1.1.0) faraday-httpclient (1.0.1) - faraday-multipart (1.0.3) - multipart-post (>= 1.2, < 3) + faraday-multipart (1.0.4) + multipart-post (~> 2) faraday-net_http (1.0.1) faraday-net_http_persistent (1.2.0) faraday-patron (1.0.0) faraday-rack (1.0.0) faraday-retry (1.0.3) - ffi (1.15.5) + ffi (1.16.3) http-accept (1.7.0) - http-cookie (1.0.4) + http-cookie (1.0.5) domain_name (~> 0.5) i18n (0.9.5) concurrent-ruby (~> 1.0) - json (2.6.1) - json_pure (2.6.1) - libxml-ruby (3.2.2) - logger (1.5.1) + json (2.7.1) + json_pure (2.7.1) + libxml-ruby (5.0.2) + logger (1.6.0) macaddr (1.7.2) systemu (~> 2.6.5) - mail (2.7.1) + mail (2.8.1) mini_mime (>= 0.1.1) + net-imap + net-pop + net-smtp method_source (1.0.0) - mime-types (3.4.1) + mime-types (3.5.2) mime-types-data (~> 3.2015) - mime-types-data (3.2022.0105) - mini_mime (1.1.2) + mime-types-data (3.2023.1205) + mini_mime (1.1.5) minitest (4.7.5) multi_json (1.15.0) - multipart-post (2.1.1) - mysql2 (0.5.2) + multipart-post 
(2.3.0) net-http-persistent (2.9.4) + net-imap (0.4.9.1) + date + net-protocol + net-pop (0.1.2) + net-protocol + net-protocol (0.2.2) + timeout + net-smtp (0.4.0.1) + net-protocol netrc (0.11.0) - oj (2.18.5) + oj (3.16.3) + bigdecimal (>= 3.0) omni_logger (0.1.4) logger pony (1.13.1) mail (>= 2.0) - pry (0.14.1) + pry (0.14.2) coderay (~> 1.1) method_source (~> 1.0) - rack (2.2.3) - rack-test (1.1.0) - rack (>= 1.0, < 3) + public_suffix (5.0.4) + rack (3.0.8) + rack-test (2.1.0) + rack (>= 1.3) rake (10.5.0) rdf (1.0.8) addressable (>= 2.2) - redis (4.6.0) - ref (2.0.0) + redis (5.0.8) + redis-client (>= 0.17.0) + redis-client (0.19.1) + connection_pool rest-client (2.1.0) http-accept (>= 1.7.0, < 2.0) http-cookie (>= 1.0.2, < 2.0) mime-types (>= 1.16, < 4.0) netrc (~> 0.8) + rexml (3.2.6) rsolr (2.5.0) builder (>= 2.1.2) faraday (>= 0.9, < 3, != 2.0.0) ruby-xxHash (0.4.0.2) ruby2_keywords (0.0.5) rubyzip (2.3.2) - sequel (5.56.0) + simplecov (0.22.0) + docile (~> 1.1) + simplecov-html (~> 0.11) + simplecov_json_formatter (~> 0.1) + simplecov-cobertura (2.1.0) + rexml + simplecov (~> 0.19) + simplecov-html (0.12.3) + simplecov_json_formatter (0.1.4) systemu (2.6.5) thread_safe (0.3.6) - typhoeus (1.4.0) - ethon (>= 0.9.0) - tzinfo (0.3.60) - unf (0.1.4) - unf_ext - unf_ext (0.0.8.1) + timeout (0.4.1) + tzinfo (0.3.62) uuid (2.3.9) macaddr (~> 1.0) PLATFORMS - x86_64-darwin-21 + ruby + x86_64-linux DEPENDENCIES cube-ruby @@ -181,14 +181,15 @@ DEPENDENCIES ffi goo! minitest (~> 4.0) - ncbo_resource_index! - oj (~> 2.0) + oj (~> 3.0) ontologies_linked_data! pry rake (~> 10.0) redis ruby-xxHash + simplecov + simplecov-cobertura sparql-client! 
BUNDLED WITH - 2.3.7 + 2.4.22 diff --git a/config/config.rb.test b/config/config.rb.test deleted file mode 100644 index bfa5c227..00000000 --- a/config/config.rb.test +++ /dev/null @@ -1,29 +0,0 @@ -# frozen_string_literal: true - -# -# This file is designed for used for unit testing -# -# All the defaults are set in -# https://github.com/ncbo/ontologies_linked_data/blob/master/lib/ontologies_linked_data/config/config.rb -### -GOO_HOST = ENV.include?('GOO_HOST') ? ENV['GOO_HOST'] : 'localhost' -GOO_PORT = ENV.include?('GOO_PORT') ? ENV['GOO_PORT'] : 9000 -REDIS_HOST = ENV.include?('REDIS_HOST') ? ENV['REDIS_HOST'] : 'localhost' -REDIS_PORT = ENV.include?('REDIS_PORT') ? ENV['REDIS_PORT'] : 6379 -SOLR_HOST = ENV.include?('SOLR_HOST') ? ENV['SOLR_HOST'] : 'localhost' -MGREP_HOST = ENV.include?('MGREP_HOST') ? ENV['MGREP_HOST'] : 'localhost' -MGREP_PORT = ENV.include?('MGREP_PORT') ? ENV['MGREP_PORT'] : 55555 - -LinkedData.config do |config| - config.goo_host = GOO_HOST.to_s - config.goo_port = GOO_PORT.to_i - config.search_server_url = "http://#{SOLR_HOST}:8983/solr/term_search_core1".to_s - config.property_search_server_url = "http://#{SOLR_HOST}:8983/solr/prop_search_core1".to_s -end -Annotator.config do |config| - config.mgrep_host = MGREP_HOST.to_s - config.mgrep_port = MGREP_PORT.to_i - config.mgrep_dictionary_file = './test/data/dictionary.txt' - config.annotator_redis_host = REDIS_HOST.to_s - config.annotator_redis_port = REDIS_PORT.to_i -end diff --git a/config/config.test.rb b/config/config.test.rb new file mode 100644 index 00000000..bfba7b91 --- /dev/null +++ b/config/config.test.rb @@ -0,0 +1,36 @@ +# frozen_string_literal: true + +# +# configuration for unit testing +# +GOO_HOST = ENV.include?('GOO_HOST') ? ENV['GOO_HOST'] : 'localhost' +GOO_PORT = ENV.include?('GOO_PORT') ? ENV['GOO_PORT'] : 9000 +GOO_BACKEND_NAME = ENV.include?("GOO_BACKEND_NAME") ? ENV["GOO_BACKEND_NAME"] : "4store" +GOO_PATH_QUERY = ENV.include?("GOO_PATH_QUERY") ? 
ENV["GOO_PATH_QUERY"] : "/sparql/" +GOO_PATH_DATA = ENV.include?("GOO_PATH_DATA") ? ENV["GOO_PATH_DATA"] : "/data/" +GOO_PATH_UPDATE = ENV.include?("GOO_PATH_UPDATE") ? ENV["GOO_PATH_UPDATE"] : "/update/" +REDIS_HOST = ENV.include?('REDIS_HOST') ? ENV['REDIS_HOST'] : 'localhost' +REDIS_PORT = ENV.include?('REDIS_PORT') ? ENV['REDIS_PORT'] : 6379 +MGREP_HOST = ENV.include?('MGREP_HOST') ? ENV['MGREP_HOST'] : 'localhost' +MGREP_PORT = ENV.include?('MGREP_PORT') ? ENV['MGREP_PORT'] : 55556 +SOLR_TERM_SEARCH_URL = ENV.include?("SOLR_TERM_SEARCH_URL") ? ENV["SOLR_TERM_SEARCH_URL"] : "http://localhost:8983/solr/term_search_core1" +SOLR_PROP_SEARCH_URL = ENV.include?("SOLR_PROP_SEARCH_URL") ? ENV["SOLR_PROP_SEARCH_URL"] : "http://localhost:8983/solr/prop_search_core1" + + +LinkedData.config do |config| + config.goo_backend_name = GOO_BACKEND_NAME.to_s + config.goo_host = GOO_HOST.to_s + config.goo_port = GOO_PORT.to_i + config.goo_path_query = GOO_PATH_QUERY.to_s + config.goo_path_data = GOO_PATH_DATA.to_s + config.goo_path_update = GOO_PATH_UPDATE.to_s + config.search_server_url = SOLR_TERM_SEARCH_URL.to_s + config.property_search_server_url = SOLR_PROP_SEARCH_URL.to_s +end +Annotator.config do |config| + config.mgrep_host = MGREP_HOST.to_s + config.mgrep_port = MGREP_PORT.to_i + config.mgrep_dictionary_file = './test/data/dictionary.txt' + config.annotator_redis_host = REDIS_HOST.to_s + config.annotator_redis_port = REDIS_PORT.to_i +end diff --git a/docker-compose.yml b/docker-compose.yml index abd88e8b..3b343882 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,37 +1,121 @@ version: '3.8' +x-app: &app + build: + context: . 
+ args: + RUBY_VERSION: '2.7' + environment: &env + COVERAGE: 'true' # enable simplecov code coverage + REDIS_HOST: redis-ut + REDIS_PORT: 6379 + MGREP_HOST: mgrep-ut + MGREP_PORT: 55556 + SOLR_TERM_SEARCH_URL: http://solr-ut:8983/solr/term_search_core1 + SOLR_PROP_SEARCH_URL: http://solr-ut:8983/solr/prop_search_core1 + volumes: + - .:/srv/ontoportal/ncbo_annotator + stdin_open: true + tty: true + command: "bundle exec rake test TESTOPTS='-v'" + depends_on: &depends_on + solr-ut: + condition: service_healthy + redis-ut: + condition: service_healthy + mgrep-ut: + condition: service_healthy + services: - unit-test: - build: . + ruby: + <<: *app environment: - - GOO_BACKEND_NAME=4store - - GOO_PORT=9000 - - GOO_HOST=4store-ut - - REDIS_HOST=redis-ut - - REDIS_PORT=6379 - - SOLR_HOST=solr-ut - - MGREP_HOST=mgrep-ut - - MGREP_PORT=55555 + <<: *env + GOO_BACKEND_NAME: 4store + GOO_PORT: 9000 + GOO_HOST: 4store-ut depends_on: - - solr-ut - - redis-ut - - 4store-ut - - mgrep-ut - command: "bundle exec rake test TESTOPTS='-v'" + <<: *depends_on + 4store-ut: + condition: service_started + profiles: + - 4store + + ruby-agraph: + <<: *app + environment: + <<: *env + GOO_BACKEND_NAME: ag + GOO_PORT: 10035 + GOO_HOST: agraph-ut + GOO_PATH_QUERY: /repositories/bioportal_test + GOO_PATH_DATA: /repositories/bioportal_test/statements + GOO_PATH_UPDATE: /repositories/bioportal_test/statements + depends_on: + <<: *depends_on + agraph-ut: + condition: service_healthy + profiles: + - agraph solr-ut: - image: ontoportal/solr-ut:0.1 + image: ontoportal/solr-ut:0.0.2 + healthcheck: + test: ["CMD-SHELL", "curl -sf http://localhost:8983/solr/term_search_core1/admin/ping?wt=json | grep -iq '\"status\":\"OK\"}' || exit 1"] + start_period: 3s + interval: 10s + timeout: 5s + retries: 5 redis-ut: image: redis + healthcheck: + test: redis-cli ping + interval: 10s + timeout: 3s + retries: 10 mgrep-ut: - image: ontoportal/mgrep-ncbo:0.1 + image: ontoportal/mgrep:0.0.2 + platform: linux/amd64 + 
healthcheck: + test: ["CMD", "nc", "-z", "-v", "localhost", "55556"] + start_period: 3s + interval: 10s + timeout: 5s + retries: 5 4store-ut: image: bde2020/4store + platform: linux/amd64 command: > bash -c "4s-backend-setup --segments 4 ontoportal_kb && 4s-backend ontoportal_kb && 4s-httpd -D -s-1 -p 9000 ontoportal_kb" + profiles: + - 4store + + agraph-ut: + image: franzinc/agraph:v8.0.0 + platform: linux/amd64 + environment: + - AGRAPH_SUPER_USER=test + - AGRAPH_SUPER_PASSWORD=xyzzy + shm_size: 1g + command: > + bash -c "/agraph/bin/agraph-control --config /agraph/etc/agraph.cfg start + ; agtool repos create bioportal_test + ; agtool users add anonymous + ; agtool users grant anonymous root:bioportal_test:rw + ; tail -f /agraph/data/agraph.log" + healthcheck: + test: ["CMD-SHELL", "agtool storage-report bioportal_test || exit 1"] + start_period: 30s + interval: 10s + timeout: 5s + retries: 5 + profiles: + - agraph +volumes: + bundle: diff --git a/lib/ncbo_annotator.rb b/lib/ncbo_annotator.rb index f085ede5..64a84105 100644 --- a/lib/ncbo_annotator.rb +++ b/lib/ncbo_annotator.rb @@ -3,7 +3,7 @@ # require 'sparql_http' # require 'ontologies_linked_data' # require_relative 'dictionary/generator' - +require 'uri' require 'zlib' require 'redis' require 'ontologies_linked_data' @@ -230,9 +230,9 @@ def delete_term_cache(redis_prefix) while !class_keys.empty? 
# use expire instead of del to allow potential clients to finish using the data - redis.pipelined { - class_keys.each {|key| redis.expire(key, key_expire_time)} - } + redis.pipelined do |pipeline| + class_keys.each { |key| pipeline.expire(key, key_expire_time) } + end redis.ltrim(key_storage, CHUNK_SIZE + 1, -1) # Remove what we just deleted class_keys = redis.lrange(key_storage, 0, CHUNK_SIZE) # Get next chunk end @@ -481,7 +481,7 @@ def annotate_direct(text, options={}) redis.pipelined do |pipeline| rawAnnotations.each do |ann| id = get_prefixed_id(cur_inst, ann.string_id) - redis_data[id] = { future: pipeline.hgetall(id) } + redis_data[id] = {future: pipeline.hgetall(id)} end end @@ -742,9 +742,9 @@ def create_term_entry(redis, instance_prefix, ontResourceId, resourceId, label_t # exclude single-character or empty/null values if (val.to_s.strip.length > 2) - id = get_prefixed_id_from_value(instance_prefix, val) + id = get_prefixed_id_from_value(instance_prefix, val.to_s) # populate dictionary structure - redis.hset(DICTHOLDER.call(instance_prefix), id, val) + redis.hset(DICTHOLDER.call(instance_prefix), id, val.to_s) entry = "#{label_type}#{LABEL_DELIM}#{ontResourceId}" # parse out semanticTypeCodes @@ -794,6 +794,9 @@ def mappings_for_class_ids(class_ids) end def hierarchy_query(class_ids) + # mdorf, 12/14/2023: AllegroGraph throws a MalformedQuery exception + # if an ID is not of the proper URI format + class_ids.select! { |id| id =~ /\A#{URI::regexp}\z/ } filter_ids = class_ids.map { |id| "?id = <#{id}>" } .join " || " query = < ?parent . } diff --git a/run-unit-tests.sh b/run-unit-tests.sh index c3065039..476c2b83 100755 --- a/run-unit-tests.sh +++ b/run-unit-tests.sh @@ -2,9 +2,9 @@ # sample script for running unit tests in docker. 
This functionality should be moved to a rake task # # add config for unit testing -[ -f config/config.rb ] || cp config/config.rb.test config/config.rb +[ -f config/config.rb ] || cp config/config.test.rb config/config.rb docker-compose build -#docker-compose up --exit-code-from unit-test -docker-compose run --rm unit-test bundle exec rake test TESTOPTS='-v' -docker-compose kill +docker-compose run --rm ruby bundle exec rake test TESTOPTS='-v' +#docker-compose run --rm ruby-agraph bundle exec rake test TESTOPTS='-v' +docker-compose --profile agraph --profile 4store kill diff --git a/test/test_annotator.rb b/test/test_annotator.rb index 869d784d..4c50ba95 100644 --- a/test/test_annotator.rb +++ b/test/test_annotator.rb @@ -97,7 +97,7 @@ def test_generate_dictionary_file assert @@redis.exists?(Annotator::Models::NcboAnnotator::MGREP_DICTIONARY_REFRESH_TIMESTAMP) assert @@redis.exists?(Annotator::Models::NcboAnnotator::LAST_MGREP_RESTART_TIMESTAMP) refresh_timestamp = @@redis.get(Annotator::Models::NcboAnnotator::MGREP_DICTIONARY_REFRESH_TIMESTAMP) - assert refresh_timestamp > start_timestamp + assert_operator refresh_timestamp, :>, start_timestamp end def test_mallet_recognizer @@ -397,7 +397,7 @@ def test_annotate_minsize_term expand_with_mappings: false, min_term_size: 10, whole_word_only: true, - with_synonyms: true + with_synonyms: false }) direct = annotations diff --git a/test/test_case.rb b/test/test_case.rb index 025649e8..52476b5d 100644 --- a/test/test_case.rb +++ b/test/test_case.rb @@ -1,3 +1,20 @@ +if ENV['COVERAGE'] == 'true' || ENV['CI'] == 'true' + require 'simplecov' + require 'simplecov-cobertura' + # https://github.com/codecov/ruby-standard-2 + # Generate HTML and Cobertura reports which can be consumed by codecov uploader + SimpleCov.formatters = SimpleCov::Formatter::MultiFormatter.new([ + SimpleCov::Formatter::HTMLFormatter, + SimpleCov::Formatter::CoberturaFormatter + ]) + SimpleCov.start do + add_filter '/test/' + add_filter 'app.rb' + 
add_filter 'init.rb' + add_filter '/config/' + end +end + require 'ontologies_linked_data' require_relative '../lib/ncbo_annotator' require_relative '../config/config'