From db35d02e3cc63099ac0e92d83193884db8e987f6 Mon Sep 17 00:00:00 2001 From: Syphax Bouazzouni Date: Sun, 1 Sep 2024 23:56:56 +0200 Subject: [PATCH 1/6] add unit tests of the expected behavior of the federation --- config/config.test.rb | 31 +++++++-- test/models/test_federation.rb | 122 +++++++++++++++++++++++++++++++++ test/test_case.rb | 2 + 3 files changed, 149 insertions(+), 6 deletions(-) create mode 100644 test/models/test_federation.rb diff --git a/config/config.test.rb b/config/config.test.rb index 9185c5d..1200719 100644 --- a/config/config.test.rb +++ b/config/config.test.rb @@ -1,14 +1,33 @@ -# config.rb is required for testing -# unit test makes calls to bioportal api so it needs a valid API key which can -# be set via ENV variable UT_APIKEY -abort('UT_APIKEY env variable is not set. Canceling tests') unless ENV.include?('UT_APIKEY') -abort('UT_APIKEY env variable is set to an empty value. Canceling tests') unless ENV['UT_APIKEY'].size > 5 $API_CLIENT_INVALIDATE_CACHE = false $DEBUG_API_CLIENT = false + LinkedData::Client.config do |config| - config.rest_url = 'https://data.bioontology.org' + config.rest_url = 'https://data.bioontology.org/' config.apikey = '8b5b7825-538d-40e0-9e9e-5ab9274a9aeb' config.links_attr = 'links' config.cache = true config.debug_client = false + config.debug_client_keys = [] + config.federated_portals = { + bioportal: { + api: 'https://data.agroportal.lirmm.fr/', + apikey: '1de0a270-29c5-4dda-b043-7c3580628cd5', + color: '#234979', + }, + ecoportal: { + api: 'https://data.ecoportal.lifewatch.eu/', + apikey: "43a437ba-a437-4bf0-affd-ab520e584719", + color: '#0f4e8a', + }, + # earthportal: { + # api: 'https://earthportal.eu:8443/', + # apikey: "c9147279-954f-41bd-b068-da9b0c441288", + # color: '#1e2251', + # }, + biodivportal: { + api: 'https://data.biodivportal.gfbio.org/', + apikey: "47a57aa3-7b54-4f34-b695-dbb5f5b7363e", + color: '#1e2251', + } + } end diff --git a/test/models/test_federation.rb 
b/test/models/test_federation.rb new file mode 100644 index 0000000..4890af5 --- /dev/null +++ b/test/models/test_federation.rb @@ -0,0 +1,122 @@ +require_relative '../test_case' +require 'pry' +require 'benchmark' +require 'webmock' +require 'request_store' + +class FederationTest < LinkedData::Client::TestCase + + def test_federated_ontologies_all + ontologies = [] + time1 = Benchmark.realtime do + ontologies = LinkedData::Client::Models::Ontology.all(display_links: false, display_context: false) + end + + ontologies_federate_all = [] + time2 = Benchmark.realtime do + ontologies_federate_all = LinkedData::Client::Models::Ontology.all(federate: true, display_links: false, display_context: false) + end + + puts "" + puts "AgroPortal ontologies: #{ontologies.length} in #{time1}s" + puts "Federated ontologies: #{ontologies_federate_all.length} in #{time2}s" + + refute_equal ontologies.length, ontologies_federate_all.length + + ontologies_federate_all.group_by{|x| x.id.split('/')[0..-2].join('/')}.each do |portal, onts| + puts "#{portal} ontologies: #{onts.length}" + end + + ontologies_federate_all_cache = [] + time2 = Benchmark.realtime do + ontologies_federate_all_cache = LinkedData::Client::Models::Ontology.all(federate: true, display_links: false, display_context: false) + end + + + puts "Federated ontologies with cache: #{ontologies_federate_all_cache.length} in #{time2}s" + + assert_equal ontologies_federate_all_cache.size, ontologies_federate_all.size + + ontologies_federate_two = [] + time2 = Benchmark.realtime do + ontologies_federate_two = LinkedData::Client::Models::Ontology.all(federate: [:ecoportal, :biodivportal], display_links: false, display_context: false) + end + + puts "Federated ontologies with two portal only with cache: #{ontologies_federate_two.length} in #{time2}s" + + refute_equal ontologies_federate_two.size, ontologies_federate_all.size + + federated_portals = ontologies_federate_two.map{|x| x.id.split('/')[0..-2].join('/')}.uniq + 
assert_equal 3, federated_portals.size + assert %w[bioontology ecoportal biodivportal].all? { |p| federated_portals.any?{|id| id[p]} } + end + + def test_federated_submissions_all + onts = [] + time1 = Benchmark.realtime do + onts = LinkedData::Client::Models::OntologySubmission.all + end + + onts_federate = [] + time2 = Benchmark.realtime do + onts_federate = LinkedData::Client::Models::OntologySubmission.all(federate: true) + end + + puts "" + puts "AgroPortal submissions: #{onts.length} in #{time1}s" + puts "Federated submissions: #{onts_federate.length} in #{time2}s" + + refute_equal onts.length, onts_federate.length + + onts_federate.group_by{|x| x.id.split('/')[0..-4].join('/')}.each do |portal, onts| + puts "#{portal} submissions: #{onts.length}" + end + + onts_federate = [] + time2 = Benchmark.realtime do + onts_federate = LinkedData::Client::Models::OntologySubmission.all(federate: true) + end + puts "Federated submissions with cache: #{onts_federate.length} in #{time2}s" + + end + + def test_federation_middleware + ontologies_federate_one = LinkedData::Client::Models::Ontology.all(federate: [:ecoportal, :biodivportal], display_links: false, display_context: false) + + RequestStore.store[:federated_portals] = [:ecoportal, :biodivportal] #saved globally + + ontologies_federate_two = LinkedData::Client::Models::Ontology.all(display_links: false, display_context: false) + assert_equal ontologies_federate_one.size, ontologies_federate_two.size + end + + + def test_federation_error + WebMock.enable! 
+ LinkedData::Client::Models::Ontology.all(invalidate_cache: true) + WebMock.stub_request(:get, "#{LinkedData::Client.settings.rest_url.chomp('/')}/ontologies?include=all&display_links=false&display_context=false") + .to_return(body: "Internal server error", status: 500) + + ontologies_federate_one = LinkedData::Client::Models::Ontology.all(federate: [:ecoportal, :biodivportal], display_links: false, display_context: false, invalidate_cache: true) + + assert_equal "Problem retrieving #{LinkedData::Client.settings.rest_url}/ontologies", ontologies_federate_one.first.errors + + WebMock.disable! + end + + def test_federated_analytics + RequestStore.store[:federated_portals] = [:ecoportal,:biodivportal] + analytics = LinkedData::Client::Analytics.last_month + refute_empty analytics.onts + end + + + def test_federation_ssl_error + WebMock.enable! + WebMock.stub_request(:get, "#{LinkedData::Client.settings.rest_url.chomp('/')}") + .to_raise(Faraday::SSLError) + ontologies_federate_one = LinkedData::Client::Models::Ontology.all(display_links: false, display_context: false, invalidate_cache: true) + + refute_nil ontologies_federate_one.first.errors + WebMock.disable! + end +end \ No newline at end of file diff --git a/test/test_case.rb b/test/test_case.rb index cd9fe2e..0f2f907 100644 --- a/test/test_case.rb +++ b/test/test_case.rb @@ -1,7 +1,9 @@ require 'test-unit' require_relative '../lib/ontologies_api_client' require_relative '../config/config' +require 'webmock' +WebMock.allow_net_connect! 
module LinkedData module Client class TestCase < Test::Unit::TestCase From 9d5bb868fa1eef9fe3336a1354f42a0555725f14 Mon Sep 17 00:00:00 2001 From: Syphax Bouazzouni Date: Sun, 1 Sep 2024 23:58:16 +0200 Subject: [PATCH 2/6] add caching debugging message option to see the cached and missed calls --- lib/ontologies_api_client/http.rb | 12 +++++++++--- .../middleware/faraday-object-cache.rb | 5 +++-- test/middleware/test_cache.rb | 10 +++++++--- 3 files changed, 19 insertions(+), 8 deletions(-) diff --git a/lib/ontologies_api_client/http.rb b/lib/ontologies_api_client/http.rb index d3df241..ebd8750 100644 --- a/lib/ontologies_api_client/http.rb +++ b/lib/ontologies_api_client/http.rb @@ -3,6 +3,7 @@ require 'digest' require 'ostruct' require 'benchmark' +require 'active_support/cache' ## # This monkeypatch makes OpenStruct act like Struct objects class OpenStruct @@ -48,22 +49,27 @@ def self.conn rails = Kernel.const_get("Rails") store = rails.cache if rails.cache end - LinkedData::Client.config_connection(cache_store: store) + LinkedData::Client.config_connection(cache_store: store || ActiveSupport::Cache::MemoryStore.new) end LinkedData::Client.settings.conn end + def self.federated_conn + LinkedData::Client.settings.federated_conn + end + def self.get(path, params = {}, options = {}) headers = options[:headers] || {} raw = options[:raw] || false # return the unparsed body of the request params = params.delete_if { |k, v| v == nil || v.to_s.empty?
} params[:ncbo_cache_buster] = Time.now.to_f if raw # raw requests don't get cached to ensure body is available invalidate_cache = params.delete(:invalidate_cache) || $API_CLIENT_INVALIDATE_CACHE || false + connection = options[:connection] || conn begin begin response = nil time = Benchmark.realtime do - response = conn.get do |req| + response = connection.get do |req| req.url path req.params = params.dup req.options[:timeout] = 60 @@ -71,7 +77,7 @@ def self.get(path, params = {}, options = {}) req.headers[:invalidate_cache] = invalidate_cache end end - puts "Getting: #{path} with #{params} (#{time}s)" if $DEBUG_API_CLIENT + puts "Getting: #{path} with #{params} (t: #{time}s - cache: #{response.headers["X-Rack-Cache"]})" if $DEBUG_API_CLIENT rescue Exception => e params = Faraday::Utils.build_query(params) path << "?" unless params.empty? || path.include?("?") diff --git a/lib/ontologies_api_client/middleware/faraday-object-cache.rb b/lib/ontologies_api_client/middleware/faraday-object-cache.rb index 080c416..f8ea13f 100644 --- a/lib/ontologies_api_client/middleware/faraday-object-cache.rb +++ b/lib/ontologies_api_client/middleware/faraday-object-cache.rb @@ -1,4 +1,5 @@ require 'digest/sha1' +require 'active_support' require 'active_support/cache' require 'lz4-ruby' require_relative '../http' @@ -70,7 +71,7 @@ def retrieve_cached_response(request_key) env = { status: 304 } cached_response = ObjectCacheResponse.new(env) cached_response.parsed_body = ld_obj - cached_response.env.response_headers = { "x-rack-cache" => 'hit' } + cached_response.env.response_headers = { "X-Rack-Cache" => 'hit' } cached_response end @@ -88,7 +89,7 @@ def process_response(response_env, request_key) response = ObjectCacheResponse.new(response_env) response.parsed_body = ld_obj - response.env.response_headers["x-rack-cache"] = cache_state + response.env.response_headers["X-Rack-Cache"] = cache_state response end diff --git a/test/middleware/test_cache.rb b/test/middleware/test_cache.rb 
index ddea7a9..943b55c 100644 --- a/test/middleware/test_cache.rb +++ b/test/middleware/test_cache.rb @@ -20,6 +20,10 @@ def setup end end + def teardown + WebMock.disable! + end + def test_cache_hit_for_get_request body1, body2 = nil # First request should not hit the cache @@ -128,14 +132,14 @@ def test_cache_last_modified private def cached?(response) - response.env.response_headers['x-rack-cache'].eql?('hit') + response.env.response_headers['X-Rack-Cache'].eql?('hit') end def uncached?(response) - response.env.response_headers['x-rack-cache'].eql?('miss') + response.env.response_headers['X-Rack-Cache'].eql?('miss') end def refreshed?(response) - response.env.response_headers['x-rack-cache'].eql?('fresh') + response.env.response_headers['X-Rack-Cache'].eql?('fresh') end end From 1daf74f9be56cb273f8830d3d58d69cf776686d7 Mon Sep 17 00:00:00 2001 From: Syphax Bouazzouni Date: Sun, 1 Sep 2024 23:59:35 +0200 Subject: [PATCH 3/6] update the initialization of the http connection to have multiple by API --- lib/ontologies_api_client/config.rb | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/lib/ontologies_api_client/config.rb b/lib/ontologies_api_client/config.rb index 25805a8..fbf6cfb 100644 --- a/lib/ontologies_api_client/config.rb +++ b/lib/ontologies_api_client/config.rb @@ -37,8 +37,22 @@ def config(&block) def config_connection(options = {}) return if @settings_run_connection - store = options[:cache_store] - @settings.conn = Faraday.new(@settings.rest_url) do |faraday| + store = options[:cache_store] || ActiveSupport::Cache::MemoryStore.new + @settings.conn = faraday_connection(@settings.rest_url, @settings.apikey, store) + @settings.federated_conn = @settings.federated_portals.map do |portal_name, portal_info| + [portal_name, faraday_connection(portal_info[:api], portal_info[:apikey], store)] + end.to_h + + @settings_run_connection = true + end + + def connection_configured? 
+ @settings_run_connection + end + + private + def faraday_connection(url, apikey, store) + Faraday.new(url.to_s.chomp('/')) do |faraday| if @settings.enable_long_request_log require_relative 'middleware/faraday-long-requests' faraday.use :long_requests @@ -69,15 +83,10 @@ def config_connection(options = {}) faraday.adapter :excon faraday.headers = { "Accept" => "application/json", - "Authorization" => "apikey token=#{@settings.apikey}", + "Authorization" => "apikey token=#{apikey}", "User-Agent" => "NCBO API Ruby Client v0.1.0" } end - @settings_run_connection = true - end - - def connection_configured? - @settings_run_connection end end end \ No newline at end of file From 63d00f0f38372087afa06cbc98b2c5ef0f8f6785 Mon Sep 17 00:00:00 2001 From: Syphax Bouazzouni Date: Mon, 2 Sep 2024 00:00:48 +0200 Subject: [PATCH 4/6] create the RequestFederation to implement federated_get function --- lib/ontologies_api_client.rb | 1 + .../request_federation.rb | 45 +++++++++++++++++++ ontologies_api_client.gemspec | 4 +- 3 files changed, 49 insertions(+), 1 deletion(-) create mode 100644 lib/ontologies_api_client/request_federation.rb diff --git a/lib/ontologies_api_client.rb b/lib/ontologies_api_client.rb index 739639a..43c4f6d 100644 --- a/lib/ontologies_api_client.rb +++ b/lib/ontologies_api_client.rb @@ -1,6 +1,7 @@ require 'oj' require 'multi_json' require 'spawnling' +require 'request_store' require_relative 'ontologies_api_client/config' require_relative 'ontologies_api_client/http' diff --git a/lib/ontologies_api_client/request_federation.rb b/lib/ontologies_api_client/request_federation.rb new file mode 100644 index 0000000..458af90 --- /dev/null +++ b/lib/ontologies_api_client/request_federation.rb @@ -0,0 +1,45 @@ +require 'active_support/core_ext/hash' + +module LinkedData + module Client + module RequestFederation + + def self.included(base) + base.extend(ClassMethods) + end + + module ClassMethods + def federated_get(params = {}, &link) + portals = 
request_portals(params) + + connections = Parallel.map(portals, in_threads: portals.size) do |conn| + begin + HTTP.get(link.call(conn.url_prefix.to_s.chomp('/')), params, connection: conn) + rescue StandardError => e + [OpenStruct.new(errors: "Problem retrieving #{link.call(conn.url_prefix.to_s.chomp('/')) || conn.url_prefix}")] + end + end + + connections.flatten + end + + def request_portals(params = {}) + federate = params.delete(:federate) || ::RequestStore.store[:federated_portals] + + portals = [LinkedData::Client::HTTP.conn] + + if federate.is_a?(Array) + portals += LinkedData::Client::HTTP.federated_conn + .select { |portal_name, _| federate.include?(portal_name) || federate.include?(portal_name.to_s) } + .values + elsif !federate.blank? # all + portals += LinkedData::Client::HTTP.federated_conn.values + end + + portals + end + end + + end + end +end diff --git a/ontologies_api_client.gemspec b/ontologies_api_client.gemspec index fad13e4..b2ae8c2 100644 --- a/ontologies_api_client.gemspec +++ b/ontologies_api_client.gemspec @@ -12,7 +12,7 @@ Gem::Specification.new do |gem| gem.require_paths = ["lib"] gem.version = "2.2.0" - gem.add_dependency('activesupport') + gem.add_dependency('activesupport', '~> 7.0.4') gem.add_dependency('excon') gem.add_dependency('faraday') gem.add_dependency('faraday-excon', '~> 2.0.0') @@ -20,5 +20,7 @@ Gem::Specification.new do |gem| gem.add_dependency('lz4-ruby') gem.add_dependency('multi_json') gem.add_dependency('oj') + gem.add_dependency('parallel') + gem.add_dependency('request_store') gem.add_dependency('spawnling', '2.1.5') end From 1161610ca3ed7463bb34ea107a8ab063a98bb749 Mon Sep 17 00:00:00 2001 From: Syphax Bouazzouni Date: Mon, 2 Sep 2024 00:02:24 +0200 Subject: [PATCH 5/6] update collections calls to use federated_get() instead of get() --- Gemfile.lock | 35 +++++++++++++++---------- lib/ontologies_api_client/base.rb | 13 ++++++--- lib/ontologies_api_client/collection.rb | 15 ++++++++--- 3 files changed, 41 
insertions(+), 22 deletions(-) diff --git a/Gemfile.lock b/Gemfile.lock index 413fd8c..dd5fb94 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -2,7 +2,7 @@ PATH remote: . specs: ontologies_api_client (2.2.0) - activesupport + activesupport (~> 7.0.4) excon faraday faraday-excon (~> 2.0.0) @@ -10,12 +10,14 @@ PATH lz4-ruby multi_json oj + parallel + request_store spawnling (= 2.1.5) GEM remote: https://rubygems.org/ specs: - activesupport (7.0.4) + activesupport (7.0.8.1) concurrent-ruby (~> 1.0, >= 1.0.2) i18n (>= 1.6, < 2) minitest (>= 5.1) @@ -24,11 +26,11 @@ GEM public_suffix (>= 2.0.2, < 6.0) bigdecimal (3.1.7) coderay (1.1.3) - concurrent-ruby (1.1.10) + concurrent-ruby (1.2.3) crack (1.0.0) bigdecimal rexml - excon (0.95.0) + excon (0.110.0) faraday (2.0.1) faraday-net_http (~> 2.0) ruby2_keywords (>= 0.0.4) @@ -39,26 +41,31 @@ GEM multipart-post (~> 2) faraday-net_http (2.1.0) hashdiff (1.1.0) - i18n (1.12.0) + i18n (1.14.4) concurrent-ruby (~> 1.0) lz4-ruby (0.3.3) - method_source (1.0.0) - minitest (5.16.3) + method_source (1.1.0) + minitest (5.22.3) multi_json (1.15.0) - multipart-post (2.2.3) - oj (3.13.23) - power_assert (2.0.2) - pry (0.14.1) + multipart-post (2.4.0) + oj (3.16.3) + bigdecimal (>= 3.0) + parallel (1.24.0) + power_assert (2.0.3) + pry (0.14.2) coderay (~> 1.1) method_source (~> 1.0) public_suffix (5.0.5) - rake (13.0.6) + rack (3.0.10) + rake (13.2.1) + request_store (1.7.0) + rack (>= 1.4) rexml (3.2.6) ruby2_keywords (0.0.5) spawnling (2.1.5) - test-unit (3.5.7) + test-unit (3.6.2) power_assert - tzinfo (2.0.5) + tzinfo (2.0.6) concurrent-ruby (~> 1.0) webmock (3.23.0) addressable (>= 2.8.0) diff --git a/lib/ontologies_api_client/base.rb b/lib/ontologies_api_client/base.rb index ff4aba6..a4404df 100644 --- a/lib/ontologies_api_client/base.rb +++ b/lib/ontologies_api_client/base.rb @@ -132,11 +132,16 @@ def create_attributes(attributes) attr_exists = self.public_methods(false).include?(attr) unless attr_exists self.class.class_eval do 
- define_method attr.to_sym do - instance_variable_get("@#{attr}") + unless method_defined?(attr.to_sym) + define_method attr.to_sym do + instance_variable_get("@#{attr}") + end end - define_method "#{attr}=" do |val| - instance_variable_set("@#{attr}", val) + + unless method_defined?("#{attr}=".to_sym) + define_method "#{attr}=" do |val| + instance_variable_set("@#{attr}", val) + end end end end diff --git a/lib/ontologies_api_client/collection.rb b/lib/ontologies_api_client/collection.rb index 77572bf..a4d34c2 100644 --- a/lib/ontologies_api_client/collection.rb +++ b/lib/ontologies_api_client/collection.rb @@ -1,11 +1,15 @@ require_relative 'config' require_relative 'http' +require_relative 'request_federation' +require 'parallel' module LinkedData module Client module Collection + def self.included(base) + base.include LinkedData::Client::RequestFederation base.extend(ClassMethods) end @@ -24,8 +28,8 @@ def method_missing(meth, *args, &block) ## # Get all top-level links for the API - def top_level_links - @top_level_links||= HTTP.get(LinkedData::Client.settings.rest_url) + def top_level_links(link = LinkedData::Client.settings.rest_url) + HTTP.get(link) end ## @@ -36,11 +40,14 @@ def uri_from_context(object, media_type) end end + ## # Get the first collection of resources for a given type def entry_point(media_type, params = {}) - params = {include: @include_attrs}.merge(params) - HTTP.get(uri_from_context(top_level_links, media_type), params) + params = { include: @include_attrs, display_links: false, display_context: false}.merge(params) + federated_get(params) do |url| + uri_from_context(top_level_links(url), media_type) rescue nil + end end ## From f651f9f920a55492629ad08a83dde4ed65278def Mon Sep 17 00:00:00 2001 From: Syphax Bouazzouni Date: Mon, 2 Sep 2024 00:02:51 +0200 Subject: [PATCH 6/6] update the special case of analytics to handle federated calls --- lib/ontologies_api_client/analytics.rb | 35 +++++++++++++++++++------- 1 file changed, 26 
insertions(+), 9 deletions(-) diff --git a/lib/ontologies_api_client/analytics.rb b/lib/ontologies_api_client/analytics.rb index cbb85ac..b6a78cc 100644 --- a/lib/ontologies_api_client/analytics.rb +++ b/lib/ontologies_api_client/analytics.rb @@ -1,6 +1,9 @@ +require_relative 'request_federation' + module LinkedData::Client class Analytics HTTP = LinkedData::Client::HTTP + include LinkedData::Client::RequestFederation attr_accessor :onts, :date @@ -10,18 +13,32 @@ def self.all(params = {}) def self.last_month data = self.new - data.date = last_month = DateTime.now - 1.month + last_month = DateTime.now.prev_month year_num = last_month.year month_num = last_month.month - analytics = get(:analytics, {year: year_num, month: month_num}).to_h - analytics.delete(:links) - analytics.delete(:context) + params = { year: year_num, month: month_num } + + responses = federated_get(params) do |url| + "#{url}/analytics" + end + + portals = request_portals onts = [] - analytics.keys.each do |ont| - views = analytics[ont][:"#{year_num}"][:"#{month_num}"] - onts << {ont: ont, views: views} + responses.each_with_index do |portal_views, index| + next nil if portal_views&.errors + + portal_views = portal_views.to_h + + url = portals[index].url_prefix.to_s.chomp('/') + portal_views.delete(:links) + portal_views.delete(:context) + portal_views.keys.map do |ont| + views = portal_views[ont][:"#{year_num}"][:"#{month_num}"] + onts << { ont: "#{url}/ontologies/#{ont}", views: views } + end end - data.onts = onts + + data.onts = onts.flatten.compact data end @@ -29,7 +46,7 @@ def self.last_month def self.get(path, params = {}) path = path.to_s - path = "/"+path unless path.start_with?("/") + path = "/" + path unless path.start_with?("/") HTTP.get(path, params) end