diff --git a/.gitignore b/.gitignore index 5dcefa7c..3f0f3286 100644 --- a/.gitignore +++ b/.gitignore @@ -26,4 +26,7 @@ doc/ .idea/* projectFilesBackup/* -config/config.rb \ No newline at end of file +config/config.rb +queries.txt + +*.iml diff --git a/Gemfile.lock b/Gemfile.lock index 3fe30043..e5d8d339 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -1,6 +1,6 @@ GIT remote: https://github.com/ontoportal-lirmm/sparql-client.git - revision: 24bccbd0f4a5150fa6ce2af50d7c378c681027ea + revision: d4737ff08f33517cf93b4d82c78a471017991d97 branch: development specs: sparql-client (3.2.2) diff --git a/lib/goo.rb b/lib/goo.rb index 3c7a45e5..41b2e808 100644 --- a/lib/goo.rb +++ b/lib/goo.rb @@ -52,7 +52,8 @@ module Goo @@uuid = UUID.new @@debug_enabled = false @@use_cache = false - + @@query_logging = false + @@query_logging_file = './queries.log' @@slice_loading_size = 500 @@ -122,7 +123,8 @@ def self.add_sparql_backend(name, *opts) headers: { "Content-Type" => "application/x-www-form-urlencoded", "Accept" => "application/sparql-results+json"}, read_timeout: 10000, validate: false, - redis_cache: @@redis_client) + redis_cache: @@redis_client, + logger: query_logging? ? Logger.new(query_logging_file) : nil) @@sparql_backends[name][:update] = Goo::SPARQL::Client.new(opts[:update], protocol: "1.1", headers: { "Content-Type" => "application/x-www-form-urlencoded", "Accept" => "application/sparql-results+json"}, @@ -174,6 +176,25 @@ def self.queries_debug? return @@debug_enabled end + def self.query_logging? + @@query_logging + end + + def self.query_logging_file + @@query_logging_file + end + + def self.query_logging=(value) + @@query_logging = value + end + def self.query_logging_file=(value) + @@query_logging_file = value + end + + def self.logger + return @@sparql_backends[:main][:query].logger + end + def self.add_search_backend(name, *opts) opts = opts[0] unless opts.include? :service @@ -193,6 +214,12 @@ def self.add_redis_backend(*opts) set_sparql_cache end + def self.add_query_logger(enabled: false, file: ) + @@query_logging = enabled + @@query_logging_file = file + set_query_logging + end + def self.set_sparql_cache if @@sparql_backends.length > 0 && @@use_cache @@sparql_backends.each do |k,epr| @@ -209,38 +236,19 @@ def self.set_sparql_cache end end - def self.set_cube_client - if @@sparql_backends.length > 0 && @@cube_options + + def self.set_query_logging + if @@sparql_backends.length > 0 && query_logging? @@sparql_backends.each do |k,epr| - epr[:query].cube_options= @@cube_options - epr[:data].cube_options= @@cube_options - epr[:update].cube_options= @@cube_options + epr[:query].logger = Logger.new(query_logging_file) end - puts "Using cube options in Goo #{@@cube_options}" elsif @@sparql_backends.length > 0 @@sparql_backends.each do |k,epr| - epr[:query].cube_options= nil - epr[:data].cube_options= nil - epr[:update].cube_options=nil + epr[:query].logger = nil end end end - def self.enable_cube - if not block_given? - raise ArgumentError, "Cube configuration needs to receive a code block" - end - cube_options = {} - yield cube_options - @@cube_options = cube_options - set_cube_client - end - - def self.disable_cube - @@cube_options = nil - set_cube_client - end - def self.configure_sanity_check() unless @@namespaces.length > 0 raise ArgumentError, "Namespaces needs to be provided." diff --git a/lib/goo/base/where.rb b/lib/goo/base/where.rb index d4668e4c..05a06984 100644 --- a/lib/goo/base/where.rb +++ b/lib/goo/base/where.rb @@ -168,8 +168,26 @@ def index_as(index_key,max=nil) return rclient.llen(final_key) end + def paginated_all(page_size=1000) + page = 1 + page_size = 10000 + result = [] + old_count = -1 + count = 0 + while count != old_count + old_count = count + @page_i = page + @page_size = page_size + result += process_query(count=false) + page += 1 + count = result.length + end + result + end + def all - process_query unless @result + return @result if @result + process_query @result end alias_method :to_a, :all diff --git a/lib/goo/config/config.rb b/lib/goo/config/config.rb index 2019893c..8c2839ce 100644 --- a/lib/goo/config/config.rb +++ b/lib/goo/config/config.rb @@ -24,11 +24,14 @@ def config(&block) @settings.redis_host ||= ENV['REDIS_HOST'] || 'localhost' @settings.redis_port ||= ENV['REDIS_PORT'] || 6379 @settings.bioportal_namespace ||= ENV['BIOPORTAL_NAMESPACE'] || 'http://data.bioontology.org/' + @settings.query_logging ||= ENV['QUERIES_LOGGING'] || false + @settings.query_logging_file||= ENV['QUERIES_LOGGING_FILE'] || './sparql.log' @settings.queries_debug ||= ENV['QUERIES_DEBUG'] || false @settings.slice_loading_size ||= ENV['GOO_SLICES'] || 500 puts "(GOO) >> Using RDF store (#{@settings.goo_backend_name}) #{@settings.goo_host}:#{@settings.goo_port}#{@settings.goo_path_query}" puts "(GOO) >> Using term search server at #{@settings.search_server_url}" puts "(GOO) >> Using Redis instance at #{@settings.redis_host}:#{@settings.redis_port}" + puts "(GOO) >> Using Query logging: #{@settings.query_logging_file}" if @settings.query_logging connect_goo end @@ -42,9 +45,10 @@ def connect_goo query: "http://#{@settings.goo_host}:#{@settings.goo_port}#{@settings.goo_path_query}", data: "http://#{@settings.goo_host}:#{@settings.goo_port}#{@settings.goo_path_data}", update: "http://#{@settings.goo_host}:#{@settings.goo_port}#{@settings.goo_path_update}", - options: { rules: :NONE }) + options: { rules: :NONE}) conf.add_search_backend(:main, service: @settings.search_server_url) conf.add_redis_backend(host: @settings.goo_redis_host, port: @settings.goo_redis_port) + conf.add_query_logger(enabled: @settings.query_logging, file: @settings.query_logging_file) conf.add_namespace(:omv, RDF::Vocabulary.new("http://omv.org/ontology/")) conf.add_namespace(:skos, RDF::Vocabulary.new("http://www.w3.org/2004/02/skos/core#")) diff --git a/lib/goo/sparql/client.rb b/lib/goo/sparql/client.rb index 9701d131..f04d5dbc 100644 --- a/lib/goo/sparql/client.rb +++ b/lib/goo/sparql/client.rb @@ -112,12 +112,19 @@ def append_triples_no_bnodes(graph, file_path, mime_type_in) else bnodes_filter, dir = bnodes_filter_file(file_path, mime_type_in) end - chunk_lines = 50_000 # number of line + + if Goo.backend_vo? || Goo.backend_ag? + chunk_lines = 50_000 # number of line + else + chunk_lines = 500_000 # number of line + end + file = File.foreach(bnodes_filter) lines = [] line_count = 0 file.each_entry do |line| lines << line + if lines.size == chunk_lines response = append_triples_batch(graph, lines, mime_type_in, line_count) line_count += lines.size diff --git a/test/test_logging.rb b/test/test_logging.rb new file mode 100644 index 00000000..112efd08 --- /dev/null +++ b/test/test_logging.rb @@ -0,0 +1,54 @@ +require_relative 'test_case' +require_relative 'models' + +class TestLogging < MiniTest::Unit::TestCase + + def self.before_suite + GooTestData.create_test_case_data + Goo.use_cache = true + Goo.redis_client.flushdb + Goo.add_query_logger(enabled: true, file: "test.log") + end + + def self.after_suite + GooTestData.delete_test_case_data + Goo.add_query_logger(enabled: false, file: nil) + File.delete("test.log") if File.exist?("test.log") + Goo.redis_client.flushdb + Goo.use_cache = false + end + + def setup + Goo.redis_client.flushdb + end + + def test_logging + Goo.logger.info("Test logging") + University.all + recent_logs = Goo.logger.get_logs + assert_equal 3, recent_logs.length + assert recent_logs.any? { |x| x['query'].include?("Test logging") } + assert File.read("test.log").include?("Test logging") + end + + def test_last_10s_logs + Goo.logger.info("Test logging 2") + University.all + recent_logs = Goo.logger.queries_last_n_seconds(1) + assert_equal 3, recent_logs.length + assert recent_logs.any? { |x| x['query'].include?("Test logging 2") } + assert File.read("test.log").include?("Test logging 2") + sleep 1 + recent_logs = Goo.logger.queries_last_n_seconds(1) + assert_equal 0, recent_logs.length + end + + def test_auto_clean_logs + Goo.logger.info("Test logging 3") + (1..3000).each do |_i| + University.all + end + recent_logs = Goo.logger.get_logs + assert recent_logs.length < 2000 + end +end