Skip to content

Commit

Permalink
Feature: Add paginated all function for resources and logging config …
Browse files Browse the repository at this point in the history
…and tests (#67)

* handle the case where we don't use the indexation feature

* change ask to select for better compatibility with virtuoso

* update append triples function to catch errors and log them

* set the size of appended chunks to 50 000 lines per chunk

* update chunks insert for virtuoso to be 50k lines instead of 500k to prevent crashing

* implement mode paginated_all that will get all the resources using pagination

* add query logging configuration and remove cube

* add logging test
  • Loading branch information
syphax-bouazzouni authored Jan 21, 2025
1 parent 8d108c2 commit b8eb3d0
Show file tree
Hide file tree
Showing 7 changed files with 125 additions and 31 deletions.
5 changes: 4 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -26,4 +26,7 @@ doc/
.idea/*
projectFilesBackup/*

config/config.rb
config/config.rb
queries.txt

*.iml
2 changes: 1 addition & 1 deletion Gemfile.lock
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
GIT
remote: https://github.com/ontoportal-lirmm/sparql-client.git
revision: 24bccbd0f4a5150fa6ce2af50d7c378c681027ea
revision: d4737ff08f33517cf93b4d82c78a471017991d97
branch: development
specs:
sparql-client (3.2.2)
Expand Down
60 changes: 34 additions & 26 deletions lib/goo.rb
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,8 @@ module Goo
@@uuid = UUID.new
@@debug_enabled = false
@@use_cache = false

@@query_logging = false
@@query_logging_file = './queries.log'
@@slice_loading_size = 500


Expand Down Expand Up @@ -122,7 +123,8 @@ def self.add_sparql_backend(name, *opts)
headers: { "Content-Type" => "application/x-www-form-urlencoded", "Accept" => "application/sparql-results+json"},
read_timeout: 10000,
validate: false,
redis_cache: @@redis_client)
redis_cache: @@redis_client,
logger: query_logging? ? Logger.new(query_logging_file) : nil)
@@sparql_backends[name][:update] = Goo::SPARQL::Client.new(opts[:update],
protocol: "1.1",
headers: { "Content-Type" => "application/x-www-form-urlencoded", "Accept" => "application/sparql-results+json"},
Expand Down Expand Up @@ -174,6 +176,25 @@ def self.queries_debug?
return @@debug_enabled
end

# Reports the current value of the query-logging flag (@@query_logging).
def self.query_logging?
@@query_logging
end

# Returns the configured destination for the query log (@@query_logging_file),
# i.e. the value handed to Logger.new when a query logger is created.
def self.query_logging_file
@@query_logging_file
end

# Stores the query-logging flag. This only records the value; it does not
# attach or detach loggers on backends that are already registered.
def self.query_logging=(value)
@@query_logging = value
end
# Stores the destination used for the query log. This only records the value;
# loggers already attached to registered backends are left untouched.
def self.query_logging_file=(value)
@@query_logging_file = value
end

# Returns the logger attached to the :query client of the :main SPARQL backend.
# Raises NoMethodError if the :main backend has not been registered.
def self.logger
  main_query_client = @@sparql_backends[:main][:query]
  main_query_client.logger
end

def self.add_search_backend(name, *opts)
opts = opts[0]
unless opts.include? :service
Expand All @@ -193,6 +214,12 @@ def self.add_redis_backend(*opts)
set_sparql_cache
end

# Records the query-logging settings and applies them to the registered
# SPARQL backends via set_query_logging.
#
# @param enabled [Boolean] whether queries should be logged
# @param file [String, nil] destination handed to Logger.new (required keyword)
def self.add_query_logger(enabled: false, file:)
  self.query_logging = enabled
  self.query_logging_file = file
  set_query_logging
end

def self.set_sparql_cache
if @@sparql_backends.length > 0 && @@use_cache
@@sparql_backends.each do |k,epr|
Expand All @@ -209,38 +236,19 @@ def self.set_sparql_cache
end
end

def self.set_cube_client
if @@sparql_backends.length > 0 && @@cube_options

# Applies the current query-logging setting to every registered SPARQL backend.
#
# When at least one backend is registered and query_logging? is true, each
# backend's :query client receives its own new Logger built from
# query_logging_file. When backends exist but logging is off, the :query
# client's logger is reset to nil.
#
# NOTE(review): the cube_options assignments and the "Using cube options" puts
# read like leftovers from a cube-client variant of this method (this method
# never checks @@cube_options itself) - confirm whether they belong here.
def self.set_query_logging
if @@sparql_backends.length > 0 && query_logging?
@@sparql_backends.each do |k,epr|
epr[:query].cube_options= @@cube_options
epr[:data].cube_options= @@cube_options
epr[:update].cube_options= @@cube_options
epr[:query].logger = Logger.new(query_logging_file)
end
puts "Using cube options in Goo #{@@cube_options}"
elsif @@sparql_backends.length > 0
@@sparql_backends.each do |k,epr|
epr[:query].cube_options= nil
epr[:data].cube_options= nil
epr[:update].cube_options=nil
epr[:query].logger = nil
end
end
end

# Stores cube options supplied by the caller's block and applies them through
# set_cube_client.
#
# The block is handed an empty Hash to fill in; once the block returns, that
# Hash becomes @@cube_options.
#
# @yieldparam options [Hash] empty hash for the caller to populate
# @raise [ArgumentError] when called without a block
def self.enable_cube
  raise ArgumentError, "Cube configuration needs to receive a code block" unless block_given?

  @@cube_options = {}.tap { |options| yield options }
  set_cube_client
end

# Clears the stored cube options (sets @@cube_options to nil) and then calls
# set_cube_client so the change is applied.
def self.disable_cube
@@cube_options = nil
set_cube_client
end

def self.configure_sanity_check()
unless @@namespaces.length > 0
raise ArgumentError, "Namespaces needs to be provided."
Expand Down
20 changes: 19 additions & 1 deletion lib/goo/base/where.rb
Original file line number Diff line number Diff line change
Expand Up @@ -168,8 +168,26 @@ def index_as(index_key,max=nil)
return rclient.llen(final_key)
end

# Collects every resource matching the query by requesting one page after
# another and appending each page to a single array.
#
# Paging stops as soon as a page contributes no new elements, so one extra
# (empty) query is always issued after the last non-empty page.
#
# @param page_size [Integer] number of resources requested per page; the value
#   passed by the caller is honored (defaults to 10_000)
# @return [Array] all resources gathered across the pages
def paginated_all(page_size = 10_000)
  result = []
  page = 1
  loop do
    # process_query reads the page to fetch from these instance variables.
    @page_i = page
    @page_size = page_size
    size_before = result.length
    # false => fetch the resources themselves, not a count.
    result.concat(process_query(false))
    break if result.length == size_before

    page += 1
  end
  result
end

# Returns the query results, running the query only when no result is cached.
#
# process_query is invoked at most once per call; presumably it assigns
# @result (confirm against process_query), which is what gets returned.
#
# @return [Object, nil] the cached or freshly computed @result
def all
  process_query unless @result
  @result
end
alias_method :to_a, :all
Expand Down
6 changes: 5 additions & 1 deletion lib/goo/config/config.rb
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,14 @@ def config(&block)
@settings.redis_host ||= ENV['REDIS_HOST'] || 'localhost'
@settings.redis_port ||= ENV['REDIS_PORT'] || 6379
@settings.bioportal_namespace ||= ENV['BIOPORTAL_NAMESPACE'] || 'http://data.bioontology.org/'
@settings.query_logging ||= ENV['QUERIES_LOGGING'] || false
@settings.query_logging_file||= ENV['QUERIES_LOGGING_FILE'] || './sparql.log'
@settings.queries_debug ||= ENV['QUERIES_DEBUG'] || false
@settings.slice_loading_size ||= ENV['GOO_SLICES'] || 500
puts "(GOO) >> Using RDF store (#{@settings.goo_backend_name}) #{@settings.goo_host}:#{@settings.goo_port}#{@settings.goo_path_query}"
puts "(GOO) >> Using term search server at #{@settings.search_server_url}"
puts "(GOO) >> Using Redis instance at #{@settings.redis_host}:#{@settings.redis_port}"
puts "(GOO) >> Using Query logging: #{@settings.query_logging_file}" if @settings.query_logging

connect_goo
end
Expand All @@ -42,9 +45,10 @@ def connect_goo
query: "http://#{@settings.goo_host}:#{@settings.goo_port}#{@settings.goo_path_query}",
data: "http://#{@settings.goo_host}:#{@settings.goo_port}#{@settings.goo_path_data}",
update: "http://#{@settings.goo_host}:#{@settings.goo_port}#{@settings.goo_path_update}",
options: { rules: :NONE })
options: { rules: :NONE})
conf.add_search_backend(:main, service: @settings.search_server_url)
conf.add_redis_backend(host: @settings.goo_redis_host, port: @settings.goo_redis_port)
conf.add_query_logger(enabled: @settings.query_logging, file: @settings.query_logging_file)

conf.add_namespace(:omv, RDF::Vocabulary.new("http://omv.org/ontology/"))
conf.add_namespace(:skos, RDF::Vocabulary.new("http://www.w3.org/2004/02/skos/core#"))
Expand Down
9 changes: 8 additions & 1 deletion lib/goo/sparql/client.rb
Original file line number Diff line number Diff line change
Expand Up @@ -112,12 +112,19 @@ def append_triples_no_bnodes(graph, file_path, mime_type_in)
else
bnodes_filter, dir = bnodes_filter_file(file_path, mime_type_in)
end
chunk_lines = 50_000 # number of line

if Goo.backend_vo? || Goo.backend_ag?
chunk_lines = 50_000 # number of line
else
chunk_lines = 500_000 # number of line
end

file = File.foreach(bnodes_filter)
lines = []
line_count = 0
file.each_entry do |line|
lines << line

if lines.size == chunk_lines
response = append_triples_batch(graph, lines, mime_type_in, line_count)
line_count += lines.size
Expand Down
54 changes: 54 additions & 0 deletions test/test_logging.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
require_relative 'test_case'
require_relative 'models'

# Checks the query logger exposed through Goo.logger: entries must be visible
# both through the logger's own accessors and in the log file on disk.
class TestLogging < MiniTest::Unit::TestCase
  # File the query logger writes to for the duration of this suite.
  LOG_FILE = "test.log"

  # Loads the fixture data, enables the cache and turns query logging on.
  def self.before_suite
    GooTestData.create_test_case_data
    Goo.use_cache = true
    Goo.redis_client.flushdb
    Goo.add_query_logger(enabled: true, file: LOG_FILE)
  end

  # Reverts everything before_suite set up and removes the log file.
  def self.after_suite
    GooTestData.delete_test_case_data
    Goo.add_query_logger(enabled: false, file: nil)
    File.delete(LOG_FILE) if File.exist?(LOG_FILE)
    Goo.redis_client.flushdb
    Goo.use_cache = false
  end

  # Every test starts from an empty Redis database.
  def setup
    Goo.redis_client.flushdb
  end

  # A manual log line plus one University.all call should yield three entries.
  def test_logging
    Goo.logger.info("Test logging")
    University.all
    entries = Goo.logger.get_logs
    assert_equal 3, entries.length
    assert(entries.any? { |entry| entry['query'].include?("Test logging") })
    assert File.read(LOG_FILE).include?("Test logging")
  end

  # Entries are returned for a one-second window and are gone after waiting
  # one second.
  def test_last_10s_logs
    Goo.logger.info("Test logging 2")
    University.all
    entries = Goo.logger.queries_last_n_seconds(1)
    assert_equal 3, entries.length
    assert(entries.any? { |entry| entry['query'].include?("Test logging 2") })
    assert File.read(LOG_FILE).include?("Test logging 2")
    sleep 1
    entries = Goo.logger.queries_last_n_seconds(1)
    assert_equal 0, entries.length
  end

  # After 3000 queries the number of retained entries must stay below 2000.
  def test_auto_clean_logs
    Goo.logger.info("Test logging 3")
    3000.times { University.all }
    entries = Goo.logger.get_logs
    assert_operator entries.length, :<, 2000
  end
end

0 comments on commit b8eb3d0

Please sign in to comment.