forked from ontoportal/ontologies_linked_data
-
Notifications
You must be signed in to change notification settings - Fork 8
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
extract mapping count functions into a separated module
- Loading branch information
1 parent
f523fd5
commit 376c261
Showing
2 changed files
with
260 additions
and
237 deletions.
There are no files selected for viewing
259 changes: 259 additions & 0 deletions
259
lib/ontologies_linked_data/concerns/mappings/mapping_counts.rb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,259 @@ | ||
module LinkedData | ||
module Concerns | ||
module Mappings | ||
module Count | ||
def mapping_counts(enable_debug = false, logger = nil, reload_cache = false, arr_acronyms = []) | ||
logger = nil unless enable_debug | ||
t = Time.now | ||
latest = self.retrieve_latest_submissions(options = { acronyms: arr_acronyms }) | ||
counts = {} | ||
# Counting for External mappings | ||
t0 = Time.now | ||
external_uri = LinkedData::Models::ExternalClass.graph_uri | ||
exter_counts = mapping_ontologies_count(external_uri, nil, reload_cache = reload_cache) | ||
exter_total = 0 | ||
exter_counts.each do |k, v| | ||
exter_total += v | ||
end | ||
counts[external_uri.to_s] = exter_total | ||
if enable_debug | ||
logger.info("Time for External Mappings took #{Time.now - t0} sec. records #{exter_total}") | ||
end | ||
LinkedData.settings.interportal_hash ||= {} | ||
# Counting for Interportal mappings | ||
LinkedData.settings.interportal_hash.each_key do |acro| | ||
t0 = Time.now | ||
interportal_uri = LinkedData::Models::InterportalClass.graph_uri(acro) | ||
inter_counts = mapping_ontologies_count(interportal_uri, nil, reload_cache = reload_cache) | ||
inter_total = 0 | ||
inter_counts.each do |k, v| | ||
inter_total += v | ||
end | ||
counts[interportal_uri.to_s] = inter_total | ||
if enable_debug | ||
logger.info("Time for #{interportal_uri.to_s} took #{Time.now - t0} sec. records #{inter_total}") | ||
end | ||
end | ||
# Counting for mappings between the ontologies hosted by the BioPortal appliance | ||
i = 0 | ||
epr = Goo.sparql_query_client(:main) | ||
|
||
latest.each do |acro, sub| | ||
self.handle_triple_store_downtime(logger) if Goo.backend_4s? | ||
t0 = Time.now | ||
s_counts = self.mapping_ontologies_count(sub, nil, reload_cache = reload_cache) | ||
s_total = 0 | ||
|
||
s_counts.each do |k, v| | ||
s_total += v | ||
end | ||
counts[acro] = s_total | ||
i += 1 | ||
|
||
next unless enable_debug | ||
|
||
logger.info("#{i}/#{latest.count} " + | ||
"Retrieved #{s_total} records for #{acro} in #{Time.now - t0} seconds.") | ||
logger.flush | ||
end | ||
|
||
if enable_debug | ||
logger.info("Total time #{Time.now - t} sec.") | ||
logger.flush | ||
end | ||
return counts | ||
end | ||
|
||
def create_mapping_counts(logger, arr_acronyms = []) | ||
ont_msg = arr_acronyms.empty? ? "all ontologies" : "ontologies [#{arr_acronyms.join(', ')}]" | ||
|
||
time = Benchmark.realtime do | ||
create_mapping_count_totals_for_ontologies(logger, arr_acronyms) | ||
end | ||
logger.info("Completed rebuilding total mapping counts for #{ont_msg} in #{(time / 60).round(1)} minutes.") | ||
puts "create mappings total count time: #{time}" | ||
|
||
time = Benchmark.realtime do | ||
create_mapping_count_pairs_for_ontologies(logger, arr_acronyms) | ||
end | ||
puts "create mappings pair count time: #{time}" | ||
logger.info("Completed rebuilding mapping count pairs for #{ont_msg} in #{(time / 60).round(1)} minutes.") | ||
end | ||
|
||
def create_mapping_count_totals_for_ontologies(logger, arr_acronyms) | ||
new_counts = mapping_counts(enable_debug = true, logger = logger, reload_cache = true, arr_acronyms) | ||
persistent_counts = {} | ||
f = Goo::Filter.new(:pair_count) == false | ||
|
||
LinkedData::Models::MappingCount.where.filter(f) | ||
.include(:ontologies, :count) | ||
.include(:all) | ||
.all | ||
.each do |m| | ||
persistent_counts[m.ontologies.first] = m | ||
end | ||
|
||
# ontologies = LinkedData::Models::Ontology.where.include(:acronym).all.map(&:acronym) | ||
# delete_zombie_mapping_count(persistent_counts.values, ontologies) | ||
|
||
num_counts = new_counts.keys.length | ||
ctr = 0 | ||
|
||
new_counts.each_key do |acr| | ||
new_count = new_counts[acr] | ||
ctr += 1 | ||
|
||
if persistent_counts.include?(acr) | ||
inst = persistent_counts[acr] | ||
if new_count.zero? | ||
inst.delete if inst.persistent? | ||
elsif new_count != inst.count | ||
inst.bring_remaining | ||
inst.count = new_count | ||
|
||
begin | ||
if inst.valid? | ||
inst.save | ||
else | ||
logger.error("Error updating mapping count for #{acr}: #{inst.id.to_s}. #{inst.errors}") | ||
next | ||
end | ||
rescue Exception => e | ||
logger.error("Exception updating mapping count for #{acr}: #{inst.id.to_s}. #{e.class}: #{e.message}\n#{e.backtrace.join("\n")}") | ||
next | ||
end | ||
end | ||
else | ||
m = LinkedData::Models::MappingCount.new | ||
m.ontologies = [acr] | ||
m.pair_count = false | ||
m.count = new_count | ||
|
||
begin | ||
if m.valid? | ||
m.save | ||
else | ||
logger.error("Error saving new mapping count for #{acr}. #{m.errors}") | ||
next | ||
end | ||
rescue Exception => e | ||
logger.error("Exception saving new mapping count for #{acr}. #{e.class}: #{e.message}\n#{e.backtrace.join("\n")}") | ||
next | ||
end | ||
end | ||
remaining = num_counts - ctr | ||
logger.info("Total mapping count saved for #{acr}: #{new_count}. " << ((remaining.positive?) ? "#{remaining} counts remaining..." : "All done!")) | ||
end | ||
end | ||
|
||
# This generates pair mapping counts for the given | ||
# ontologies to ALL other ontologies in the system | ||
def create_mapping_count_pairs_for_ontologies(logger, arr_acronyms) | ||
|
||
latest_submissions = self.retrieve_latest_submissions(options = { acronyms: arr_acronyms }) | ||
ont_total = latest_submissions.length | ||
logger.info("There is a total of #{ont_total} ontologies to process...") | ||
ont_ctr = 0 | ||
# filename = 'mapping_pairs.ttl' | ||
# temp_dir = Dir.tmpdir | ||
# temp_file_path = File.join(temp_dir, filename) | ||
# temp_dir = '/Users/mdorf/Downloads/test/' | ||
# temp_file_path = File.join(File.dirname(file_path), "test.ttl") | ||
# fsave = File.open(temp_file_path, "a") | ||
ontologies = LinkedData::Models::Ontology.where.include(:acronym).all.map(&:acronym) | ||
|
||
latest_submissions.each do |acr, sub| | ||
self.handle_triple_store_downtime(logger) if Goo.backend_4s? | ||
new_counts = nil | ||
|
||
time = Benchmark.realtime do | ||
new_counts = self.mapping_ontologies_count(sub, nil, reload_cache = true) | ||
end | ||
logger.info("Retrieved new mapping pair counts for #{acr} in #{time} seconds.") | ||
ont_ctr += 1 | ||
persistent_counts = {} | ||
LinkedData::Models::MappingCount.where(pair_count: true).and(ontologies: acr) | ||
.include(:ontologies, :count).all.each do |m| | ||
other = m.ontologies.first | ||
|
||
if other == acr | ||
other = m.ontologies[1] | ||
end | ||
persistent_counts[other] = m | ||
end | ||
|
||
# delete_zombie_mapping_count(persistent_counts.values, ontologies) | ||
|
||
num_counts = new_counts.keys.length | ||
logger.info("Ontology: #{acr}. #{num_counts} mapping pair counts to record...") | ||
logger.info("------------------------------------------------") | ||
ctr = 0 | ||
|
||
new_counts.each_key do |other| | ||
new_count = new_counts[other] | ||
ctr += 1 | ||
|
||
if persistent_counts.include?(other) | ||
inst = persistent_counts[other] | ||
if new_count.zero? | ||
inst.delete | ||
elsif new_count != inst.count | ||
inst.bring_remaining | ||
inst.pair_count = true | ||
inst.count = new_count | ||
|
||
begin | ||
if inst.valid? | ||
inst.save() | ||
# inst.save({ batch: fsave }) | ||
else | ||
logger.error("Error updating mapping count for the pair [#{acr}, #{other}]: #{inst.id.to_s}. #{inst.errors}") | ||
next | ||
end | ||
rescue Exception => e | ||
logger.error("Exception updating mapping count for the pair [#{acr}, #{other}]: #{inst.id.to_s}. #{e.class}: #{e.message}\n#{e.backtrace.join("\n")}") | ||
next | ||
end | ||
end | ||
else | ||
next unless ontologies.include?(other) | ||
|
||
m = LinkedData::Models::MappingCount.new | ||
m.count = new_count | ||
m.ontologies = [acr, other] | ||
m.pair_count = true | ||
puts "creating mapping count #{m.ontologies}" | ||
begin | ||
if m.valid? | ||
m.save() | ||
# m.save({ batch: fsave }) | ||
else | ||
logger.error("Error saving new mapping count for the pair [#{acr}, #{other}]. #{m.errors}") | ||
next | ||
end | ||
rescue Exception => e | ||
logger.error("Exception saving new mapping count for the pair [#{acr}, #{other}]. #{e.class}: #{e.message}\n#{e.backtrace.join("\n")}") | ||
next | ||
end | ||
end | ||
remaining = num_counts - ctr | ||
logger.info("Mapping count saved for the pair [#{acr}, #{other}]: #{new_count}. " << ((remaining.positive?) ? "#{remaining} counts remaining for #{acr}..." : "All done!")) | ||
wait_interval = 250 | ||
|
||
next unless (ctr % wait_interval).zero? | ||
|
||
sec_to_wait = 1 | ||
logger.info("Waiting #{sec_to_wait} second" << ((sec_to_wait > 1) ? 's' : '') << '...') | ||
sleep(sec_to_wait) | ||
end | ||
remaining_ont = ont_total - ont_ctr | ||
logger.info("Completed processing pair mapping counts for #{acr}. " << ((remaining_ont.positive?) ? "#{remaining_ont} ontologies remaining..." : "All ontologies processed!")) | ||
end | ||
# fsave.close | ||
end | ||
|
||
|
||
end | ||
end | ||
end | ||
end |
Oops, something went wrong.