Skip to content

Commit

Permalink
Merge pull request #62 from phenoscape/output-scores
Browse files Browse the repository at this point in the history
Output score file for regression directly.
  • Loading branch information
Shalsh23 authored May 6, 2020
2 parents 21b8625 + 7dd5ead commit 2fb6054
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 9 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@ object RunPairwiseOWLSim extends App {
val ontfile = new File(args(2))
val profilesFile = new File(args(3))
val corpus = args(4)
val outfile = args(5)
val triplesOutfile = args(5)
val tsvOutfile = args(6)

val manager = OWLManager.createOWLOntologyManager()
val ontology = manager.loadOntologyFromOntologyDocument(ontfile)
Expand All @@ -39,6 +40,6 @@ object RunPairwiseOWLSim extends App {
val startIndex = (taskNum - 1) * groupSize
val group = orderedProfiles.drop(startIndex).take(groupSize)
println("Computing similarity matrix")
owlSim.computeAllSimilarityToCorpusDirectOutput(group.toSet, new File(outfile))
owlSim.computeAllSimilarityToCorpusDirectOutput(group.toSet, new File(triplesOutfile), new File(tsvOutfile))
println("Done: " + new Date())
}
25 changes: 18 additions & 7 deletions src/main/scala/org/phenoscape/owl/sim/OWLsim.scala
Original file line number Diff line number Diff line change
Expand Up @@ -96,27 +96,37 @@ class OWLsim(ontology: OWLOntology, inCorpus: OWLNamedIndividual => Boolean) {
/**
 * Computes the group-wise similarity of every input profile against every
 * individual in the corpus and gathers the RDF statements of all comparisons
 * into a single set.
 *
 * `toTriples` returns a pair of (comparison URI, statements); only the
 * statements are kept here, the URI is discarded.
 *
 * @param inputs the query individuals to compare against `individualsInCorpus`
 * @return the union of the statements produced for each (input, corpus) pair
 */
def computeAllSimilarityToCorpus(inputs: Set[OWLNamedIndividual]): Set[Statement] = (for {
  inputProfile <- inputs.toParArray
  corpusProfile <- individualsInCorpus.toParArray
  // toTriples yields (comparison URI, statements); the URI is not needed here.
  (_, triples) = groupWiseSimilarity(inputProfile, corpusProfile).toTriples
  triple <- triples
} yield triple).toSet.seq

/**
 * Compares every input profile with every individual in the corpus and streams
 * the results straight to disk instead of accumulating them in memory.
 *
 * Two files are written:
 *  - `triplesOutfile`: the statements of each comparison, written through a
 *    Jena stream writer using `RDFFormat.TURTLE_FLAT`.
 *  - `tsvOutfile`: a UTF-8, tab-separated file with the header
 *    `?match ?score ?query ?corpusprofile` and one row per comparison holding
 *    the comparison URI, its score, the query individual IRI and the corpus
 *    individual IRI.
 *
 * Comparisons are evaluated via `mapParallelUnordered` with a parallelism equal
 * to the number of available processors, so the row/statement order in the
 * output files is not defined. The call blocks until all comparisons are done.
 *
 * Both output resources are closed even when a comparison or a write fails.
 *
 * @param inputs         the query individuals to compare against `individualsInCorpus`
 * @param triplesOutfile destination file for the RDF statements
 * @param tsvOutfile     destination file for the tab-separated score table
 */
def computeAllSimilarityToCorpusDirectOutput(inputs: Set[OWLNamedIndividual], triplesOutfile: File, tsvOutfile: File): Unit = {
  import monix.execution.Scheduler.Implicits.global
  // NOTE: java.io.PrintWriter does not throw on write failures; it only records
  // them (see PrintWriter#checkError).
  val tsvWriter = new PrintWriter(tsvOutfile, "utf-8")
  try {
    tsvWriter.println("?match\t?score\t?query\t?corpusprofile")
    val outputStream = new FileOutputStream(triplesOutfile)
    try {
      val rdfWriter = StreamRDFWriter.getWriterStream(outputStream, RDFFormat.TURTLE_FLAT)
      rdfWriter.start()
      // Cartesian product of input profiles and corpus profiles.
      val comparisons = for {
        inputProfile <- Observable.fromIterable(inputs)
        corpusProfile <- Observable.fromIterable(individualsInCorpus)
      } yield (inputProfile, corpusProfile)
      val processed = comparisons.mapParallelUnordered(Runtime.getRuntime.availableProcessors) { case (inputProfile, corpusProfile) =>
        Task {
          val similarity = groupWiseSimilarity(inputProfile, corpusProfile)
          // `comparison` is the URI that identifies this comparison in the triples.
          val (comparison, triples) = similarity.toTriples
          val tsvLine = s"$comparison\t${similarity.score}\t${similarity.queryIndividual.getIRI}\t${similarity.corpusIndividual.getIRI}"
          (tsvLine, triples)
        }
      }
      // All writing happens in this consumer; the parallel tasks above only compute.
      processed.foreachL { case (tsvLine, triples) =>
        //FIXME wasted conversions here
        triples.foreach(triple => rdfWriter.triple(sesameTripleToJena(triple).asTriple))
        tsvWriter.println(tsvLine)
      }.runSyncUnsafe(Duration.Inf)
      rdfWriter.finish()
    } finally {
      outputStream.close()
    }
  } finally {
    tsvWriter.close()
  }
}

def computeAllSimilarityToCorpusJ(inputs: Set[OWLNamedIndividual]): Map[(OWLNamedIndividual, OWLNamedIndividual), Double] = (for {
Expand Down Expand Up @@ -342,7 +352,7 @@ final case class GroupWiseSimilarity(queryIndividual: OWLNamedIndividual, corpus

import GroupWiseSimilarity._

def toTriples: Set[Statement] = {
def toTriples: (URI, Set[Statement]) = {
val self = new URIImpl(OntUtil.nextIRI.toString)
val micasTriples = for {
pair <- pairs
Expand All @@ -354,10 +364,11 @@ final case class GroupWiseSimilarity(queryIndividual: OWLNamedIndividual, corpus
node <- distinctSubsumers
term <- node.classes
} yield new StatementImpl(self, has_subsumer, new URIImpl(term.getIRI.toString))
Set(
val triples = Set(
new StatementImpl(self, combined_score, new NumericLiteralImpl(score)),
new StatementImpl(self, for_query_profile, new URIImpl(queryIndividual.getIRI.toString)),
new StatementImpl(self, for_corpus_profile, new URIImpl(corpusIndividual.getIRI.toString))) ++ subsumerTriples ++ micasTriples
self -> triples.toSet
}

}
Expand Down

0 comments on commit 2fb6054

Please sign in to comment.