Skip to content

Commit

Permalink
Merge pull request #206 from phenoscape/issue-192-similarity-frequency
Browse files Browse the repository at this point in the history
Add implementation for similarity/frequency for genes corpus
  • Loading branch information
balhoff authored Mar 13, 2020
2 parents 651819c + aa0fa04 commit 2261cf6
Showing 1 changed file with 21 additions and 10 deletions.
31 changes: 21 additions & 10 deletions src/main/scala/org/phenoscape/kb/Similarity.scala
Original file line number Diff line number Diff line change
Expand Up @@ -271,12 +271,10 @@ object Similarity {

/**
 * Looks up, for each requested term, the number of distinct profiles in a corpus that are associated with it.
 *
 * The SPARQL text is selected by `corpus`. Each visible query selects `?term` together with
 * `COUNT(DISTINCT ?profile) AS ?count`, reads from `KBMainGraph`, and groups by `?term`.
 * For `GenesCorpus` the query binds `?term` to the supplied terms through a `VALUES` clause, keeps profiles
 * that are the `has_phenotypic_profile` value of a subject with no `rdfsIsDefinedBy` link to `VTO`, and
 * requires the profile to be typed by the term inside `KBClosureGraph`.
 * The middle of the `TaxaCorpus` query is not visible in this view, so its exact constraints are not described here.
 *
 * NOTE(review): the `corpus match` that picks the query text appears to handle only `TaxaCorpus` and
 * `GenesCorpus`; any other corpus IRI would presumably fail with a `MatchError` thrown before a `Future` is
 * returned -- confirm whether an empty-table fallback is still wanted.
 *
 * @param terms  IRIs of the terms to count; each one is spliced into the query, and an empty set yields an
 *               empty query fragment
 * @param corpus IRI identifying which corpus to count profiles in
 * @return a future holding a map from each term IRI in the result rows to its `count` value read as an Int
 */
def frequency(terms: Set[IRI], corpus: IRI): Future[TermFrequencyTable] = {
import scalaz.Scalaz._
corpus match {
case TaxaCorpus =>
val values = if (terms.nonEmpty) terms.map(t => sparql" $t ").reduce(_ |+| _) else sparql""
val query: QueryText =
sparql"""
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
val values = if (terms.nonEmpty) terms.map(t => sparql" $t ").reduce(_ |+| _) else sparql""
val query: QueryText = corpus match {
case TaxaCorpus =>
sparql"""
SELECT ?term (COUNT(DISTINCT ?profile) AS ?count)
FROM $KBMainGraph
WHERE {
Expand All @@ -288,11 +286,24 @@ object Similarity {
}
GROUP BY ?term
"""
App.executeSPARQLQueryString(query.text, qs =>
IRI.create(qs.getResource("term").getURI) ->
qs.getLiteral("count").getInt).map(_.toMap)
case _ => Future.successful(Map.empty)
case GenesCorpus =>
sparql"""
SELECT ?term (COUNT(DISTINCT ?profile) AS ?count)
FROM $KBMainGraph
WHERE {
VALUES ?term { $values }
?profile ^$has_phenotypic_profile ?obj .
FILTER NOT EXISTS { ?obj $rdfsIsDefinedBy $VTO . }
GRAPH $KBClosureGraph {
?profile $rdfType ?term .
}
}
GROUP BY ?term
"""
}
App.executeSPARQLQueryString(query.text, qs =>
IRI.create(qs.getResource("term").getURI) ->
qs.getLiteral("count").getInt).map(_.toMap)
}

type TermFrequencyTable = Map[IRI, Int]
Expand Down

0 comments on commit 2261cf6

Please sign in to comment.