diff --git a/src/main/scala/org/phenoscape/kb/Similarity.scala b/src/main/scala/org/phenoscape/kb/Similarity.scala index 1b0a49e1..d916a4f5 100644 --- a/src/main/scala/org/phenoscape/kb/Similarity.scala +++ b/src/main/scala/org/phenoscape/kb/Similarity.scala @@ -271,12 +271,10 @@ object Similarity { def frequency(terms: Set[IRI], corpus: IRI): Future[TermFrequencyTable] = { import scalaz.Scalaz._ - corpus match { - case TaxaCorpus => - val values = if (terms.nonEmpty) terms.map(t => sparql" $t ").reduce(_ |+| _) else sparql"" - val query: QueryText = - sparql""" - PREFIX xsd: + val values = if (terms.nonEmpty) terms.map(t => sparql" $t ").reduce(_ |+| _) else sparql"" + val query: QueryText = corpus match { + case TaxaCorpus => + sparql""" SELECT ?term (COUNT(DISTINCT ?profile) AS ?count) FROM $KBMainGraph WHERE { @@ -288,11 +286,24 @@ object Similarity { } GROUP BY ?term """ - App.executeSPARQLQueryString(query.text, qs => - IRI.create(qs.getResource("term").getURI) -> - qs.getLiteral("count").getInt).map(_.toMap) - case _ => Future.successful(Map.empty) + case GenesCorpus => + sparql""" + SELECT ?term (COUNT(DISTINCT ?profile) AS ?count) + FROM $KBMainGraph + WHERE { + VALUES ?term { $values } + ?profile ^$has_phenotypic_profile ?obj . + FILTER NOT EXISTS { ?obj $rdfsIsDefinedBy $VTO . } + GRAPH $KBClosureGraph { + ?profile $rdfType ?term . + } + } + GROUP BY ?term + """ } + App.executeSPARQLQueryString(query.text, qs => + IRI.create(qs.getResource("term").getURI) -> + qs.getLiteral("count").getInt).map(_.toMap) } type TermFrequencyTable = Map[IRI, Int]