From 4327d7d9f2526d85316a5333c03bebcf0eb67178 Mon Sep 17 00:00:00 2001 From: Mike Travers Date: Mon, 19 Dec 2022 23:18:34 -0800 Subject: [PATCH 01/11] restore Uniprot query code to working --- resources/uniprot-config.edn | 14 ++++++++++++-- src/clj/org/parkerici/enflame/sparql.clj | 5 +++-- src/clj/org/parkerici/enflame/uniprot.clj | 13 +++++++++++++ 3 files changed, 28 insertions(+), 4 deletions(-) diff --git a/resources/uniprot-config.edn b/resources/uniprot-config.edn index 8daf6d5..9b6c966 100644 --- a/resources/uniprot-config.edn +++ b/resources/uniprot-config.edn @@ -1,12 +1,22 @@ {:source {:type :sparql - :sparql-endpoint "https://sparql.uniprot.org/"} + :sparql-endpoint "https://sparql.uniprot.org/" + ;; TODO implement + :prefixes {uniprot "http://purl.uniprot.org/core/" + unipath "http://purl.uniprot.org/unipathway/" + unicite "http://purl.uniprot.org/citations/" + unidb "http://purl.uniprot.org/database" + dcterms "http://purl.org/dc/terms/" + unienzyme "http://purl.uniprot.org/enzyme/" + skos "http://www.w3.org/2004/02/skos/core#" + } + } :schema "resources/uniprot-alzabo.edn" :query-generator :sparql-generate :port 1992 :dev? 
true ;TODO control this somehow maybe aero/env :rh-cards [:query #_ :share - :compacted ;debug only + :compact ;debug only #_ :browser ;someday ] diff --git a/src/clj/org/parkerici/enflame/sparql.clj b/src/clj/org/parkerici/enflame/sparql.clj index 307338e..d653f35 100644 --- a/src/clj/org/parkerici/enflame/sparql.clj +++ b/src/clj/org/parkerici/enflame/sparql.clj @@ -83,8 +83,9 @@ ;;; Searching for usable endpoints -(def sparql-dbs (clojure.data.json/read-str (slurp "/Users/mtravers/Downloads/query (1).json") - :key-fn keyword)) +(def sparql-dbs + (clojure.data.json/read-str (slurp "scrap/sparql-endpoints.json") + :key-fn keyword)) (defn check-link diff --git a/src/clj/org/parkerici/enflame/uniprot.clj b/src/clj/org/parkerici/enflame/uniprot.clj index ad4a58d..8bf5c36 100644 --- a/src/clj/org/parkerici/enflame/uniprot.clj +++ b/src/clj/org/parkerici/enflame/uniprot.clj @@ -7,6 +7,19 @@ [clojure.set :as set] ) ) +;;; TODO get from config +(def endpoint "https://sparql.uniprot.org/") + +;;; → Multitool +(defn curried-api + [namespace arg1] ;TODO should take arb # args + `(do + ~@(for [[s v] (ns-publics namespace) + :when (:api (meta v))] + `(def ~s ~(partial v arg1))))) + +(eval (curried-api 'org.parkerici.enflame.sparql endpoint)) + ;;; These are silly (reg/prefix 'uniprot "http://purl.uniprot.org/core/") (reg/prefix 'unipath "http://purl.uniprot.org/unipathway/") From 8521311f09afbdc9cbe7a156acbfe6570b4e3c1e Mon Sep 17 00:00:00 2001 From: Mike Travers Date: Tue, 20 Dec 2022 08:35:13 -0800 Subject: [PATCH 02/11] generated sparql gets labels --- src/cljc/org/parkerici/enflame/blockdefs.cljc | 2 +- .../parkerici/enflame/sparql/generate.cljc | 51 ++++++++++++++++--- 2 files changed, 44 insertions(+), 9 deletions(-) diff --git a/src/cljc/org/parkerici/enflame/blockdefs.cljc b/src/cljc/org/parkerici/enflame/blockdefs.cljc index f06203a..6bf9681 100644 --- a/src/cljc/org/parkerici/enflame/blockdefs.cljc +++ b/src/cljc/org/parkerici/enflame/blockdefs.cljc @@ -223,7 +223,7 
@@ (defn kind-field-blockdef [kind field invert?] (let [{:keys [type attribute]} (if invert? ;??? not sure about this - {:type field :attribute (keyword field kind)} + {:type field :attribute (keyword (name field) (name kind))} (field-def kind field))] (when-let [field-def (field-def-type field type)] (merge diff --git a/src/cljc/org/parkerici/enflame/sparql/generate.cljc b/src/cljc/org/parkerici/enflame/sparql/generate.cljc index f4c921c..4346903 100644 --- a/src/cljc/org/parkerici/enflame/sparql/generate.cljc +++ b/src/cljc/org/parkerici/enflame/sparql/generate.cljc @@ -5,13 +5,28 @@ [clojure.string :as str]) ) - +#_ +(schema/set-schema (org.parkerici.enflame.config/read-schema nil)) ;;; Pasted from candel and should be folded (def varcounter (atom {})) +;;; → Multitool +(defn safe-name [thing] + (when #?(:clj (instance? clojure.lang.Named thing) + :cljs (.-name thing)) + (name thing))) + +(defn s [thing] + (or (safe-name thing) + (str thing))) + +(defn symbol-conc + [& things] + (symbol (apply str (map s things)))) + (defn ?var [kind] (swap! varcounter update kind #(inc (or % 0))) (symbol (str "?" (name kind) (get @varcounter kind)))) @@ -46,14 +61,16 @@ ;;; From CANDEL, but modified +#_ (defn pull-include [var] (let [kind (var-kind var) - label (schema/kind-label kind)] + label (kind-label kind)] (if label [:db/id label] [:db/id]))) +#_ (defn find-term [var type] (u/de-ns @@ -66,6 +83,20 @@ +(defn label-var + [base-var] + (symbol-conc base-var "Label")) + + + +(defn select-terms + [var type] + (case (or type :include) ;default is :include + :omit nil + :include (list var (label-var var)) + :count `(count-distinct ~var) ;TODO + )) + ;;; Actual ;;; TODO copypasta from candel.query, could be abstracted up @@ -104,6 +135,10 @@ :type blockdefs/block-def)) +(defn kind-label + [kind] + :rdfs/label) + (defmethod build-query :query-builder-query [{:keys [current-var] :as _query} {:keys [top?] 
:as blockspec}] (let [{:keys [output]} (spec-block-def blockspec) @@ -117,18 +152,18 @@ subquery-selects (mapcat :select subqueries) subquery-wheres (mapcat :where subqueries) type-where `[~output-var :rdf/type ~output-rdf-type] + base-wheres (cons type-where subquery-wheres) subquery-filters (mapcat :filter subqueries) base-query ;; Do not understand this - {:select (if-let [term (find-term output-var output-type)] - (conj subquery-selects term) - subquery-selects) + {:select (concat (select-terms output-var output-type) + subquery-selects) :where (if-let [label-attribute (and top? (empty? subquery-wheres) - (schema/kind-label output))] - [[output-var label-attribute (?var label-attribute)]] ;TODO not sure about this - (cons type-where subquery-wheres)) + (kind-label output))] + (cons [output-var label-attribute (label-var output-var)] base-wheres) + base-wheres) :filter subquery-filters :current-var output-var }] From a72af746ab6e80a295e0216019c5fd5e18724812 Mon Sep 17 00:00:00 2001 From: Mike Travers Date: Tue, 20 Dec 2022 11:05:00 -0800 Subject: [PATCH 03/11] query results getting to client --- src/clj/org/parkerici/enflame/server.clj | 10 +++++++++- src/clj/org/parkerici/enflame/uniprot.clj | 1 + src/cljs/org/parkerici/enflame/datomic.cljs | 2 ++ src/cljs/org/parkerici/enflame/views.cljs | 11 +++++++++-- 4 files changed, 21 insertions(+), 3 deletions(-) diff --git a/src/clj/org/parkerici/enflame/server.clj b/src/clj/org/parkerici/enflame/server.clj index 8c1463b..c55822d 100644 --- a/src/clj/org/parkerici/enflame/server.clj +++ b/src/clj/org/parkerici/enflame/server.clj @@ -1,5 +1,6 @@ (ns org.parkerici.enflame.server (:require [org.parkerici.enflame.datomic-relay :as datomic] + [org.parkerici.enflame.sparql :as sparql] [org.parkerici.enflame.download :as download] [org.parkerici.enflame.embed-server :as embed] [org.parkerici.enflame.admin :as admin] @@ -36,6 +37,13 @@ (catch Throwable e {:status 500 :headers {} :body {:error (print-str e)}})))) +(defn 
do-query + [db query args candelabra-token config] + ;; TODO Multimethod? + (case (:type (:source config)) + :candel (datomic/query db query args candelabra-token config) + :sparql (sparql/q (:sparql-endpoint (:source config)) query))) + (defn handle-query [req config] (let [{:keys [query args limit db]} (:params req) @@ -43,7 +51,7 @@ _args (if (u/nullish? args) [] (read-string args)) _limit (if (u/nullish? limit) nil (Integer. limit)) candelabra-token (get-in req [:cookies "candelabra-token" :value]) - results (datomic/query db _query _args candelabra-token config) + results (do-query db _query _args candelabra-token config) clipped (if _limit (take _limit results) results)] (response/response {:count (count results) :clipped (count clipped) :results clipped}))) diff --git a/src/clj/org/parkerici/enflame/uniprot.clj b/src/clj/org/parkerici/enflame/uniprot.clj index 8bf5c36..2cf4785 100644 --- a/src/clj/org/parkerici/enflame/uniprot.clj +++ b/src/clj/org/parkerici/enflame/uniprot.clj @@ -8,6 +8,7 @@ ) ) ;;; TODO get from config +;;; Note: has teneded to disappear and reappear, restarting Wifi helps (def endpoint "https://sparql.uniprot.org/") ;;; → Multitool diff --git a/src/cljs/org/parkerici/enflame/datomic.cljs b/src/cljs/org/parkerici/enflame/datomic.cljs index dd45a42..27857a6 100644 --- a/src/cljs/org/parkerici/enflame/datomic.cljs +++ b/src/cljs/org/parkerici/enflame/datomic.cljs @@ -16,6 +16,7 @@ ;;; Handler gets spurious calls with 0 status and/or empty response, which we ignore. 
Not sure why that's happening ;;; Returns map with :count, :clipped, and :results +;;; TODO move to non-datomic file (defn do-query [ddb query args limit handler & [options]] (api/ajax-get "/api/query" @@ -33,6 +34,7 @@ (rf/reg-event-db :get-idents (fn [db [_ ddb]] + #_ ;TODO CANDEL specific (let [ddb (or ddb (:ddb db))] (do-query ddb '{:find (?x ?y), :where ([?x :db/ident ?y])} diff --git a/src/cljs/org/parkerici/enflame/views.cljs b/src/cljs/org/parkerici/enflame/views.cljs index 2980e32..3dad861 100644 --- a/src/cljs/org/parkerici/enflame/views.cljs +++ b/src/cljs/org/parkerici/enflame/views.cljs @@ -296,6 +296,8 @@ (rf/reg-sub :query-invalid? (fn [_ _] + nil + #_ ;TODO CANDEL Specific (let [query @(rf/subscribe [:query]) query-block @(rf/subscribe [:query-block])] (cond @@ -320,6 +322,7 @@ :candel/wick candel/wick-card :query query-card :compact compact-card + :compacted compact-card ;temp :xml xml-card :share library/share-card :browser obrowser/browser @@ -337,6 +340,10 @@ (toplink "Library" "/library")] [:div#accordian.accordian - (for [card (c/config :rh-cards)] ;; cards TODO not working yet because timing - ^{:key card}[(get card-defs card)]) + (for [card (c/config :rh-cards) + :let [cdef (get card-defs card)]] + (if cdef + ^{:key card}[cdef] + (throw (ex-info "No card defined for" {:card card})) + )) ]]) From 474810e01b841f91d3dbfdf660db57c204c758db Mon Sep 17 00:00:00 2001 From: Mike Travers Date: Tue, 20 Dec 2022 15:01:29 -0800 Subject: [PATCH 04/11] query results displayed! 
Some temp hackery that needs to be fixed later --- resources/candel-config.edn | 2 +- resources/uniprot-alzabo.edn | 194 ++++++++++++++---- src/clj/org/parkerici/enflame/uniprot.clj | 15 +- .../parkerici/enflame/sparql/generate.cljc | 14 +- src/cljs/org/parkerici/enflame/datomic.cljs | 7 +- src/cljs/org/parkerici/enflame/views.cljs | 2 +- 6 files changed, 177 insertions(+), 57 deletions(-) diff --git a/resources/candel-config.edn b/resources/candel-config.edn index ef67964..28dfe74 100644 --- a/resources/candel-config.edn +++ b/resources/candel-config.edn @@ -12,7 +12,7 @@ :query :candel/wick :share - :compacted ;debug only + :compact ;debug only :browser ] :port 1991 ;Port on which to serve application diff --git a/resources/uniprot-alzabo.edn b/resources/uniprot-alzabo.edn index 26731da..c5f79be 100644 --- a/resources/uniprot-alzabo.edn +++ b/resources/uniprot-alzabo.edn @@ -1,3 +1,5 @@ +;;; TODO added labels by hand, should do that as part of generation + {:title "UNIPROT", :kinds {:Organelle {:title "Organelle", :fields {}, :uri :uniprot/Organelle}, @@ -5,7 +7,12 @@ {:doc "The catalytic activity of an enzyme.", :title "Catalytic Activity", :fields - {:catalyzedReaction + {:label + {:uri :rdfs/label + :attribute :rdfs/label + :type :string} + + :catalyzedReaction {:type :string, :uri :uniprot/catalyzedReaction, :attribute :uniprot/catalyzedReaction}, @@ -18,19 +25,28 @@ {:doc "The component of a proteome. e.g. 
Chromosome, Contig or Plasmid", :title "Proteome_Component", - :fields {}, + :fields {:label + {:uri :rdfs/label + :attribute :rdfs/label + :type :string}}, :uri :uniprot/Proteome_Component}, :Not_Obsolete {:doc "A class introduced to group all records that are currently in the database.", :title "Not Obsolete", - :fields {}, + :fields {:label + {:uri :rdfs/label + :attribute :rdfs/label + :type :string}}, :uri :uniprot/Not_Obsolete}, :Taxon {:doc "An element of a taxonomy for classifying life forms.", :title "Taxon", :fields - {:otherName + {:label + {:uri :rdfs/label + :attribute :rdfs/label + :type :string}:otherName {:type :string, :uri :uniprot/otherName, :attribute :uniprot/otherName}, @@ -68,23 +84,35 @@ :Participant {:doc "A participant in a protein-protein interaction.", :title "Interaction participant", - :fields {}, + :fields {:label + {:uri :rdfs/label + :attribute :rdfs/label + :type :string}}, :uri :uniprot/Participant}, :Obsolete {:doc "The class of all obsolete records in the database (i.e. 
records that where once published but are now removed).", :title "Obsolete", - :fields {}, + :fields {:label + {:uri :rdfs/label + :attribute :rdfs/label + :type :string}}, :uri :uniprot/Obsolete}, :Protein_Existence {:title "Protein existence evidence", - :fields {}, + :fields {:label + {:uri :rdfs/label + :attribute :rdfs/label + :type :string}}, :uri :uniprot/Protein_Existence}, :Sequence {:doc "An amino acid sequence.", :title "Sequence", :fields - {:mass + {:label + {:uri :rdfs/label + :attribute :rdfs/label + :type :string}:mass {:type :int, :uri :uniprot/mass, :attribute :uniprot/mass, @@ -138,13 +166,19 @@ {:doc "The use of this class has been replaced by Activity_Regulation_Annotation", :title "Enzyme Regulation", - :fields {}, + :fields {:label + {:uri :rdfs/label + :attribute :rdfs/label + :type :string}}, :uri :uniprot/Enzyme_Regulation_Annotation}, :Protein {:doc "Description of a protein.", :title "Protein", :fields - {:isolatedFrom + {:label + {:uri :rdfs/label + :attribute :rdfs/label + :type :string}:isolatedFrom {:type :Tissue, :uri :uniprot/isolatedFrom, :attribute :uniprot/isolatedFrom}, @@ -219,7 +253,10 @@ :Gene {:title "Gene", :fields - {:orfName + {:label + {:uri :rdfs/label + :attribute :rdfs/label + :type :string}:orfName {:type :string, :uri :uniprot/orfName, :attribute :uniprot/orfName}, @@ -231,13 +268,19 @@ :Concept {:doc "A concept used to classify resources.", :title "Concept", - :fields {}, + :fields {:label + {:uri :rdfs/label + :attribute :rdfs/label + :type :string}}, :uri :uniprot/Concept}, :Database {:doc "Metadata for a life science database.", :title "Database (description of)", :fields - {:implicit + {:label + {:uri :rdfs/label + :attribute :rdfs/label + :type :string}:implicit {:type :boolean, :uri :uniprot/implicit, :attribute :uniprot/implicit, @@ -287,7 +330,10 @@ "A resource that holds a set of the known names for this protein together.", :title "Structured_Name", :fields - {:structuredNameType + {:label + {:uri 
:rdfs/label + :attribute :rdfs/label + :type :string}:structuredNameType {:type :string, :uri :uniprot/structuredNameType, :attribute :uniprot/structuredNameType, @@ -298,14 +344,20 @@ {:doc "A Proteome that has been excluded from UniProtKB for some reason, normally described by statements with the predicate up:exclusionReason", :title "Excluded proteome", - :fields {}, + :fields {:label + {:uri :rdfs/label + :attribute :rdfs/label + :type :string}}, :uri :uniprot/Excluded_Proteome}, :Attribution {:doc "Entity used to attach evidence or provenance to a rdf statement via reification.", :title "Attribution", :fields - {:manual + {:label + {:uri :rdfs/label + :attribute :rdfs/label + :type :string}:manual {:type :boolean, :uri :uniprot/manual, :attribute :uniprot/manual}, :source {:type :string, :uri :uniprot/source, :attribute :uniprot/source}}, @@ -313,18 +365,27 @@ :Pathway {:doc "A hierarchical discription of a metabolic pathway.", :title "Pathway", - :fields {}, + :fields {:label + {:uri :rdfs/label + :attribute :rdfs/label + :type :string}}, :uri :uniprot/Pathway}, :Tissue {:doc "A tissue such as lung or heart.", :title "Tissue", - :fields {}, + :fields {:label + {:uri :rdfs/label + :attribute :rdfs/label + :type :string}}, :uri :uniprot/Tissue}, :Citation {:doc "Description of a publication from which data was obtained.", :title "Citation", :fields - {:institution + {:label + {:uri :rdfs/label + :attribute :rdfs/label + :type :string}:institution {:type :string, :uri :uniprot/institution, :attribute :uniprot/institution, @@ -388,7 +449,10 @@ {:doc "Description of a proteome.", :title "Proteome", :fields - {:panproteome + {:label + {:uri :rdfs/label + :attribute :rdfs/label + :type :string}:panproteome {:type :Proteome, :uri :uniprot/panproteome, :attribute :uniprot/panproteome, @@ -405,7 +469,10 @@ {:doc "Description of a protein-protein interaction.", :title "Interaction", :fields - {:participant + {:label + {:uri :rdfs/label + :attribute :rdfs/label + :type 
:string}:participant {:type :Participant, :uri :uniprot/participant, :attribute :uniprot/participant, @@ -425,15 +492,24 @@ :Strain {:doc "A strain of a species.", :title "Strain", - :fields {}, + :fields {:label + {:uri :rdfs/label + :attribute :rdfs/label + :type :string}}, :uri :uniprot/Strain}, - :Disease {:title "Disease", :fields {}, :uri :uniprot/Disease}, + :Disease {:title "Disease", :fields {:label + {:uri :rdfs/label + :attribute :rdfs/label + :type :string}}, :uri :uniprot/Disease}, :Reviewed {:doc " The class of all reviewed records in the database (i.e. records that where looked at by a curator for integration into the database).", :title "Reviewed", :fields - {:nucleotideSequenceMappingIssue + {:label + {:uri :rdfs/label + :attribute :rdfs/label + :type :string}:nucleotideSequenceMappingIssue {:type :Nucleotide_Resource, :uri :uniprot/nucleotideSequenceMappingIssue, :attribute :uniprot/nucleotideSequenceMappingIssue, @@ -443,13 +519,19 @@ :Rank {:doc "A rank of a taxon.", :title "Taxon rank", - :fields {}, + :fields {:label + {:uri :rdfs/label + :attribute :rdfs/label + :type :string}}, :uri :uniprot/Rank}, :Cluster {:doc "Cluster of proteins with similar sequences.", :title "Cluster (UniRef)", :fields - {:member + {:label + {:uri :rdfs/label + :attribute :rdfs/label + :type :string}:member {:type :Sequence, :uri :uniprot/member, :attribute :uniprot/member, @@ -473,13 +555,19 @@ :Molecule {:doc "A biological molecule.", :title "Molecule", - :fields {}, + :fields {:label + {:uri :rdfs/label + :attribute :rdfs/label + :type :string}}, :uri :uniprot/Molecule}, :Resource {:doc "A life science resource.", :title "Resource", :fields - {:locatedOn + {:label + {:uri :rdfs/label + :attribute :rdfs/label + :type :string}:locatedOn {:type :Molecule, :uri :uniprot/locatedOn, :attribute :uniprot/locatedOn, @@ -517,14 +605,20 @@ :Method {:doc "An experimental method.", :title "Method", - :fields {}, + :fields {:label + {:uri :rdfs/label + :attribute :rdfs/label + 
:type :string}}, :uri :uniprot/Method}, :Enzyme {:doc "A specific catalytic activity, defined by the Enzyme Commission of the Nomenclature Committee of the International Union of Biochemistry and Molecular Biology (IUBMB).", :title "Enzyme", :fields - {:activity + {:label + {:uri :rdfs/label + :attribute :rdfs/label + :type :string}:activity {:type :Catalytic_Activity, :uri :uniprot/activity, :attribute :uniprot/activity, @@ -538,7 +632,10 @@ :Subcellular_Location {:title "Subcellular Location", :fields - {:relatedLocation + {:label + {:uri :rdfs/label + :attribute :rdfs/label + :type :string}:relatedLocation {:type :Subcellular_Location, :uri :uniprot/relatedLocation, :attribute :uniprot/relatedLocation}}, @@ -546,16 +643,25 @@ :Plasmid {:doc "Description of a Plasmid", :title "Plasmid", - :fields {}, + :fields {:label + {:uri :rdfs/label + :attribute :rdfs/label + :type :string}}, :uri :uniprot/Plasmid}, :Transposon {:doc "A transposon", :title "Transposon", - :fields {}, + :fields {:label + {:uri :rdfs/label + :attribute :rdfs/label + :type :string}}, :uri :uniprot/Transposon}, :Statement {:fields - {:context + {:label + {:uri :rdfs/label + :attribute :rdfs/label + :type :string}:context {:type :string, :uri :uniprot/context, :attribute :uniprot/context}, @@ -575,7 +681,10 @@ :Journal {:title "Journal", :fields - {:shortCoden + {:label + {:uri :rdfs/label + :attribute :rdfs/label + :type :string}:shortCoden {:type :string, :uri :uniprot/shortCoden, :attribute :uniprot/shortCoden, @@ -584,13 +693,19 @@ :Part {:doc "Description of a part of a protein.", :title "Protein part", - :fields {}, + :fields {:label + {:uri :rdfs/label + :attribute :rdfs/label + :type :string}}, :uri :uniprot/Part}, :Annotation {:doc "Description of a resource on a specific topic.", :title "Annotation", :fields - {:measuredActivity + {:label + {:uri :rdfs/label + :attribute :rdfs/label + :type :string}:measuredActivity {:type :string, :uri :uniprot/measuredActivity, :attribute 
:uniprot/measuredActivity, @@ -655,5 +770,8 @@ :Status {:doc "Indicator for the reliability of a piece of information.", :title "Status", - :fields {}, + :fields {:label + {:uri :rdfs/label + :attribute :rdfs/label + :type :string}}, :uri :uniprot/Status}}} diff --git a/src/clj/org/parkerici/enflame/uniprot.clj b/src/clj/org/parkerici/enflame/uniprot.clj index 2cf4785..7ef10f7 100644 --- a/src/clj/org/parkerici/enflame/uniprot.clj +++ b/src/clj/org/parkerici/enflame/uniprot.clj @@ -170,13 +170,13 @@ :uniprot/Rank :uniprot/Reviewed) -(count (instances :uniprot/Pathway)) -3117 -(count (instances :uniprot/Disease)) -6202 -(count (instances :uniprot/Molecule)) -0 ;; uh uo - +(comment + (count (instances :uniprot/Pathway)) + ; 3117 + (count (instances :uniprot/Disease)) + ; 6202 whoops now 0, wtf? + (count (instances :uniprot/Molecule)) + ) (defn describe [ent] @@ -280,6 +280,7 @@ (keyword (name key)) key)) +;;; TODO add rdfs/label field ;;; TODO add skos etc fields (defn class-alzabo-fields [class] diff --git a/src/cljc/org/parkerici/enflame/sparql/generate.cljc b/src/cljc/org/parkerici/enflame/sparql/generate.cljc index 4346903..f46a2e3 100644 --- a/src/cljc/org/parkerici/enflame/sparql/generate.cljc +++ b/src/cljc/org/parkerici/enflame/sparql/generate.cljc @@ -115,7 +115,7 @@ [blockspec] #_ (reset-vars) (when blockspec - (let [{:keys [filter where select] :as built} + (let [{:keys [filter where find] :as built} (build-query {} (assoc blockspec :top? true)) base `(:bgp ~@where) filtered (if-not (empty? filter) @@ -123,7 +123,7 @@ ~base) base) ;; TODO throwing away pulls, need to implement those some other way - vars (map #(if (seq? %) (second %) %) select) + vars (map #(if (seq? %) (second %) %) find) ] (reset! tap built) ;; TODO @@ -149,14 +149,13 @@ ;; Not rdf/type, its pull etc output-type (keyword (get-in blockspec [:children "output"])) ;oneof :include :pull :count etc. 
subqueries (map (partial build-query {:current-var output-var}) constraints) - subquery-selects (mapcat :select subqueries) + subquery-selects (mapcat :find subqueries) subquery-wheres (mapcat :where subqueries) type-where `[~output-var :rdf/type ~output-rdf-type] base-wheres (cons type-where subquery-wheres) subquery-filters (mapcat :filter subqueries) base-query - ;; Do not understand this - {:select (concat (select-terms output-var output-type) + {:find (concat (select-terms output-var output-type) subquery-selects) :where (if-let [label-attribute (and top? @@ -169,13 +168,12 @@ }] base-query)) - (defmethod build-query :query-text-field [{:keys [current-var] :as query} blockspec] (let [{:keys [attribute] :as blockdef} (spec-block-def blockspec) value (query-value blockspec blockdef "V") var (?var (:attribute blockdef)) ;may not be used and uses up a number... - comp (query-value blockspec blockdef "comp")] + comp (keyword (query-value blockspec blockdef "comp"))] ;; would be better expressed with merge-recursive (-> query (update :where concat @@ -187,7 +185,7 @@ [[current-var attribute var]]) ) (update :filter concat - (when true ; -not (= comp :is) + (when-not (= comp :is) (u/de-ns `[(regex ~var ~(generate-regex comp value) "")]))) diff --git a/src/cljs/org/parkerici/enflame/datomic.cljs b/src/cljs/org/parkerici/enflame/datomic.cljs index 27857a6..e9b8aa2 100644 --- a/src/cljs/org/parkerici/enflame/datomic.cljs +++ b/src/cljs/org/parkerici/enflame/datomic.cljs @@ -123,8 +123,11 @@ :query-results (fn [db [_ {:keys [results count clipped]}]] (let [idents @(rf/subscribe [:idents]) - query-cols (:find @(rf/subscribe [:query])) - reshaped (results/reshape-results results idents query-cols)] + ;; CANDEL query-cols (:find @(rf/subscribe [:query])) + ;; assuming a certain form of query (:project ...) 
+ query-cols (second @(rf/subscribe [:query])) + ;; TODO CANDEL reshaping not working, better to use raw for now + reshaped results #_ (results/reshape-results results idents query-cols)] (-> db (assoc :status :finished :results reshaped diff --git a/src/cljs/org/parkerici/enflame/views.cljs b/src/cljs/org/parkerici/enflame/views.cljs index 3dad861..5729aa8 100644 --- a/src/cljs/org/parkerici/enflame/views.cljs +++ b/src/cljs/org/parkerici/enflame/views.cljs @@ -296,7 +296,7 @@ (rf/reg-sub :query-invalid? (fn [_ _] - nil + false #_ ;TODO CANDEL Specific (let [query @(rf/subscribe [:query]) query-block @(rf/subscribe [:query-block])] From 1f11b00b0a3ea64d68d22b5771d67644d821d2b6 Mon Sep 17 00:00:00 2001 From: Mike Travers Date: Tue, 17 Jan 2023 16:48:30 -0800 Subject: [PATCH 05/11] Uniprot Alzabo schema in correct shape --- project.clj | 2 +- resources/uniprot-alzabo.edn | 564 ++++++++++++------ src/clj/org/parkerici/enflame/sparql.clj | 19 +- src/clj/org/parkerici/enflame/uniprot.clj | 155 ++++- .../parkerici/enflame/sparql/generate.cljc | 8 +- src/cljs/org/parkerici/enflame/views.cljs | 3 +- 6 files changed, 523 insertions(+), 228 deletions(-) diff --git a/project.clj b/project.clj index ba0fc8f..0b7d544 100644 --- a/project.clj +++ b/project.clj @@ -31,7 +31,7 @@ com.fasterxml.jackson.core/jackson-core]] [environ "1.1.0"] [me.raynes/fs "1.4.6"] - [org.parkerici/multitool "0.0.19"] + [org.parkerici/multitool "0.0.26"] [com.cemerick/url "0.1.1"] [org.clojure/data.xml "0.2.0-alpha6"] [org.clojure/clojurescript "1.10.520"] diff --git a/resources/uniprot-alzabo.edn b/resources/uniprot-alzabo.edn index c5f79be..9afcbe8 100644 --- a/resources/uniprot-alzabo.edn +++ b/resources/uniprot-alzabo.edn @@ -1,17 +1,15 @@ -;;; TODO added labels by hand, should do that as part of generation - {:title "UNIPROT", :kinds - {:Organelle {:title "Organelle", :fields {}, :uri :uniprot/Organelle}, + {:Organelle + {:title "Organelle", + :fields + {:label {:type :string, :uri :rdfs/label, 
:attribute :rdfs/label}}, + :uri :uniprot/Organelle}, :Catalytic_Activity {:doc "The catalytic activity of an enzyme.", :title "Catalytic Activity", :fields - {:label - {:uri :rdfs/label - :attribute :rdfs/label - :type :string} - + {:label {:type :string, :uri :rdfs/label, :attribute :rdfs/label}, :catalyzedReaction {:type :string, :uri :uniprot/catalyzedReaction, @@ -25,28 +23,21 @@ {:doc "The component of a proteome. e.g. Chromosome, Contig or Plasmid", :title "Proteome_Component", - :fields {:label - {:uri :rdfs/label - :attribute :rdfs/label - :type :string}}, + :fields + {:label {:type :string, :uri :rdfs/label, :attribute :rdfs/label}}, :uri :uniprot/Proteome_Component}, :Not_Obsolete {:doc "A class introduced to group all records that are currently in the database.", :title "Not Obsolete", - :fields {:label - {:uri :rdfs/label - :attribute :rdfs/label - :type :string}}, + :fields + {:label {:type :string, :uri :rdfs/label, :attribute :rdfs/label}}, :uri :uniprot/Not_Obsolete}, :Taxon {:doc "An element of a taxonomy for classifying life forms.", :title "Taxon", :fields - {:label - {:uri :rdfs/label - :attribute :rdfs/label - :type :string}:otherName + {:otherName {:type :string, :uri :uniprot/otherName, :attribute :uniprot/otherName}, @@ -54,12 +45,24 @@ {:type :string, :uri :uniprot/scientificName, :attribute :uniprot/scientificName}, + :reviewed + {:type :boolean, + :uri :uniprot/reviewed, + :attribute :uniprot/reviewed, + :doc + "Indicates whether a resource has been reviewed by a curator."}, :rank {:type :Rank, :uri :uniprot/rank, :attribute :uniprot/rank, :doc "The rank of a taxon."}, + :obsolete + {:type :boolean, + :uri :uniprot/obsolete, + :attribute :uniprot/obsolete, + :doc "True if this resource has been replaced or deleted."}, :host {:type :Taxon, :uri :uniprot/host, :attribute :uniprot/host}, + :label {:type :string, :uri :rdfs/label, :attribute :rdfs/label}, :synonym {:type :string, :uri :uniprot/synonym, @@ -84,39 +87,39 @@ :Participant {:doc 
"A participant in a protein-protein interaction.", :title "Interaction participant", - :fields {:label - {:uri :rdfs/label - :attribute :rdfs/label - :type :string}}, + :fields + {:label {:type :string, :uri :rdfs/label, :attribute :rdfs/label}}, :uri :uniprot/Participant}, :Obsolete {:doc "The class of all obsolete records in the database (i.e. records that where once published but are now removed).", :title "Obsolete", - :fields {:label - {:uri :rdfs/label - :attribute :rdfs/label - :type :string}}, + :fields + {:label {:type :string, :uri :rdfs/label, :attribute :rdfs/label}}, :uri :uniprot/Obsolete}, :Protein_Existence {:title "Protein existence evidence", - :fields {:label - {:uri :rdfs/label - :attribute :rdfs/label - :type :string}}, + :fields + {:label {:type :string, :uri :rdfs/label, :attribute :rdfs/label}}, :uri :uniprot/Protein_Existence}, :Sequence {:doc "An amino acid sequence.", :title "Sequence", :fields - {:label - {:uri :rdfs/label - :attribute :rdfs/label - :type :string}:mass + {:organism + {:type :Taxon, + :uri :uniprot/organism, + :attribute :uniprot/organism, + :doc "The organism in which a protein occurs."}, + :mass {:type :int, :uri :uniprot/mass, :attribute :uniprot/mass, :doc "The predicted mass of a sequence in Daltons."}, + :representativeFor + {:type :Cluster, + :uri :uniprot/representativeFor, + :attribute :uniprot/representativeFor}, :precursor {:type :boolean, :uri :uniprot/precursor, @@ -126,11 +129,12 @@ :uri :uniprot/translatedFrom, :attribute :uniprot/translatedFrom}, :fragment - {:type :_e236e658c79333b7f7b0b10b43cb198c, + {:type :_934f2f4b1742160a7cabe73bfae412a8, :uri :uniprot/fragment, :attribute :uniprot/fragment, :doc "Indicates if a sequence is complete or consists of one or more fragments."}, + :label {:type :string, :uri :rdfs/label, :attribute :rdfs/label}, :modification {:type :Alternative_Sequence_Annotation, :uri :uniprot/modification, @@ -139,7 +143,7 @@ :length {:type :int, :uri :uniprot/length, :attribute 
:uniprot/length}, :sequenceFor - {:type :_cface5270133a0bde29d9d542d4d53a6, + {:type :_341fcbedbe860321d6a43b60b0bc5fe3, :uri :uniprot/sequenceFor, :attribute :uniprot/sequenceFor, :doc "A resource that describes this sequence."}, @@ -157,6 +161,8 @@ :attribute :uniprot/basedOn, :doc "The sequence on which the description of a modified sequence is based."}, + :version + {:type :int, :uri :uniprot/version, :attribute :uniprot/version}, :seedFor {:type :Cluster, :uri :uniprot/seedFor, @@ -166,22 +172,44 @@ {:doc "The use of this class has been replaced by Activity_Regulation_Annotation", :title "Enzyme Regulation", - :fields {:label - {:uri :rdfs/label - :attribute :rdfs/label - :type :string}}, + :fields + {:label {:type :string, :uri :rdfs/label, :attribute :rdfs/label}}, :uri :uniprot/Enzyme_Regulation_Annotation}, :Protein {:doc "Description of a protein.", :title "Protein", :fields - {:label - {:uri :rdfs/label - :attribute :rdfs/label - :type :string}:isolatedFrom + {:submittedName + {:type :Structured_Name, + :uri :uniprot/submittedName, + :attribute :uniprot/submittedName, + :doc + "A name provided by the submitter of the underlying nucleotide sequence."}, + :organism + {:type :Taxon, + :uri :uniprot/organism, + :attribute :uniprot/organism, + :doc "The organism in which a protein occurs."}, + :isolatedFrom {:type :Tissue, :uri :uniprot/isolatedFrom, :attribute :uniprot/isolatedFrom}, + :representativeFor + {:type :Cluster, + :uri :uniprot/representativeFor, + :attribute :uniprot/representativeFor}, + :mnemonic + {:type :string, + :uri :uniprot/mnemonic, + :attribute :uniprot/mnemonic, + :doc + "A rememberable string that can be used to find entries, not a stable identifier!"}, + :reviewed + {:type :boolean, + :uri :uniprot/reviewed, + :attribute :uniprot/reviewed, + :doc + "Indicates whether a resource has been reviewed by a curator."}, :classifiedWith {:type :Concept, :uri :uniprot/classifiedWith, @@ -203,10 +231,24 @@ :attribute 
:uniprot/chainSequenceMapping, :doc "A mapping between a Sequence/Entry and aminoacids described in a PDB record."}, + :obsolete + {:type :boolean, + :uri :uniprot/obsolete, + :attribute :uniprot/obsolete, + :doc "True if this resource has been replaced or deleted."}, :interaction {:type :Interaction, :uri :uniprot/interaction, :attribute :uniprot/interaction}, + :created + {:type :date, + :uri :uniprot/created, + :attribute :uniprot/created, + :doc "The date a resource was created."}, + :structuredName + {:type :Structured_Name, + :uri :uniprot/structuredName, + :attribute :uniprot/structuredName}, :oldMnemonic {:type :string, :uri :uniprot/oldMnemonic, @@ -217,23 +259,76 @@ :uri :uniprot/component, :attribute :uniprot/component, :doc "A component of a protein."}, + :modified + {:type :date, + :uri :uniprot/modified, + :attribute :uniprot/modified, + :doc "The date a resource was last modified."}, :nucleotideSequenceMappingIssue {:type :Nucleotide_Resource, :uri :uniprot/nucleotideSequenceMappingIssue, :attribute :uniprot/nucleotideSequenceMappingIssue, :doc "When a CDS differs substantially from a reviewed UniProtKB/Swiss-Prot sequence, the UniProt curators indicate the nature of the difference in the corresponding cross-reference."}, + :alternativeName + {:type :Structured_Name, + :uri :uniprot/alternativeName, + :attribute :uniprot/alternativeName, + :doc "A synonym of the recommended name."}, + :conflictingSequence + {:type :External_Sequence, + :uri :uniprot/conflictingSequence, + :attribute :uniprot/conflictingSequence}, + :enzyme + {:type :Enzyme, + :uri :uniprot/enzyme, + :attribute :uniprot/enzyme, + :doc + "The catalytic activity associated with a protein. 
or part of a protein."}, + :label {:type :string, :uri :rdfs/label, :attribute :rdfs/label}, + :attribution + {:type :Attribution, + :uri :uniprot/attribution, + :attribute :uniprot/attribution}, + :recommendedName + {:type :Structured_Name, + :uri :uniprot/recommendedName, + :attribute :uniprot/recommendedName, + :doc "The name recommended by the UniProt consortium."}, + :sequence + {:type :Sequence, + :uri :uniprot/sequence, + :attribute :uniprot/sequence, + :doc "An amino acid sequence."}, :mappedCitation {:type :Citation, :uri :uniprot/mappedCitation, :attribute :uniprot/mappedCitation, :doc "A publication from which data was by a extracted by a mapping from non UniProt origin, or which contains additional information."}, + :replaces + {:type :_f1d2077fbda7736db042867774ef80f9, + :uri :uniprot/replaces, + :attribute :uniprot/replaces, + :doc "A resource that is replaced by this resource."}, + :mappedAnnotation + {:type :Annotation, + :uri :uniprot/mappedAnnotation, + :attribute :uniprot/mappedAnnotation, + :doc + "Maps annotation to a resource. 
Used to link annotations/comments from external non UniProt sources via Citations to UniProt resources."}, :domain {:type :Part, :uri :uniprot/domain, :attribute :uniprot/domain, :doc "A domain of a protein."}, + :version + {:type :int, :uri :uniprot/version, :attribute :uniprot/version}, + :replacedBy + {:type :_8e5f52771e3b776839235d09ea0f4a4d, + :uri :uniprot/replacedBy, + :attribute :uniprot/replacedBy, + :doc "A resource that replaces this resource."}, :encodedBy {:type :Gene, :uri :uniprot/encodedBy, @@ -253,10 +348,8 @@ :Gene {:title "Gene", :fields - {:label - {:uri :rdfs/label - :attribute :rdfs/label - :type :string}:orfName + {:label {:type :string, :uri :rdfs/label, :attribute :rdfs/label}, + :orfName {:type :string, :uri :uniprot/orfName, :attribute :uniprot/orfName}, @@ -268,29 +361,14 @@ :Concept {:doc "A concept used to classify resources.", :title "Concept", - :fields {:label - {:uri :rdfs/label - :attribute :rdfs/label - :type :string}}, + :fields + {:label {:type :string, :uri :rdfs/label, :attribute :rdfs/label}}, :uri :uniprot/Concept}, :Database {:doc "Metadata for a life science database.", :title "Database (description of)", :fields - {:label - {:uri :rdfs/label - :attribute :rdfs/label - :type :string}:implicit - {:type :boolean, - :uri :uniprot/implicit, - :attribute :uniprot/implicit, - :doc "True if existance of this resource can be inferred."}, - :abstract - {:type :boolean, - :uri :uniprot/abstract, - :attribute :uniprot/abstract, - :doc "True if the class does not have any direct instances."}, - :category + {:category {:type :string, :uri :uniprot/category, :attribute :uniprot/category}, @@ -300,40 +378,55 @@ :attribute :uniprot/linkIsExplicit, :doc "True if the Database is linked by an explicit action to UniProt, false if it is done using a simple hardcoded rule."}, - :pattern + :uriTemplate {:type :string, - :uri :uniprot/pattern, - :attribute :uniprot/pattern, + :uri :uniprot/uriTemplate, + :attribute :uniprot/uriTemplate, :doc - 
"A URL pattern, used to generate links by substituting an identifier."}, + "An string template that can be used to figure out from the database id what uri desribes it."}, + :abstract + {:type :boolean, + :uri :uniprot/abstract, + :attribute :uniprot/abstract, + :doc "True if the class does not have any direct instances."}, :curated {:type :boolean, :uri :uniprot/curated, :attribute :uniprot/curated, :doc "If true the described database has some level of curation."}, + :citation + {:type :Citation, + :uri :uniprot/citation, + :attribute :uniprot/citation, + :doc + "A publication from which data was extracted, or which contains additional information."}, :urlTemplate {:type :string, :uri :uniprot/urlTemplate, :attribute :uniprot/urlTemplate, :doc "An string template that can be used to figure out from the database id what html page talks about it."}, - :uriTemplate + :label {:type :string, :uri :rdfs/label, :attribute :rdfs/label}, + :implicit + {:type :boolean, + :uri :uniprot/implicit, + :attribute :uniprot/implicit, + :doc "True if existance of this resource can be inferred."}, + :pattern {:type :string, - :uri :uniprot/uriTemplate, - :attribute :uniprot/uriTemplate, + :uri :uniprot/pattern, + :attribute :uniprot/pattern, :doc - "An string template that can be used to figure out from the database id what uri desribes it."}}, + "A URL pattern, used to generate links by substituting an identifier."}}, :uri :uniprot/Database}, :Structured_Name {:doc "A resource that holds a set of the known names for this protein together.", :title "Structured_Name", :fields - {:label - {:uri :rdfs/label - :attribute :rdfs/label - :type :string}:structuredNameType + {:label {:type :string, :uri :rdfs/label, :attribute :rdfs/label}, + :structuredNameType {:type :string, :uri :uniprot/structuredNameType, :attribute :uniprot/structuredNameType, @@ -344,52 +437,45 @@ {:doc "A Proteome that has been excluded from UniProtKB for some reason, normally described by statements with the 
predicate up:exclusionReason", :title "Excluded proteome", - :fields {:label - {:uri :rdfs/label - :attribute :rdfs/label - :type :string}}, + :fields + {:label {:type :string, :uri :rdfs/label, :attribute :rdfs/label}}, :uri :uniprot/Excluded_Proteome}, :Attribution {:doc "Entity used to attach evidence or provenance to a rdf statement via reification.", :title "Attribution", :fields - {:label - {:uri :rdfs/label - :attribute :rdfs/label - :type :string}:manual + {:label {:type :string, :uri :rdfs/label, :attribute :rdfs/label}, + :manual {:type :boolean, :uri :uniprot/manual, :attribute :uniprot/manual}, + :date + {:type :string, :uri :uniprot/date, :attribute :uniprot/date}, :source {:type :string, :uri :uniprot/source, :attribute :uniprot/source}}, :uri :uniprot/Attribution}, :Pathway {:doc "A hierarchical discription of a metabolic pathway.", :title "Pathway", - :fields {:label - {:uri :rdfs/label - :attribute :rdfs/label - :type :string}}, + :fields + {:label {:type :string, :uri :rdfs/label, :attribute :rdfs/label}}, :uri :uniprot/Pathway}, :Tissue {:doc "A tissue such as lung or heart.", :title "Tissue", - :fields {:label - {:uri :rdfs/label - :attribute :rdfs/label - :type :string}}, + :fields + {:label {:type :string, :uri :rdfs/label, :attribute :rdfs/label}}, :uri :uniprot/Tissue}, :Citation {:doc "Description of a publication from which data was obtained.", :title "Citation", :fields - {:label - {:uri :rdfs/label - :attribute :rdfs/label - :type :string}:institution + {:institution {:type :string, :uri :uniprot/institution, :attribute :uniprot/institution, :doc "The institution at which a thesis was written."}, + :date + {:type :string, :uri :uniprot/date, :attribute :uniprot/date}, :group {:type :string, :uri :uniprot/group, @@ -400,6 +486,12 @@ :uri :uniprot/publisher, :attribute :uniprot/publisher, :doc "The publisher of a book."}, + :place + {:type :string, + :uri :uniprot/place, + :attribute :uniprot/place, + :doc + "The place where a publication 
was published. This usually includes a country name."}, :publishedIn {:type :Journal, :uri :uniprot/publishedIn, @@ -420,6 +512,16 @@ :uri :uniprot/title, :attribute :uniprot/title, :doc "The title of a publication."}, + :pages + {:type :string, + :uri :uniprot/pages, + :attribute :uniprot/pages, + :doc "The first and last page for a chapter or article."}, + :volume + {:type :string, + :uri :uniprot/volume, + :attribute :uniprot/volume, + :doc "The volume a publication is part of."}, :author {:type :string, :uri :uniprot/author, @@ -430,6 +532,7 @@ :uri :uniprot/erratum, :attribute :uniprot/erratum, :doc "An erratum for a publication."}, + :label {:type :string, :uri :rdfs/label, :attribute :rdfs/label}, :authorsIncomplete {:type :boolean, :uri :uniprot/authorsIncomplete, @@ -449,10 +552,8 @@ {:doc "Description of a proteome.", :title "Proteome", :fields - {:label - {:uri :rdfs/label - :attribute :rdfs/label - :type :string}:panproteome + {:label {:type :string, :uri :rdfs/label, :attribute :rdfs/label}, + :panproteome {:type :Proteome, :uri :uniprot/panproteome, :attribute :uniprot/panproteome, @@ -469,10 +570,8 @@ {:doc "Description of a protein-protein interaction.", :title "Interaction", :fields - {:label - {:uri :rdfs/label - :attribute :rdfs/label - :type :string}:participant + {:label {:type :string, :uri :rdfs/label, :attribute :rdfs/label}, + :participant {:type :Participant, :uri :uniprot/participant, :attribute :uniprot/participant, @@ -492,24 +591,21 @@ :Strain {:doc "A strain of a species.", :title "Strain", - :fields {:label - {:uri :rdfs/label - :attribute :rdfs/label - :type :string}}, + :fields + {:label {:type :string, :uri :rdfs/label, :attribute :rdfs/label}}, :uri :uniprot/Strain}, - :Disease {:title "Disease", :fields {:label - {:uri :rdfs/label - :attribute :rdfs/label - :type :string}}, :uri :uniprot/Disease}, + :Disease + {:title "Disease", + :fields + {:label {:type :string, :uri :rdfs/label, :attribute :rdfs/label}}, + :uri 
:uniprot/Disease}, :Reviewed {:doc " The class of all reviewed records in the database (i.e. records that where looked at by a curator for integration into the database).", :title "Reviewed", :fields - {:label - {:uri :rdfs/label - :attribute :rdfs/label - :type :string}:nucleotideSequenceMappingIssue + {:label {:type :string, :uri :rdfs/label, :attribute :rdfs/label}, + :nucleotideSequenceMappingIssue {:type :Nucleotide_Resource, :uri :uniprot/nucleotideSequenceMappingIssue, :attribute :uniprot/nucleotideSequenceMappingIssue, @@ -519,31 +615,38 @@ :Rank {:doc "A rank of a taxon.", :title "Taxon rank", - :fields {:label - {:uri :rdfs/label - :attribute :rdfs/label - :type :string}}, + :fields + {:label {:type :string, :uri :rdfs/label, :attribute :rdfs/label}}, :uri :uniprot/Rank}, :Cluster {:doc "Cluster of proteins with similar sequences.", :title "Cluster (UniRef)", :fields - {:label - {:uri :rdfs/label - :attribute :rdfs/label - :type :string}:member + {:label {:type :string, :uri :rdfs/label, :attribute :rdfs/label}, + :member {:type :Sequence, :uri :uniprot/member, :attribute :uniprot/member, :doc "One of several similar resources."}, + :mnemonic + {:type :string, + :uri :uniprot/mnemonic, + :attribute :uniprot/mnemonic, + :doc + "A rememberable string that can be used to find entries, not a stable identifier!"}, :someMembersClassifiedWith - {:type :_8cb1885ea52378ca17784343e66f9edf, + {:type :_3c618eea3d8a02bf75309716a05b94be, :uri :uniprot/someMembersClassifiedWith, :attribute :uniprot/someMembersClassifiedWith, :doc "Indicates which GO terms are somewhat consistently used to annotate UniProtKB members of this cluster."}, + :modified + {:type :date, + :uri :uniprot/modified, + :attribute :uniprot/modified, + :doc "The date a resource was last modified."}, :identity - {:type :_3b833095004c50f60e1e354c936acbb6, + {:type :_fe11e2bb13384b4238907a7d7b640362, :uri :uniprot/identity, :attribute :uniprot/identity, :doc "The level of sequence identity in a 
cluster."}, @@ -555,70 +658,82 @@ :Molecule {:doc "A biological molecule.", :title "Molecule", - :fields {:label - {:uri :rdfs/label - :attribute :rdfs/label - :type :string}}, + :fields + {:label {:type :string, :uri :rdfs/label, :attribute :rdfs/label}}, :uri :uniprot/Molecule}, :Resource {:doc "A life science resource.", :title "Resource", :fields - {:label - {:uri :rdfs/label - :attribute :rdfs/label - :type :string}:locatedOn + {:method + {:type :Method, + :uri :uniprot/method, + :attribute :uniprot/method, + :doc "The experimental method that was used."}, + :translatedTo + {:type :string, + :uri :uniprot/translatedTo, + :attribute :uniprot/translatedTo}, + :created + {:type :date, + :uri :uniprot/created, + :attribute :uniprot/created, + :doc "The date a resource was created."}, + :resolution + {:type :float, + :uri :uniprot/resolution, + :attribute :uniprot/resolution, + :doc "The resolution of an experiment, in Angstrom."}, + :locatedOn {:type :Molecule, :uri :uniprot/locatedOn, :attribute :uniprot/locatedOn, :doc "The molecule a this resource is located on."}, - :transcribedFrom - {:type :string, - :uri :uniprot/transcribedFrom, - :attribute :uniprot/transcribedFrom}, + :label {:type :string, :uri :rdfs/label, :attribute :rdfs/label}, + :database + {:type :Database, + :uri :uniprot/database, + :attribute :uniprot/database}, :sequenceDiscrepancy {:type :string, :uri :uniprot/sequenceDiscrepancy, :attribute :uniprot/sequenceDiscrepancy, :doc "Used when a CoDing Sequences (CDS) from the INSDC differs substantially from a reviewed UniProtKB/Swiss-Prot sequence, the UniProt curators indicate the nature of the difference as a rdfs:comment linked via this predicate."}, - :database - {:type :Database, - :uri :uniprot/database, - :attribute :uniprot/database}, - :translatedTo - {:type :string, - :uri :uniprot/translatedTo, - :attribute :uniprot/translatedTo}, - :resolution - {:type :float, - :uri :uniprot/resolution, - :attribute :uniprot/resolution, - :doc "The 
resolution of an experiment, in Angstrom."}, :signatureSequenceMatch {:type :string, :uri :uniprot/signatureSequenceMatch, :attribute :uniprot/signatureSequenceMatch, :doc - "Indicates that the signature described by the subject resource matches mathematically and that that match is described by the object of a triple using this property as predicate."}}, + "Indicates that the signature described by the subject resource matches mathematically and that that match is described by the object of a triple using this property as predicate."}, + :transcribedFrom + {:type :string, + :uri :uniprot/transcribedFrom, + :attribute :uniprot/transcribedFrom}}, :uri :uniprot/Resource}, :Method {:doc "An experimental method.", :title "Method", - :fields {:label - {:uri :rdfs/label - :attribute :rdfs/label - :type :string}}, + :fields + {:label {:type :string, :uri :rdfs/label, :attribute :rdfs/label}}, :uri :uniprot/Method}, :Enzyme {:doc "A specific catalytic activity, defined by the Enzyme Commission of the Nomenclature Committee of the International Union of Biochemistry and Molecular Biology (IUBMB).", :title "Enzyme", :fields - {:label - {:uri :rdfs/label - :attribute :rdfs/label - :type :string}:activity + {:label {:type :string, :uri :rdfs/label, :attribute :rdfs/label}, + :replaces + {:type :_f1d2077fbda7736db042867774ef80f9, + :uri :uniprot/replaces, + :attribute :uniprot/replaces, + :doc "A resource that is replaced by this resource."}, + :replacedBy + {:type :_8e5f52771e3b776839235d09ea0f4a4d, + :uri :uniprot/replacedBy, + :attribute :uniprot/replacedBy, + :doc "A resource that replaces this resource."}, + :activity {:type :Catalytic_Activity, :uri :uniprot/activity, :attribute :uniprot/activity, @@ -632,36 +747,52 @@ :Subcellular_Location {:title "Subcellular Location", :fields - {:label - {:uri :rdfs/label - :attribute :rdfs/label - :type :string}:relatedLocation + {:label {:type :string, :uri :rdfs/label, :attribute :rdfs/label}, + :orientation + {:type :Orientation, 
+ :uri :uniprot/orientation, + :attribute :uniprot/orientation}, + :topology + {:type :Topology, + :uri :uniprot/topology, + :attribute :uniprot/topology}, + :relatedLocation {:type :Subcellular_Location, :uri :uniprot/relatedLocation, - :attribute :uniprot/relatedLocation}}, + :attribute :uniprot/relatedLocation}, + :cellularComponent + {:type :Cellular_Component, + :uri :uniprot/cellularComponent, + :attribute :uniprot/cellularComponent}, + :citation + {:type :Citation, + :uri :uniprot/citation, + :attribute :uniprot/citation, + :doc + "A publication from which data was extracted, or which contains additional information."}}, :uri :uniprot/Subcellular_Location}, :Plasmid {:doc "Description of a Plasmid", :title "Plasmid", - :fields {:label - {:uri :rdfs/label - :attribute :rdfs/label - :type :string}}, + :fields + {:label {:type :string, :uri :rdfs/label, :attribute :rdfs/label}}, :uri :uniprot/Plasmid}, :Transposon {:doc "A transposon", :title "Transposon", - :fields {:label - {:uri :rdfs/label - :attribute :rdfs/label - :type :string}}, + :fields + {:label {:type :string, :uri :rdfs/label, :attribute :rdfs/label}}, :uri :uniprot/Transposon}, :Statement {:fields - {:label - {:uri :rdfs/label - :attribute :rdfs/label - :type :string}:context + {:label {:type :string, :uri :rdfs/label, :attribute :rdfs/label}, + :mappedAnnotation + {:type :Annotation, + :uri :uniprot/mappedAnnotation, + :attribute :uniprot/mappedAnnotation, + :doc + "Maps annotation to a resource. 
Used to link annotations/comments from external non UniProt sources via Citations to UniProt resources."}, + :context {:type :string, :uri :uniprot/context, :attribute :uniprot/context}, @@ -681,10 +812,8 @@ :Journal {:title "Journal", :fields - {:label - {:uri :rdfs/label - :attribute :rdfs/label - :type :string}:shortCoden + {:label {:type :string, :uri :rdfs/label, :attribute :rdfs/label}, + :shortCoden {:type :string, :uri :uniprot/shortCoden, :attribute :uniprot/shortCoden, @@ -693,19 +822,50 @@ :Part {:doc "Description of a part of a protein.", :title "Protein part", - :fields {:label - {:uri :rdfs/label - :attribute :rdfs/label - :type :string}}, + :fields + {:label {:type :string, :uri :rdfs/label, :attribute :rdfs/label}, + :submittedName + {:type :Structured_Name, + :uri :uniprot/submittedName, + :attribute :uniprot/submittedName, + :doc + "A name provided by the submitter of the underlying nucleotide sequence."}, + :enzyme + {:type :Enzyme, + :uri :uniprot/enzyme, + :attribute :uniprot/enzyme, + :doc + "The catalytic activity associated with a protein. 
or part of a protein."}, + :recommendedName + {:type :Structured_Name, + :uri :uniprot/recommendedName, + :attribute :uniprot/recommendedName, + :doc "The name recommended by the UniProt consortium."}, + :structuredName + {:type :Structured_Name, + :uri :uniprot/structuredName, + :attribute :uniprot/structuredName}, + :alternativeName + {:type :Structured_Name, + :uri :uniprot/alternativeName, + :attribute :uniprot/alternativeName, + :doc "A synonym of the recommended name."}}, :uri :uniprot/Part}, :Annotation {:doc "Description of a resource on a specific topic.", :title "Annotation", :fields - {:label - {:uri :rdfs/label - :attribute :rdfs/label - :type :string}:measuredActivity + {:substitution + {:type :string, + :uri :uniprot/substitution, + :attribute :uniprot/substitution, + :doc "A replacement sequence."}, + :method + {:type :Method, + :uri :uniprot/method, + :attribute :uniprot/method, + :doc "The experimental method that was used."}, + :measuredActivity {:type :string, :uri :uniprot/measuredActivity, :attribute :uniprot/measuredActivity, @@ -735,10 +895,20 @@ :uri :uniprot/certain, :attribute :uniprot/certain, :doc "False if there is any uncertainty about a statement."}, + :conflictingSequence + {:type :External_Sequence, + :uri :uniprot/conflictingSequence, + :attribute :uniprot/conflictingSequence}, + :label {:type :string, :uri :rdfs/label, :attribute :rdfs/label}, :catalyticActivity {:type :Catalytic_Activity, :uri :uniprot/catalyticActivity, :attribute :uniprot/catalyticActivity}, + :sequence + {:type :Sequence, + :uri :uniprot/sequence, + :attribute :uniprot/sequence, + :doc "An amino acid sequence."}, :measuredError {:type :float, :uri :uniprot/measuredError, @@ -754,7 +924,7 @@ :uri :uniprot/disease, :attribute :uniprot/disease}, :locatedIn - {:type :_58cedf931b81a87fcf4885f1ee126ad6, + {:type :_f1cd6d4fb521d72fcc3b9cadda2e2178, :uri :uniprot/locatedIn, :attribute :uniprot/locatedIn}, :measuredValue @@ -770,8 +940,6 @@ :Status {:doc "Indicator for 
the reliability of a piece of information.", :title "Status", - :fields {:label - {:uri :rdfs/label - :attribute :rdfs/label - :type :string}}, + :fields + {:label {:type :string, :uri :rdfs/label, :attribute :rdfs/label}}, :uri :uniprot/Status}}} diff --git a/src/clj/org/parkerici/enflame/sparql.clj b/src/clj/org/parkerici/enflame/sparql.clj index d653f35..755892d 100644 --- a/src/clj/org/parkerici/enflame/sparql.clj +++ b/src/clj/org/parkerici/enflame/sparql.clj @@ -280,6 +280,14 @@ `(:bgp [?s ~att ?o])) ) +(defn parse-sparql + "Parse SPARQL query into Jena sexp form. " + [sparql] + (read-string (str (q/parse sparql)))) + +#_ +(parse-sparql "select ?a ?b ?c where {?a ?b ?c}") + (comment ;;; Look here for examples @@ -293,4 +301,13 @@ (def x (sq/q endpoint '(:distinct (:project [?o] (:bgp [?s :rdf/type :prokino/LigandActivity] [?s :prokino/hasMOA ?o]))))) -) + + +;;; Not implemented?? +(q '[:conditional [:bgp [:uniprot/organism :rdfs/domain ?d1] [?d1 ?p1 ?d2] [?d2 ?p2 ?d3]] [:bgp [?d3 ?p3 ?d4]]])) + + + + + + diff --git a/src/clj/org/parkerici/enflame/uniprot.clj b/src/clj/org/parkerici/enflame/uniprot.clj index 7ef10f7..00d786d 100644 --- a/src/clj/org/parkerici/enflame/uniprot.clj +++ b/src/clj/org/parkerici/enflame/uniprot.clj @@ -11,7 +11,7 @@ ;;; Note: has teneded to disappear and reappear, restarting Wifi helps (def endpoint "https://sparql.uniprot.org/") -;;; → Multitool +;;; → Multitool - but shouldn't it be a macro rather than having to call eval TODO (defn curried-api [namespace arg1] ;TODO should take arb # args `(do @@ -23,37 +23,57 @@ ;;; These are silly (reg/prefix 'uniprot "http://purl.uniprot.org/core/") +(reg/prefix 'uniprotein "http://purl.uniprot.org/uniprot/") (reg/prefix 'unipath "http://purl.uniprot.org/unipathway/") (reg/prefix 'unicite "http://purl.uniprot.org/citations/") (reg/prefix 'unidb "http://purl.uniprot.org/database") (reg/prefix 'dcterms "http://purl.org/dc/terms/") (reg/prefix 'unienzyme "http://purl.uniprot.org/enzyme/") 
+(reg/prefix 'taxon "http://purl.uniprot.org/taxonomy/") (reg/prefix 'skos "http://www.w3.org/2004/02/skos/core#") ;;; Try (this does not seem to work, sigh) (reg/prefix 'uniuni "http://purl.uniprot.org/") -(defn uniprot-q - [sparql] - (if (string? sparql) - (sq/do-query (sq/sparql-source "https://sparql.uniprot.org/") sparql) - (uniprot-q (sq/->sparql sparql :limit 100000)))) ;TODO limit temp :limit 1000 - (def external-ontology '{:rdf/Statement {:rdf/type (:owl/Class)}}) + +(defn fix-domain + [att] + (->> (concat + (map :d1 (q `[:bgp [~att :rdfs/domain ?d1]])) + (map :d2 (remove #(keyword? (:d1 %)) (q `[:bgp [~att :rdfs/domain ?d1] [?d1 ?p1 ?d2]]))) + (map :d3 (remove #(keyword? (:d2 %)) (q `[:bgp [~att :rdfs/domain ?d1] [?d1 ?p1 ?d2] [?d2 ?p2 ?d3]]))) + (map :d4 (remove #(keyword? (:d3 %)) (q `[:bgp [~att :rdfs/domain ?d1] [?d1 ?p1 ?d2] [?d2 ?p2 ?d3] [?d3 ?p3 ?d4]])))) + distinct + (filter #(and (keyword? %) (= "uniprot" (namespace %)))))) + +(defn fix-domains + [ontology-in] + (let [atts (->> ontology-in + (filter (fn [[k v]] (let [domain (:rdfs/domain v)] + (and domain (> (count domain) 1))))) + (map first))] + (prn :atts atts) + (reduce (fn [ontology att] + (assoc-in ontology [att :rdfs/domain] (fix-domain att))) + ontology-in + atts))) + ;;; TODO shouldn't run on compile -(defonce uniprot-ontology +(def uniprot-ontology (-> (sq/entify - (uniprot-q + (q '(:bgp [?s :rdfs/isDefinedBy ?uniprot] [?s ?p ?o]))) (merge external-ontology) ;; This one field comes back with an unserializable object, just patch it ;; Real thing (.-lexicalValue _) if need be - (assoc-in [:uniprot/Pathway :rdfs/label] '("Pathway")))) + (assoc-in [:uniprot/Pathway :rdfs/label] '("Pathway")) + fix-domains)) ;;; TODO damn I wish these were more composable @@ -103,12 +123,6 @@ [class] (filtered-by-any :rdfs/domain (all-subclasses class))) - - - - - - (defn uniprot? [ent] (and (keyword? 
ent) @@ -181,8 +195,8 @@ (defn describe [ent] (concat - (uniprot-q `(:bgp [~ent ?p ?o])) - (uniprot-q `(:bgp [?s ?p ~ent])))) + (q `(:bgp [~ent ?p ?o])) + (q `(:bgp [?s ?p ~ent])))) (comment @@ -280,12 +294,14 @@ (keyword (name key)) key)) -;;; TODO add rdfs/label field ;;; TODO add skos etc fields (defn class-alzabo-fields [class] (apply merge + {:label {:type :string + :uri :rdfs/label + :attribute :rdfs/label}} (for [[n d] (properties-for-domain class)] {(nons n) {:type (or (nons (first (:rdfs/range d))) @@ -294,7 +310,8 @@ :uri n :attribute n ;aka :uri, but this leverages existing mechanisms :doc (first (:rdfs/comment d))}} - ))) + ) + )) (defn alzabo [] @@ -327,3 +344,101 @@ [?protein :uniprot/classifiedWith ?concept] [?concept :rdfs/label ?clabel] [(regex ?clabel "FOO.*" "")]) + + +;;; Actual generated queries + +;;; The simplest +(comment + +(:project + (?Pathway5 ?Pathway5Label) + (:bgp + [?Pathway5 :rdfs/label ?Pathway5Label] + [?Pathway5 :rdf/type :uniprot/Pathway])) + +;;; With regex label filtering +(:project + (?Pathway9 ?Pathway9Label ?label11) + (:filter + (regex ?label11 ".*synth.*" "") + (:bgp + [?Pathway9 :rdf/type :uniprot/Pathway] + [?Pathway9 :rdfs/label ?label11]))) + +;;; Taxon +(:project + (?Taxon24 ?Taxon24Label ?scientificName20) + (:filter + (regex ?scientificName20 "^Tapinanthus.*" "") + (:bgp + [?Taxon24 :rdf/type :uniprot/Taxon] + [?Taxon24 :uniprot/scientificName ?scientificName20]))) + +;; Proteins from organisms (seems valid but takes like 30 minutes to complete) +(:project + (?Protein15 ?Protein15Label ?Taxon19 ?Taxon19Label ?scientificName9) + (:filter + (regex ?scientificName9 "^Tapinanthus.*" "") + (:bgp + [?Protein15 :rdf/type :uniprot/Protein] + [?Protein15 :uniprot/organism ?Taxon19] + [?Taxon19 :rdf/type :uniprot/Taxon] + [?Taxon19 :uniprot/scientificName ?scientificName9]))) +;;; "Elapsed time: 2596667.627792 msecs" (that's 43.5 fucking minutes!) 
+ +;;; Ah but a mere 30 seconds if you actually do the right ghing: +(:project + (?Protein15 ?Protein15Label ?Taxon19 ?scientificName9) + (:filter + (regex ?scientificName9 "^Tapinanthus.*" "") + (:bgp + [?Protein15 :rdf/type :uniprot/Protein] + [?Protein15 :rdfs/label ?Protein15Label] + [?Protein15 :uniprot/organism ?Taxon19] + [?Taxon19 :rdf/type :uniprot/Taxon] + ;; Note: Taxons don't have :rdfs/label and you have to use differnt att! + ;[?Taxon19 :rdfs/label ?Taxon19Label] + [?Taxon19 :uniprot/scientificName ?scientificName9]))) +) + + + +;;; ❖⟐❖ fixing blank-node domains ❖⟐❖⟐❖⟐❖⟐❖⟐❖⟐❖⟐❖⟐❖⟐❖⟐❖⟐❖⟐❖⟐❖⟐❖⟐❖⟐❖⟐❖⟐❖⟐❖⟐❖⟐❖⟐❖⟐❖⟐❖⟐❖⟐❖⟐❖⟐❖ + + + + +#_ +#:uniprot{:submittedName (:uniprot/Part :uniprot/Protein), + :organism (:uniprot/Protein :uniprot/Sequence), + :date (:uniprot/Attribution :uniprot/Citation), + :representativeFor (:uniprot/Protein :uniprot/Sequence), + :mnemonic (:uniprot/Cluster :uniprot/Protein), + :reviewed (:uniprot/Protein :uniprot/Taxon), + :place (:uniprot/Book_Citation :uniprot/Thesis_Citation), + :substitution (:uniprot/Mutagenesis_Annotation :uniprot/Natural_Variant_Annotation), + :method (:uniprot/Mass_Spectrometry_Annotation :uniprot/Structure_Resource), + :obsolete (:uniprot/Protein :uniprot/Taxon), + :orientation (:uniprot/Cellular_Component :uniprot/cellularComponent), + :created (:uniprot/Protein :uniprot/Resource), + :structuredName (:uniprot/Part :uniprot/Protein), + :modified (:uniprot/Cluster :uniprot/Protein), + :cellularComponent (:uniprot/Cellular_Component :uniprot/cellularComponent), + :pages (:uniprot/Book_Citation :uniprot/Journal_Citation), + :volume (:uniprot/Book_Citation :uniprot/Journal_Citation), + :citation (:uniprot/Cellular_Component :uniprot/Database), + :alternativeName (:uniprot/Part :uniprot/Protein), + :conflictingSequence (:uniprot/Protein :uniprot/Sequence_Caution_Annotation), + :enzyme (:uniprot/Part :uniprot/Protein), + :attribution (:uniprot/Protein), + :recommendedName (:uniprot/Part :uniprot/Protein), + 
:sequence (:uniprot/Annotation :uniprot/Protein), + :replaces (:uniprot/Enzyme :uniprot/Protein), + :mappedAnnotation (:uniprot/Citation_Statement :uniprot/Protein), + :version (:uniprot/Protein :uniprot/Sequence), + :replacedBy (:uniprot/Enzyme :uniprot/Protein), + :topology (:uniprot/Cellular_Component :uniprot/cellularComponent)} + + + diff --git a/src/cljc/org/parkerici/enflame/sparql/generate.cljc b/src/cljc/org/parkerici/enflame/sparql/generate.cljc index f46a2e3..fc9b2a6 100644 --- a/src/cljc/org/parkerici/enflame/sparql/generate.cljc +++ b/src/cljc/org/parkerici/enflame/sparql/generate.cljc @@ -13,14 +13,8 @@ (def varcounter (atom {})) -;;; → Multitool -(defn safe-name [thing] - (when #?(:clj (instance? clojure.lang.Named thing) - :cljs (.-name thing)) - (name thing))) - (defn s [thing] - (or (safe-name thing) + (or (u/safe-name thing) (str thing))) (defn symbol-conc diff --git a/src/cljs/org/parkerici/enflame/views.cljs b/src/cljs/org/parkerici/enflame/views.cljs index 5729aa8..7859875 100644 --- a/src/cljs/org/parkerici/enflame/views.cljs +++ b/src/cljs/org/parkerici/enflame/views.cljs @@ -258,7 +258,8 @@ [:button.btn.btn-primary.m-1 {:class (if left? "float-left" "float-right") :on-mouse-down #(rf/dispatch [:do-query query]) - :disabled (not (nil? 
@(rf/subscribe [:query-invalid?]))) + ;; TODO get this working again + ;; :disabled (boolean @(rf/subscribe [:query-invalid?])) :data-toggle "tooltip" ;; TODO add the js necessary to make this pretty (not sure it's worth it) (also see tooltip in Library pane) ;; :data-placement "top" From 3656780a23c280dd6e550d723e529d90b35126b6 Mon Sep 17 00:00:00 2001 From: Mike Travers Date: Thu, 26 Jan 2023 17:37:05 -0800 Subject: [PATCH 06/11] browser starting to work --- resources/uniprot-config.edn | 2 +- src/clj/org/parkerici/enflame/server.clj | 3 +- src/clj/org/parkerici/enflame/sparql.clj | 19 +++++++ src/clj/org/parkerici/enflame/uniprot.clj | 2 +- .../parkerici/enflame/sparql/generate.cljc | 6 +- src/cljs/org/parkerici/enflame/datomic.cljs | 42 +++++++++++++- .../org/parkerici/enflame/view/browser.cljs | 55 ++++++++++++++++++- .../org/parkerici/enflame/view/utils.cljs | 31 +++++------ src/cljs/org/parkerici/enflame/views.cljs | 2 + 9 files changed, 138 insertions(+), 24 deletions(-) diff --git a/resources/uniprot-config.edn b/resources/uniprot-config.edn index 9b6c966..13cad51 100644 --- a/resources/uniprot-config.edn +++ b/resources/uniprot-config.edn @@ -17,7 +17,7 @@ :rh-cards [:query #_ :share :compact ;debug only - #_ :browser ;someday + :browser ;someday ] } diff --git a/src/clj/org/parkerici/enflame/server.clj b/src/clj/org/parkerici/enflame/server.clj index c55822d..39e69e8 100644 --- a/src/clj/org/parkerici/enflame/server.clj +++ b/src/clj/org/parkerici/enflame/server.clj @@ -42,7 +42,8 @@ ;; TODO Multimethod? (case (:type (:source config)) :candel (datomic/query db query args candelabra-token config) - :sparql (sparql/q (:sparql-endpoint (:source config)) query))) + :sparql (sparql/unkw-results ;TEMP? 
+ (sparql/q (:sparql-endpoint (:source config)) query)))) (defn handle-query [req config] diff --git a/src/clj/org/parkerici/enflame/sparql.clj b/src/clj/org/parkerici/enflame/sparql.clj index 755892d..3189ec4 100644 --- a/src/clj/org/parkerici/enflame/sparql.clj +++ b/src/clj/org/parkerici/enflame/sparql.clj @@ -54,6 +54,7 @@ (let [bindings (.getBinding rb) vars (iterator-seq (.vars bindings))] (zipmap (map #(keyword (.getVarName %)) vars) + ;; graph/data turns URIs into keywords, TODO might want to not do that, we end up throwing that away for the client-side operations (map #(graph/data (.get bindings %)) vars)))) (defn do-query [source q] @@ -311,3 +312,21 @@ +;;; Convert a namespaced keyword to a proper node +(defn node + [kw] + (arachne.aristotle.graph/node kw)) + +(defn uri + [kw] + (.getURI (node kw))) + +;;; Replace :uniprot/Foo with their real URL +(defn unkw-results + [results] + (clojure.walk/prewalk + #(if (and (keyword? %) + (namespace %)) + (str "<" (uri %) ">") ;Maybe add <> ? need to be distinguished from real strings + %) + results)) diff --git a/src/clj/org/parkerici/enflame/uniprot.clj b/src/clj/org/parkerici/enflame/uniprot.clj index 00d786d..8d1f157 100644 --- a/src/clj/org/parkerici/enflame/uniprot.clj +++ b/src/clj/org/parkerici/enflame/uniprot.clj @@ -7,8 +7,8 @@ [clojure.set :as set] ) ) +;;; TODO arrange for this to get loaded ! ;;; TODO get from config -;;; Note: has teneded to disappear and reappear, restarting Wifi helps (def endpoint "https://sparql.uniprot.org/") ;;; → Multitool - but shouldn't it be a macro rather than having to call eval TODO diff --git a/src/cljc/org/parkerici/enflame/sparql/generate.cljc b/src/cljc/org/parkerici/enflame/sparql/generate.cljc index fc9b2a6..c6f853a 100644 --- a/src/cljc/org/parkerici/enflame/sparql/generate.cljc +++ b/src/cljc/org/parkerici/enflame/sparql/generate.cljc @@ -117,7 +117,7 @@ ~base) base) ;; TODO throwing away pulls, need to implement those some other way - vars (map #(if (seq? 
%) (second %) %) find) + vars (distinct (map #(if (seq? %) (second %) %) find)) ] (reset! tap built) ;; TODO @@ -166,7 +166,9 @@ [{:keys [current-var] :as query} blockspec] (let [{:keys [attribute] :as blockdef} (spec-block-def blockspec) value (query-value blockspec blockdef "V") - var (?var (:attribute blockdef)) ;may not be used and uses up a number... + var (if (= attribute :rdfs/label) ;TODO kludge + (label-var current-var) + (?var (:attribute blockdef))) comp (keyword (query-value blockspec blockdef "comp"))] ;; would be better expressed with merge-recursive (-> query diff --git a/src/cljs/org/parkerici/enflame/datomic.cljs b/src/cljs/org/parkerici/enflame/datomic.cljs index e9b8aa2..7ab346e 100644 --- a/src/cljs/org/parkerici/enflame/datomic.cljs +++ b/src/cljs/org/parkerici/enflame/datomic.cljs @@ -119,6 +119,46 @@ (dissoc :error) (assoc :status :interrupted)))) +;;; Obviously in wrong file! + +;;; Old version that had keyword URIs +#_ +(defn reshape-sparql-row + [row] + (reduce-kv (fn [row k v] + (let [labelvar (keyword (str (name k) "Label"))] + (if (contains? row labelvar) + (-> row + (dissoc labelvar) + (assoc k {:entity v + :kind (namespace v) + :id (name v) + :label (get row labelvar)})) + row))) + row + row)) + +;;; New full URI (strings) mode. +(defn reshape-sparql-row + [row] + (reduce-kv (fn [row k v] + (let [labelvar (keyword (str (name k) "Label"))] + (if (contains? row labelvar) + (-> row + (dissoc labelvar) + (assoc k {:entity v + ; :kind (namespace v) + ; :id (name v) + :id v + :label (get row labelvar)})) + row))) + row + row)) + +(defn reshape-sparql-results + [r] + (map reshape-sparql-row r)) + (rf/reg-event-db :query-results (fn [db [_ {:keys [results count clipped]}]] @@ -127,7 +167,7 @@ ;; assuming a certain form of query (:project ...) 
query-cols (second @(rf/subscribe [:query])) ;; TODO CANDEL reshaping not working, better to use raw for now - reshaped results #_ (results/reshape-results results idents query-cols)] + reshaped (reshape-sparql-results results) #_ (results/reshape-results results idents query-cols)] (-> db (assoc :status :finished :results reshaped diff --git a/src/cljs/org/parkerici/enflame/view/browser.cljs b/src/cljs/org/parkerici/enflame/view/browser.cljs index 739ae81..4df101f 100644 --- a/src/cljs/org/parkerici/enflame/view/browser.cljs +++ b/src/cljs/org/parkerici/enflame/view/browser.cljs @@ -3,6 +3,7 @@ [org.parkerici.enflame.view.utils :as vu] [re-frame.core :as rf] [reagent.dom.server] + [org.parkerici.multitool.core :as u] [org.parkerici.enflame.blockly :as blockly] [org.parkerici.enflame.candel.query :as query] [org.parkerici.enflame.datomic :as datomic] @@ -22,6 +23,8 @@ ))) ;;; This stuff is a real mess +;;; Old CANDEL version +#_ (rf/reg-event-db :browse-id (fn [db [_ ddb id kind]] @@ -39,6 +42,32 @@ (assoc-in [:browse :spin?] true)) )) +;;; Probably should have uid as string, this is stupid. +(defn sparql-pull-query + [id kind] + (prn :spq id kind) + (u/de-ns + `(:project (?p ?o) + (:bgp + [~id ?p ?o])))) + +;;; SPARQL version +(rf/reg-event-db + :browse-id + (fn [db [_ id kind]] + (datomic/do-query + nil #_ (or ddb (:ddb db)) + (sparql-pull-query id kind) + nil + nil + #(rf/dispatch [:browse-results %]) + ) + (-> db +;;; Would be nice... +; (assoc-in [:browse :browsing] ent) + (assoc-in [:browse :spin?] 
true)) + )) + ;;; TODO changing db should invalidate :browse state and perhaps other things (rf/reg-event-db :browse-0 @@ -81,6 +110,8 @@ (and (>= new-index 0) (< new-index (count history)))))) +;;; CANDEL version +#_ (rf/reg-event-db :browse-results (fn [db [_ {:keys [results]}]] @@ -95,6 +126,27 @@ (update-in [:browse :history] conj object) )))) +;;; SPARQL version +(defn reshape-sparql + [results] + (reduce (fn [obj row] + (update obj (:p row) conj (or (:o row) (:s row)))) + {} + results)) + +(rf/reg-event-db + :browse-results + (fn [db [_ {:keys [results]}]] + (prn :results results) + ;; This is still a bit hinky + (let [object (reshape-sparql results)] + + (-> db + (assoc-in [:browse :data] object) + (assoc-in [:browse :spin?] false) + (update-in [:browse :history] conj object) + )))) + (rf/reg-sub :browse (fn [db _] @@ -103,7 +155,8 @@ ;;; This is called by javascript from ag-grid (defn ^:export browse [id kind] - (rf/dispatch [:browse-id nil id (keyword kind)])) + (prn :browse id kind) + (rf/dispatch [:browse-id id kind])) (defn history [data browsing] ;TODO Are both these necessary diff --git a/src/cljs/org/parkerici/enflame/view/utils.cljs b/src/cljs/org/parkerici/enflame/view/utils.cljs index 0e3032c..ff82d29 100644 --- a/src/cljs/org/parkerici/enflame/view/utils.cljs +++ b/src/cljs/org/parkerici/enflame/view/utils.cljs @@ -65,26 +65,24 @@ [:div.card-body body]]])) + + ;;; Rendering -;;; This code should be kept in sync with export in server.clj -(defn render-entity-contents +(defn render-browse-link [ent] - ;; Unlike :href #, the ugliness below won't change the page scrolling - (if (and (:kind ent) (or (:id ent) (:db/id ent))) - [:a - ;; This should work but doesn't, so we go through an ugly kludge - #_ {:href "javascript:void(0)" :on-click #(rf/dispatch [:browse ent])} - {:href (str "javascript:org.parkerici.enflame.view.browser.browse.call(null," ; no idea why null is needed, but it is - (or (:id ent) (:db/id ent)) - ",\"" - (name (:kind ent)) - 
"\");")} - (entity-label ent)] - (entity-label ent))) + [:a + ;; This should work but doesn't, so we go through an ugly kludge + #_ {:href "javascript:void(0)" :on-click #(rf/dispatch [:browse ent])} + {:href (str "javascript:org.parkerici.enflame.view.browser.browse.call(null," + (pr-str (:id ent)) + ");")} + (entity-label ent)]) -(defn render-entity [ent] - (render-entity-contents ent)) +;;; This code should be kept in sync with export in server.clj +(defn render-entity + [ent] + (render-browse-link ent)) (defn delist [thing] @@ -105,7 +103,6 @@ ~(when (>= (count l) list-limit) [:i (str ", " (count l) " total")])]) - (defn render [thingy idents] (let [thing (delist thingy)] diff --git a/src/cljs/org/parkerici/enflame/views.cljs b/src/cljs/org/parkerici/enflame/views.cljs index 7859875..dd2f6c6 100644 --- a/src/cljs/org/parkerici/enflame/views.cljs +++ b/src/cljs/org/parkerici/enflame/views.cljs @@ -127,6 +127,8 @@ ;; Sort is not quite what you want, but without it the columns of different entities get jumbled (let [cols (sort @(rf/subscribe [:display-columns])) idents @(rf/subscribe [:idents]) + ;; Note: data can only contain prims, maps, vectors + ;; Keywords get turned into strings data @(rf/subscribe [:results]) ;; First non-null value in column sample-value (fn [col] (some #(col %) data))] From 226e5179d06eef4b99413dc9247d8e4f0f97b2dc Mon Sep 17 00:00:00 2001 From: Mike Travers Date: Thu, 26 Jan 2023 19:44:50 -0800 Subject: [PATCH 07/11] Browser working a bit better --- src/clj/org/parkerici/enflame/server.clj | 2 +- src/clj/org/parkerici/enflame/sparql.clj | 18 ++++++---- src/clj/org/parkerici/enflame/uniprot.clj | 10 +----- .../parkerici/enflame/sparql/generate.cljc | 3 +- .../org/parkerici/enflame/view/browser.cljs | 36 ++++++++++++++----- .../org/parkerici/enflame/view/utils.cljs | 18 ++++++++-- 6 files changed, 59 insertions(+), 28 deletions(-) diff --git a/src/clj/org/parkerici/enflame/server.clj b/src/clj/org/parkerici/enflame/server.clj index 
39e69e8..0ed5712 100644 --- a/src/clj/org/parkerici/enflame/server.clj +++ b/src/clj/org/parkerici/enflame/server.clj @@ -42,7 +42,7 @@ ;; TODO Multimethod? (case (:type (:source config)) :candel (datomic/query db query args candelabra-token config) - :sparql (sparql/unkw-results ;TEMP? + :sparql (sparql/tweak-results ;TEMP? (sparql/q (:sparql-endpoint (:source config)) query)))) (defn handle-query diff --git a/src/clj/org/parkerici/enflame/sparql.clj b/src/clj/org/parkerici/enflame/sparql.clj index 3189ec4..fa14b0e 100644 --- a/src/clj/org/parkerici/enflame/sparql.clj +++ b/src/clj/org/parkerici/enflame/sparql.clj @@ -12,8 +12,9 @@ ;;; Not clear what Aristotle does that isn't better handled by Jena SSE https://jena.apache.org/documentation/notes/sse.html +(def sparql-default-limit 2000) ;Sanity preservation. -(defn ->sparql [bgp & {:keys [limit]}] +(defn ->sparql [bgp & {:keys [limit] :or {limit sparql-default-limit}}] (let [query (-> bgp q/build org.apache.jena.sparql.algebra.OpAsQuery/asQuery)] @@ -322,11 +323,16 @@ (.getURI (node kw))) ;;; Replace :uniprot/Foo with their real URL -(defn unkw-results +;;; Also fix dates and any other unserializable values +(defn tweak-results [results] (clojure.walk/prewalk - #(if (and (keyword? %) - (namespace %)) - (str "<" (uri %) ">") ;Maybe add <> ? need to be distinguished from real strings - %) + #(cond (and (keyword? %) + (namespace %)) + (str "<" (uri %) ">") + ;; TODO any other unserializable types? + (instance? 
org.apache.jena.datatypes.xsd.XSDDateTime %) + (str %) + :else + %) results)) diff --git a/src/clj/org/parkerici/enflame/uniprot.clj b/src/clj/org/parkerici/enflame/uniprot.clj index 8d1f157..00a08eb 100644 --- a/src/clj/org/parkerici/enflame/uniprot.clj +++ b/src/clj/org/parkerici/enflame/uniprot.clj @@ -376,15 +376,7 @@ [?Taxon24 :uniprot/scientificName ?scientificName20]))) ;; Proteins from organisms (seems valid but takes like 30 minutes to complete) -(:project - (?Protein15 ?Protein15Label ?Taxon19 ?Taxon19Label ?scientificName9) - (:filter - (regex ?scientificName9 "^Tapinanthus.*" "") - (:bgp - [?Protein15 :rdf/type :uniprot/Protein] - [?Protein15 :uniprot/organism ?Taxon19] - [?Taxon19 :rdf/type :uniprot/Taxon] - [?Taxon19 :uniprot/scientificName ?scientificName9]))) + ;;; "Elapsed time: 2596667.627792 msecs" (that's 43.5 fucking minutes!) ;;; Ah but a mere 30 seconds if you actually do the right ghing: diff --git a/src/cljc/org/parkerici/enflame/sparql/generate.cljc b/src/cljc/org/parkerici/enflame/sparql/generate.cljc index c6f853a..751e70c 100644 --- a/src/cljc/org/parkerici/enflame/sparql/generate.cljc +++ b/src/cljc/org/parkerici/enflame/sparql/generate.cljc @@ -176,7 +176,8 @@ (cond (= value :any) [[current-var attribute var]] (= comp :is) - [[current-var attribute value]] + [[current-var attribute value] + [current-var attribute var]] ;we also need this, so label gets included in result set and transmitted to client. 
:else ;its a regex of some kind [[current-var attribute var]]) ) diff --git a/src/cljs/org/parkerici/enflame/view/browser.cljs b/src/cljs/org/parkerici/enflame/view/browser.cljs index 4df101f..1be69ac 100644 --- a/src/cljs/org/parkerici/enflame/view/browser.cljs +++ b/src/cljs/org/parkerici/enflame/view/browser.cljs @@ -11,6 +11,7 @@ ) ) +;;; CANDEL only, not sure why this exists when :browse-id handler doe the same thing (defn browser-pull [ddb ent handler] (let [kind (results/infer-kind ent)] @@ -51,24 +52,31 @@ (:bgp [~id ?p ?o])))) + + +(defn sparql-do-pull + [id] + (datomic/do-query + nil + (sparql-pull-query id nil) + nil + nil + #(rf/dispatch [:browse-results %]) + )) + ;;; SPARQL version (rf/reg-event-db :browse-id (fn [db [_ id kind]] - (datomic/do-query - nil #_ (or ddb (:ddb db)) - (sparql-pull-query id kind) - nil - nil - #(rf/dispatch [:browse-results %]) - ) + (sparql-do-pull id) (-> db ;;; Would be nice... -; (assoc-in [:browse :browsing] ent) + ; (assoc-in [:browse :browsing] ent) (assoc-in [:browse :spin?] true)) )) ;;; TODO changing db should invalidate :browse state and perhaps other things +;;; CANDEL (rf/reg-event-db :browse-0 (fn [db [_ ent ddb]] @@ -80,6 +88,15 @@ (assoc-in [:browse :browsing] ent) (assoc-in [:browse :spin?] true)))) +;;; SPARQL +(rf/reg-event-db + :browse-0 + (fn [db [_ ent ddb]] + (sparql-do-pull ent) + (-> db + (assoc-in [:browse :browsing] ent) + (assoc-in [:browse :spin?] true)))) + (rf/reg-event-db :browse (fn [db [_ ent ddb]] @@ -209,7 +226,8 @@ css-class (when kind (str (name kind) "-kind"))] [:tr {:class (if (= k (ffirst sorted)) "browser_head" "browser_row")} [:th {:class (str "browser_row_label" " " css-class)} - k] + ;; TODO maybe these don't want to be links? 
Cute though, and sometimes they have interesting attributes of their own + (vu/render k nil)] [:td {:class "browser_row_contents"} (vu/render v idents)]])))]] ]))) diff --git a/src/cljs/org/parkerici/enflame/view/utils.cljs b/src/cljs/org/parkerici/enflame/view/utils.cljs index ff82d29..46e8bf3 100644 --- a/src/cljs/org/parkerici/enflame/view/utils.cljs +++ b/src/cljs/org/parkerici/enflame/view/utils.cljs @@ -103,6 +103,18 @@ ~(when (>= (count l) list-limit) [:i (str ", " (count l) " total")])]) +(defn sparql-uri? + [thing] + (and (string? thing) + (re-matches #"<.*>" thing))) + +;;; Render attributes by short name (note: something like "uniprot/core/organism" would also work) +(defn uri-short-name + [uri] + (let [short-name (second (re-matches #"<.*[\/\#](.*?)>" uri))] + ;; TODO should have full name as tooltip or something + short-name)) + (defn render [thingy idents] (let [thing (delist thingy)] @@ -112,9 +124,11 @@ (if-let [ident (get idents (:db/id thing))] (name ident) (render-entity thing)) - (results/entity-id? thing) + ;; CANDEL + #_ (results/entity-id? thing) + (sparql-uri? 
thing) (if-let [ident (get idents thing)] (name ident) - (render-entity {:id thing :label (str thing)})) ;this should happen only on :db/id + (render-entity {:id thing :label (uri-short-name thing)})) ;this should happen only on :db/id :else (str thing)))) From 61b2fb4db7eaef22188e42096a2a5bc257e7ff0c Mon Sep 17 00:00:00 2001 From: Mike Travers Date: Mon, 30 Jan 2023 20:25:11 -0800 Subject: [PATCH 08/11] Refactor uniprot code and make sure it gets loaded --- resources/uniprot-alzabo.edn | 12 +- resources/uniprot-config.edn | 3 +- src/clj/org/parkerici/enflame/config.clj | 3 + src/clj/org/parkerici/enflame/sparql.clj | 7 + src/clj/org/parkerici/enflame/uniprot.clj | 396 +----------------- .../parkerici/enflame/uniprot_ontology.clj | 365 ++++++++++++++++ 6 files changed, 390 insertions(+), 396 deletions(-) create mode 100644 src/clj/org/parkerici/enflame/uniprot_ontology.clj diff --git a/resources/uniprot-alzabo.edn b/resources/uniprot-alzabo.edn index 9afcbe8..7502bf1 100644 --- a/resources/uniprot-alzabo.edn +++ b/resources/uniprot-alzabo.edn @@ -1,3 +1,6 @@ +;;; NOTE: hand edited, don't overwrite! + + {:title "UNIPROT", :kinds {:Organelle @@ -44,7 +47,9 @@ :scientificName {:type :string, :uri :uniprot/scientificName, - :attribute :uniprot/scientificName}, + :attribute :uniprot/scientificName + :name? true + }, :reviewed {:type :boolean, :uri :uniprot/reviewed, @@ -62,7 +67,8 @@ :attribute :uniprot/obsolete, :doc "True if this resource has been replaced or deleted."}, :host {:type :Taxon, :uri :uniprot/host, :attribute :uniprot/host}, - :label {:type :string, :uri :rdfs/label, :attribute :rdfs/label}, + ;; Apparently not! + #_ :label #_ {:type :string, :uri :rdfs/label, :attribute :rdfs/label}, :synonym {:type :string, :uri :uniprot/synonym, @@ -349,6 +355,8 @@ {:title "Gene", :fields {:label {:type :string, :uri :rdfs/label, :attribute :rdfs/label}, + ;; Added by hand. 
Looks like at least some genes don't have :rdfs/label + :prefLabel {:type :string :uri :skos/prefLabel :attribute :skos/prefLabel} :orfName {:type :string, :uri :uniprot/orfName, diff --git a/resources/uniprot-config.edn b/resources/uniprot-config.edn index 13cad51..140fe67 100644 --- a/resources/uniprot-config.edn +++ b/resources/uniprot-config.edn @@ -1,4 +1,5 @@ -{:source {:type :sparql +{:requires [org.parkerici.enflame.uniprot] + :source {:type :sparql :sparql-endpoint "https://sparql.uniprot.org/" ;; TODO implement :prefixes {uniprot "http://purl.uniprot.org/core/" diff --git a/src/clj/org/parkerici/enflame/config.clj b/src/clj/org/parkerici/enflame/config.clj index c02e421..db443d2 100644 --- a/src/clj/org/parkerici/enflame/config.clj +++ b/src/clj/org/parkerici/enflame/config.clj @@ -15,6 +15,8 @@ file-config (edn/read-string (slurp file)) config (merge env-config file-config)] (pprint/pprint config) + (doseq [r (:requires config)] + (require r)) (reset! the-config config))) (defn config @@ -30,6 +32,7 @@ ;;; TODO +:requires :port :dev? :schema ;schema file or url (or) diff --git a/src/clj/org/parkerici/enflame/sparql.clj b/src/clj/org/parkerici/enflame/sparql.clj index fa14b0e..7c865d6 100644 --- a/src/clj/org/parkerici/enflame/sparql.clj +++ b/src/clj/org/parkerici/enflame/sparql.clj @@ -204,6 +204,12 @@ (do-query (sparql-source endpoint) sparql) (q endpoint (->sparql sparql)))) +(defn ^:api describe + [endpoint ent] + (concat + (q endpoint `(:bgp [~ent ?p ?o])) + (q endpoint `(:bgp [?s ?p ~ent])))) + (defn ontology-query [endpoint] @@ -336,3 +342,4 @@ :else %) results)) + diff --git a/src/clj/org/parkerici/enflame/uniprot.clj b/src/clj/org/parkerici/enflame/uniprot.clj index 00a08eb..561eb24 100644 --- a/src/clj/org/parkerici/enflame/uniprot.clj +++ b/src/clj/org/parkerici/enflame/uniprot.clj @@ -7,7 +7,8 @@ [clojure.set :as set] ) ) -;;; TODO arrange for this to get loaded ! 
+;;; TODO this now gets loaded at startup, should not do any queries + ;;; TODO get from config (def endpoint "https://sparql.uniprot.org/") @@ -33,404 +34,13 @@ (reg/prefix 'skos "http://www.w3.org/2004/02/skos/core#") ;;; Try (this does not seem to work, sigh) -(reg/prefix 'uniuni "http://purl.uniprot.org/") - - -(def external-ontology - '{:rdf/Statement {:rdf/type (:owl/Class)}}) - - -(defn fix-domain - [att] - (->> (concat - (map :d1 (q `[:bgp [~att :rdfs/domain ?d1]])) - (map :d2 (remove #(keyword? (:d1 %)) (q `[:bgp [~att :rdfs/domain ?d1] [?d1 ?p1 ?d2]]))) - (map :d3 (remove #(keyword? (:d2 %)) (q `[:bgp [~att :rdfs/domain ?d1] [?d1 ?p1 ?d2] [?d2 ?p2 ?d3]]))) - (map :d4 (remove #(keyword? (:d3 %)) (q `[:bgp [~att :rdfs/domain ?d1] [?d1 ?p1 ?d2] [?d2 ?p2 ?d3] [?d3 ?p3 ?d4]])))) - distinct - (filter #(and (keyword? %) (= "uniprot" (namespace %)))))) - -(defn fix-domains - [ontology-in] - (let [atts (->> ontology-in - (filter (fn [[k v]] (let [domain (:rdfs/domain v)] - (and domain (> (count domain) 1))))) - (map first))] - (prn :atts atts) - (reduce (fn [ontology att] - (assoc-in ontology [att :rdfs/domain] (fix-domain att))) - ontology-in - atts))) - -;;; TODO shouldn't run on compile -(def uniprot-ontology - (-> - (sq/entify - (q - '(:bgp [?s :rdfs/isDefinedBy ?uniprot] - [?s ?p ?o]))) - (merge external-ontology) - ;; This one field comes back with an unserializable object, just patch it - ;; Real thing (.-lexicalValue _) if need be - (assoc-in [:uniprot/Pathway :rdfs/label] '("Pathway")) - fix-domains)) - -;;; TODO damn I wish these were more composable - - -;;; Note :unipath/399.28.3.3 doesn't work with Clojure reader, argh -#_ -(pull (keyword "unipath" "399.28.3.3")) - - -(defn filtered-by - [field value] - (u/dissoc-if (fn [[n d]] - (not (some #(= value %) (field d)))) - uniprot-ontology)) - -(defn filtered-by-any - [field values] - (u/dissoc-if (fn [[n d]] - (empty? 
(set/intersection (set (field d)) (set values)))) - uniprot-ontology)) - - -(defn filtered-by-rdf-type - [type] - (u/dissoc-if (fn [[n d]] - ;; TODO assuming a single type - (not (= (:rdf/type d) (list type)))) - uniprot-ontology) ) - -(defn classes - [] - (filtered-by-rdf-type :owl/Class)) - -(defn subclasses - [class] - (filtered-by :rdfs/subClassOf class)) - -(def all-subclasses - (u/transitive-closure (comp keys subclasses))) - -(defn properties - [] - (merge (filtered-by-rdf-type :owl/DatatypeProperty) - (filtered-by-rdf-type :owl/ObjectProperty))) - -(defn properties-for-domain - [class] - (filtered-by-any :rdfs/domain (all-subclasses class))) - -(defn uniprot? - [ent] - (and (keyword? ent) - (= "uniprot" (namespace ent)))) - -(defn top-classes - [] - (filter (fn [[c d]] - (not (contains? (classes) (:rdfs/subClassOf d)))) - (classes))) - -(defn top-classes - [] - (let [non-tops (keys (filtered-by-any :rdfs/subClassOf (keys (classes))))] - (apply dissoc (classes) non-tops))) - -#_ -(:uniprot/Database - :uniprot/Structured_Name - :uniprot/Enzyme_Regulation_Annotation - :uniprot/Enzyme - :uniprot/Excluded_Proteome - :uniprot/Gene - :uniprot/Citation - :uniprot/Attribution - :uniprot/Organelle - :uniprot/Status - :uniprot/Part - :uniprot/Participant - :uniprot/Journal - :uniprot/Structure_Mapping_Statement - :uniprot/Proteome - :uniprot/Nucleotide_Mapping_Statement - :uniprot/Method - :uniprot/Taxon - :uniprot/Molecule - :uniprot/Obsolete - :uniprot/Disease - :uniprot/Resource - :uniprot/Proteome_Component - :uniprot/Cluster - :uniprot/Domain_Assignment_Statement - :uniprot/Protein_Existence - :uniprot/Subcellular_Location - :uniprot/Transposon - :uniprot/Plasmid - :uniprot/Concept - :uniprot/Annotation - :uniprot/Endpoint_Statement - :uniprot/Protein - :uniprot/Tissue - :uniprot/Sequence - :uniprot/Strain - :uniprot/Interaction - :uniprot/Catalytic_Activity - :uniprot/Not_Obsolete - :uniprot/Pathway - :uniprot/Citation_Statement - :uniprot/Rank - 
:uniprot/Reviewed) - -(comment - (count (instances :uniprot/Pathway)) - ; 3117 - (count (instances :uniprot/Disease)) - ; 6202 whoops now 0, wtf? - (count (instances :uniprot/Molecule)) - ) - -(defn describe - [ent] - (concat - (q `(:bgp [~ent ?p ?o])) - (q `(:bgp [?s ?p ~ent])))) - - -(comment -(frequencies (map :rdf/type (vals uniprot-ontology) )) -{(:owl/DatatypeProperty) 43, - (:owl/ObjectProperty) 67, - (:owl/NamedIndividual :owl/Thing :uniprot/Organelle) 9, - (:owl/NamedIndividual :owl/Thing :uniprot/Rank) 31, - (:owl/Class) 168, - (:owl/InverseFunctionalProperty :owl/FunctionalProperty :owl/ObjectProperty) 1, - (:owl/FunctionalProperty :owl/ObjectProperty) 6, - (:owl/FunctionalProperty :owl/DatatypeProperty) 31, - (:owl/NamedIndividual :owl/Thing :uniprot/Status) 4, - (:owl/NamedIndividual :owl/Thing :uniprot/Protein_Existence) 5, - (:owl/NamedIndividual :owl/Thing :uniprot/Mass_Measurement_Method) 7, - (:owl/NamedIndividual :owl/Thing :uniprot/Structure_Determination_Method) 7} -) - - - -#_ -(frequencies (map :rdfs/domain (vals (properties)))) - -;;; Huh. 
-#_ -{nil 17, - (:uniprot/Subcellular_Location_Annotation) 1, - (:uniprot/Structured_Name) 1, - (:uniprot/Proteome) 2, - (:uniprot/Interaction) 1, - (:uniprot/Reviewed_Protein) 1, - (_626915beb033654fc13c8409d68fbefb) 1, - (:uniprot/RNA_Editing_Annotation) 1, - (:uniprot/External_Sequence) 1, - (_135e6ace0ba508ab2319c50063cc0ede) 1, - (_7a99d05307375d434d4a3f01c938cad7) 1, - (_b868ac3eb7960429cb539cea5f6300ae) 1, - (:uniprot/Gene) 2, - (_ade505809c0086211dc02c0f9464258e) 1, - (:uniprot/Resource) 1, - (_cfd54a0e1d32a8d62b76f3490d7f2311) 1, - (:uniprot/Transcript_Resource) 2, - (:uniprot/Published_Citation) 2, - (_e978c46c81fd658d3e99796c5eaf4502) 1, - (_5fcb391e3136fc76ba988a8b5f961505) 1, - (_941b9dd17dd300caedb9cb8c0e3f958e) 1, - (:uniprot/Disease_Annotation) 1, - (_d0e6352838bda96a5d57075fed61b8fc) 1, - (:uniprot/Simple_Sequence) 2, - (:uniprot/Protein) 13, - (:uniprot/Modified_Sequence) 1, - (:uniprot/Enzyme) 2, - (:uniprot/Catalytic_Activity_Annotation) 2, - (_49396fea4d19b3d5b39285cc1252056b) 1, - (_e2daa4cde5ccb48ceaa946a7c97ec83e) 1, - (:uniprot/Journal) 1, - (_3725fc1a96109bd014937233e0cc1e80) 1, - (:uniprot/Cluster) 2, - (_086cb0592bb819a0cd93d44d3bf577d8) 1, - (:uniprot/Binding_Site_Annotation) 2, - (_63f5f671dd82a005a9e91a5c22c458a5) 1, - (:uniprot/Structure_Mapping_Statement) 1, - (_a2745b16016d308213a790963118d9a8) 1, - (:rdfs/Resource) 1, - (:uniprot/Thesis_Citation) 1, - (_d84a5a698f10400d9ffba7376653fc21) 1, - (:uniprot/Nucleotide_Resource) 2, - (_9d3897b13bb29c76ec292b254542decb) 1, - (:uniprot/Subcellular_Location) 1, - (_612e2c86654093537c55009db223f480) 1, - (:uniprot/Book_Citation) 2, - (:uniprot/Kinetics_Annotation) 2, - (:uniprot/Sequence) 3, - (:uniprot/Cofactor_Annotation) 1, - (:uniprot/Catalytic_Activity) 1, - (:uniprot/Citation) 5, - (:uniprot/Database) 5, - (:uniprot/Submission_Citation) 1, - (:uniprot/Taxon) 4, - (:uniprot/Attribution) 1, - (:uniprot/Citation_Statement) 2} - - -;;; Alzabo schema gen - -;;; TODO should include the real 
URI somewhere - -;;; TODO Alzabo has no concept of subclass, would be interesting to add -;;; For now, it compresses everything into top classes - -;;; Remove namespace (see u/d-ns) -(defn nons - [key] - (if (keyword key) - (keyword (name key)) - key)) - -;;; TODO add skos etc fields -(defn class-alzabo-fields - [class] - (apply - merge - {:label {:type :string - :uri :rdfs/label - :attribute :rdfs/label}} - (for [[n d] (properties-for-domain class)] - {(nons n) - {:type (or (nons (first (:rdfs/range d))) - :string) ;temp but nil doen't work - ;; :cardinality ... - :uri n - :attribute n ;aka :uri, but this leverages existing mechanisms - :doc (first (:rdfs/comment d))}} - ) - )) - -(defn alzabo - [] - (u/clean-walk - {:title "UNIPROT" - :kinds - (apply - merge - (for [[tc tc-def] (top-classes)] - {(nons tc) - {:doc (first (:rdfs/comment tc-def)) - :title (first (:rdfs/label tc-def)) ;not actually used or defined - :fields (or (class-alzabo-fields tc) {}) - :uri tc - }}))} - nil?)) - -#_ -(ju/schppit "uniprot-ontology.edn" uniprot-ontology) - -#_ -(ju/schppit "resources/uniprot-alzabo.edn" (alzabo)) - - -;;; Regex usage - - - - `(:bgp [?protein :rdf/type :uniprot/Protein] - [?protein :uniprot/classifiedWith ?concept] - [?concept :rdfs/label ?clabel] - [(regex ?clabel "FOO.*" "")]) - - -;;; Actual generated queries - -;;; The simplest -(comment - -(:project - (?Pathway5 ?Pathway5Label) - (:bgp - [?Pathway5 :rdfs/label ?Pathway5Label] - [?Pathway5 :rdf/type :uniprot/Pathway])) - -;;; With regex label filtering -(:project - (?Pathway9 ?Pathway9Label ?label11) - (:filter - (regex ?label11 ".*synth.*" "") - (:bgp - [?Pathway9 :rdf/type :uniprot/Pathway] - [?Pathway9 :rdfs/label ?label11]))) - -;;; Taxon -(:project - (?Taxon24 ?Taxon24Label ?scientificName20) - (:filter - (regex ?scientificName20 "^Tapinanthus.*" "") - (:bgp - [?Taxon24 :rdf/type :uniprot/Taxon] - [?Taxon24 :uniprot/scientificName ?scientificName20]))) - -;; Proteins from organisms (seems valid but takes 
like 30 minutes to complete) - -;;; "Elapsed time: 2596667.627792 msecs" (that's 43.5 fucking minutes!) - -;;; Ah but a mere 30 seconds if you actually do the right ghing: -(:project - (?Protein15 ?Protein15Label ?Taxon19 ?scientificName9) - (:filter - (regex ?scientificName9 "^Tapinanthus.*" "") - (:bgp - [?Protein15 :rdf/type :uniprot/Protein] - [?Protein15 :rdfs/label ?Protein15Label] - [?Protein15 :uniprot/organism ?Taxon19] - [?Taxon19 :rdf/type :uniprot/Taxon] - ;; Note: Taxons don't have :rdfs/label and you have to use differnt att! - ;[?Taxon19 :rdfs/label ?Taxon19Label] - [?Taxon19 :uniprot/scientificName ?scientificName9]))) -) +#_ (reg/prefix 'uniuni "http://purl.uniprot.org/") -;;; ❖⟐❖ fixing blank-node domains ❖⟐❖⟐❖⟐❖⟐❖⟐❖⟐❖⟐❖⟐❖⟐❖⟐❖⟐❖⟐❖⟐❖⟐❖⟐❖⟐❖⟐❖⟐❖⟐❖⟐❖⟐❖⟐❖⟐❖⟐❖⟐❖⟐❖⟐❖⟐❖ -#_ -#:uniprot{:submittedName (:uniprot/Part :uniprot/Protein), - :organism (:uniprot/Protein :uniprot/Sequence), - :date (:uniprot/Attribution :uniprot/Citation), - :representativeFor (:uniprot/Protein :uniprot/Sequence), - :mnemonic (:uniprot/Cluster :uniprot/Protein), - :reviewed (:uniprot/Protein :uniprot/Taxon), - :place (:uniprot/Book_Citation :uniprot/Thesis_Citation), - :substitution (:uniprot/Mutagenesis_Annotation :uniprot/Natural_Variant_Annotation), - :method (:uniprot/Mass_Spectrometry_Annotation :uniprot/Structure_Resource), - :obsolete (:uniprot/Protein :uniprot/Taxon), - :orientation (:uniprot/Cellular_Component :uniprot/cellularComponent), - :created (:uniprot/Protein :uniprot/Resource), - :structuredName (:uniprot/Part :uniprot/Protein), - :modified (:uniprot/Cluster :uniprot/Protein), - :cellularComponent (:uniprot/Cellular_Component :uniprot/cellularComponent), - :pages (:uniprot/Book_Citation :uniprot/Journal_Citation), - :volume (:uniprot/Book_Citation :uniprot/Journal_Citation), - :citation (:uniprot/Cellular_Component :uniprot/Database), - :alternativeName (:uniprot/Part :uniprot/Protein), - :conflictingSequence (:uniprot/Protein 
:uniprot/Sequence_Caution_Annotation), - :enzyme (:uniprot/Part :uniprot/Protein), - :attribution (:uniprot/Protein), - :recommendedName (:uniprot/Part :uniprot/Protein), - :sequence (:uniprot/Annotation :uniprot/Protein), - :replaces (:uniprot/Enzyme :uniprot/Protein), - :mappedAnnotation (:uniprot/Citation_Statement :uniprot/Protein), - :version (:uniprot/Protein :uniprot/Sequence), - :replacedBy (:uniprot/Enzyme :uniprot/Protein), - :topology (:uniprot/Cellular_Component :uniprot/cellularComponent)} - diff --git a/src/clj/org/parkerici/enflame/uniprot_ontology.clj b/src/clj/org/parkerici/enflame/uniprot_ontology.clj new file mode 100644 index 0000000..185cb44 --- /dev/null +++ b/src/clj/org/parkerici/enflame/uniprot_ontology.clj @@ -0,0 +1,365 @@ +(ns org.parkerici.enflame.uniprot-ontology + (:require + [org.parkerici.enflame.sparql :as sq] + [arachne.aristotle.registry :as reg] + [org.parkerici.multitool.core :as u] + [org.parkerici.multitool.cljcore :as ju] + [clojure.set :as set] + ) ) + +;;; Stuff involved in building the ontology, should not be needed at query runtime + +;;; TODO get from config +(def endpoint "https://sparql.uniprot.org/") + +;;; → Multitool - but shouldn't it be a macro rather than having to call eval TODO +(defn curried-api + [namespace arg1] ;TODO should take arb # args + `(do + ~@(for [[s v] (ns-publics namespace) + :when (:api (meta v))] + `(def ~s ~(partial v arg1))))) + +(eval (curried-api 'org.parkerici.enflame.sparql endpoint)) + + +(def external-ontology + '{:rdf/Statement {:rdf/type (:owl/Class)} + ;; This way might be cleaner but more work + ;; :Taxon {:fields {:scientificName {:name? 
true}}} + :Taxon {:fields {:label {:type :string :uri :uniprot/scientificName :attribute :uniprot/scientificName}}} + :Gene {:fields {:label {:type :string :uri :skos/prefLable :attribute :skos/prefLable}}} + }) + +;;; ❖⟐❖ fixing blank-node domains ❖⟐❖⟐❖⟐❖⟐❖⟐❖⟐❖⟐❖⟐❖⟐❖⟐❖⟐❖⟐❖⟐❖⟐❖⟐❖⟐❖⟐❖⟐❖⟐❖⟐❖⟐❖⟐❖⟐❖⟐❖⟐❖⟐❖⟐❖⟐❖ + +(defn fix-domain + [att] + (->> (concat + (map :d1 (q `[:bgp [~att :rdfs/domain ?d1]])) + (map :d2 (remove #(keyword? (:d1 %)) (q `[:bgp [~att :rdfs/domain ?d1] [?d1 ?p1 ?d2]]))) + (map :d3 (remove #(keyword? (:d2 %)) (q `[:bgp [~att :rdfs/domain ?d1] [?d1 ?p1 ?d2] [?d2 ?p2 ?d3]]))) + (map :d4 (remove #(keyword? (:d3 %)) (q `[:bgp [~att :rdfs/domain ?d1] [?d1 ?p1 ?d2] [?d2 ?p2 ?d3] [?d3 ?p3 ?d4]])))) + distinct + (filter #(and (keyword? %) (= "uniprot" (namespace %)))))) + +(defn fix-domains + [ontology-in] + (let [atts (->> ontology-in + (filter (fn [[k v]] (let [domain (:rdfs/domain v)] + (and domain (> (count domain) 1))))) + (map first))] + (reduce (fn [ontology att] + (assoc-in ontology [att :rdfs/domain] (fix-domain att))) + ontology-in + atts))) + +;;; TODO shouldn't run on compile +(def uniprot-ontology + (-> + (sq/entify + (q + '(:bgp [?s :rdfs/isDefinedBy ?uniprot] + [?s ?p ?o]))) + (merge external-ontology) + ;; This one field comes back with an unserializable object, just patch it + ;; Real thing (.-lexicalValue _) if need be + (assoc-in [:uniprot/Pathway :rdfs/label] '("Pathway")) + fix-domains)) + +(defn filtered-by + [field value] + (u/dissoc-if (fn [[n d]] + (not (some #(= value %) (field d)))) + uniprot-ontology)) + +(defn filtered-by-any + [field values] + (u/dissoc-if (fn [[n d]] + (empty? 
(set/intersection (set (field d)) (set values)))) + uniprot-ontology)) + + +(defn filtered-by-rdf-type + [type] + (u/dissoc-if (fn [[n d]] + ;; TODO assuming a single type + (not (= (:rdf/type d) (list type)))) + uniprot-ontology) ) + +(defn classes + [] + (filtered-by-rdf-type :owl/Class)) + +(defn subclasses + [class] + (filtered-by :rdfs/subClassOf class)) + +(def all-subclasses + (u/transitive-closure (comp keys subclasses))) + +(defn properties + [] + (merge (filtered-by-rdf-type :owl/DatatypeProperty) + (filtered-by-rdf-type :owl/ObjectProperty))) + +(defn properties-for-domain + [class] + (filtered-by-any :rdfs/domain (all-subclasses class))) + +(defn uniprot? + [ent] + (and (keyword? ent) + (= "uniprot" (namespace ent)))) + +(defn top-classes + [] + (filter (fn [[c d]] + (not (contains? (classes) (:rdfs/subClassOf d)))) + (classes))) + +(defn top-classes + [] + (let [non-tops (keys (filtered-by-any :rdfs/subClassOf (keys (classes))))] + (apply dissoc (classes) non-tops))) + + +#_ +(:uniprot/Database + :uniprot/Structured_Name + :uniprot/Enzyme_Regulation_Annotation + :uniprot/Enzyme + :uniprot/Excluded_Proteome + :uniprot/Gene + :uniprot/Citation + :uniprot/Attribution + :uniprot/Organelle + :uniprot/Status + :uniprot/Part + :uniprot/Participant + :uniprot/Journal + :uniprot/Structure_Mapping_Statement + :uniprot/Proteome + :uniprot/Nucleotide_Mapping_Statement + :uniprot/Method + :uniprot/Taxon + :uniprot/Molecule + :uniprot/Obsolete + :uniprot/Disease + :uniprot/Resource + :uniprot/Proteome_Component + :uniprot/Cluster + :uniprot/Domain_Assignment_Statement + :uniprot/Protein_Existence + :uniprot/Subcellular_Location + :uniprot/Transposon + :uniprot/Plasmid + :uniprot/Concept + :uniprot/Annotation + :uniprot/Endpoint_Statement + :uniprot/Protein + :uniprot/Tissue + :uniprot/Sequence + :uniprot/Strain + :uniprot/Interaction + :uniprot/Catalytic_Activity + :uniprot/Not_Obsolete + :uniprot/Pathway + :uniprot/Citation_Statement + :uniprot/Rank + 
:uniprot/Reviewed) + + + +(comment + (count (instances :uniprot/Pathway)) + ; 3117 + (count (instances :uniprot/Disease)) + ; 6202 whoops now 0, wtf? + (count (instances :uniprot/Molecule)) + ) + +(comment +(frequencies (map :rdf/type (vals uniprot-ontology) )) +{(:owl/DatatypeProperty) 43, + (:owl/ObjectProperty) 67, + (:owl/NamedIndividual :owl/Thing :uniprot/Organelle) 9, + (:owl/NamedIndividual :owl/Thing :uniprot/Rank) 31, + (:owl/Class) 168, + (:owl/InverseFunctionalProperty :owl/FunctionalProperty :owl/ObjectProperty) 1, + (:owl/FunctionalProperty :owl/ObjectProperty) 6, + (:owl/FunctionalProperty :owl/DatatypeProperty) 31, + (:owl/NamedIndividual :owl/Thing :uniprot/Status) 4, + (:owl/NamedIndividual :owl/Thing :uniprot/Protein_Existence) 5, + (:owl/NamedIndividual :owl/Thing :uniprot/Mass_Measurement_Method) 7, + (:owl/NamedIndividual :owl/Thing :uniprot/Structure_Determination_Method) 7} +) + +;;; Alzabo schema gen + +;;; TODO should include the real URI somewhere + +;;; TODO Alzabo has no concept of subclass, would be interesting to add +;;; For now, it compresses everything into top classes + +;;; Remove namespace (see u/d-ns) +(defn nons + [key] + (if (keyword key) + (keyword (name key)) + key)) + +;;; TODO add skos etc fields +(defn class-alzabo-fields + [class] + (apply + merge + {:label {:type :string + :uri :rdfs/label + :attribute :rdfs/label}} + (for [[n d] (properties-for-domain class)] + {(nons n) + {:type (or (nons (first (:rdfs/range d))) + :string) ;temp but nil doen't work + ;; :cardinality ... 
+ :uri n + :attribute n ;aka :uri, but this leverages existing mechanisms + :doc (first (:rdfs/comment d))}} + ) + )) + +(defn alzabo + [] + (u/clean-walk + {:title "UNIPROT" + :kinds + (apply + merge + (for [[tc tc-def] (top-classes)] + {(nons tc) + {:doc (first (:rdfs/comment tc-def)) + :title (first (:rdfs/label tc-def)) ;not actually used or defined + :fields (or (class-alzabo-fields tc) {}) + :uri tc + }}))} + nil?)) + + +#_ +(ju/schppit "uniprot-ontology.edn" uniprot-ontology) + +#_ +(ju/schppit "resources/uniprot-alzabo.edn" (alzabo)) + +#_ +(frequencies (map :rdfs/domain (vals (properties)))) + +;;; Huh. +#_ +{nil 17, + (:uniprot/Subcellular_Location_Annotation) 1, + (:uniprot/Structured_Name) 1, + (:uniprot/Proteome) 2, + (:uniprot/Interaction) 1, + (:uniprot/Reviewed_Protein) 1, + (_626915beb033654fc13c8409d68fbefb) 1, + (:uniprot/RNA_Editing_Annotation) 1, + (:uniprot/External_Sequence) 1, + (_135e6ace0ba508ab2319c50063cc0ede) 1, + (_7a99d05307375d434d4a3f01c938cad7) 1, + (_b868ac3eb7960429cb539cea5f6300ae) 1, + (:uniprot/Gene) 2, + (_ade505809c0086211dc02c0f9464258e) 1, + (:uniprot/Resource) 1, + (_cfd54a0e1d32a8d62b76f3490d7f2311) 1, + (:uniprot/Transcript_Resource) 2, + (:uniprot/Published_Citation) 2, + (_e978c46c81fd658d3e99796c5eaf4502) 1, + (_5fcb391e3136fc76ba988a8b5f961505) 1, + (_941b9dd17dd300caedb9cb8c0e3f958e) 1, + (:uniprot/Disease_Annotation) 1, + (_d0e6352838bda96a5d57075fed61b8fc) 1, + (:uniprot/Simple_Sequence) 2, + (:uniprot/Protein) 13, + (:uniprot/Modified_Sequence) 1, + (:uniprot/Enzyme) 2, + (:uniprot/Catalytic_Activity_Annotation) 2, + (_49396fea4d19b3d5b39285cc1252056b) 1, + (_e2daa4cde5ccb48ceaa946a7c97ec83e) 1, + (:uniprot/Journal) 1, + (_3725fc1a96109bd014937233e0cc1e80) 1, + (:uniprot/Cluster) 2, + (_086cb0592bb819a0cd93d44d3bf577d8) 1, + (:uniprot/Binding_Site_Annotation) 2, + (_63f5f671dd82a005a9e91a5c22c458a5) 1, + (:uniprot/Structure_Mapping_Statement) 1, + (_a2745b16016d308213a790963118d9a8) 1, + (:rdfs/Resource) 1, + 
(:uniprot/Thesis_Citation) 1, + (_d84a5a698f10400d9ffba7376653fc21) 1, + (:uniprot/Nucleotide_Resource) 2, + (_9d3897b13bb29c76ec292b254542decb) 1, + (:uniprot/Subcellular_Location) 1, + (_612e2c86654093537c55009db223f480) 1, + (:uniprot/Book_Citation) 2, + (:uniprot/Kinetics_Annotation) 2, + (:uniprot/Sequence) 3, + (:uniprot/Cofactor_Annotation) 1, + (:uniprot/Catalytic_Activity) 1, + (:uniprot/Citation) 5, + (:uniprot/Database) 5, + (:uniprot/Submission_Citation) 1, + (:uniprot/Taxon) 4, + (:uniprot/Attribution) 1, + (:uniprot/Citation_Statement) 2} + + + + + + + + + + + + + + + + + +#_ +#:uniprot{:submittedName (:uniprot/Part :uniprot/Protein), + :organism (:uniprot/Protein :uniprot/Sequence), + :date (:uniprot/Attribution :uniprot/Citation), + :representativeFor (:uniprot/Protein :uniprot/Sequence), + :mnemonic (:uniprot/Cluster :uniprot/Protein), + :reviewed (:uniprot/Protein :uniprot/Taxon), + :place (:uniprot/Book_Citation :uniprot/Thesis_Citation), + :substitution (:uniprot/Mutagenesis_Annotation :uniprot/Natural_Variant_Annotation), + :method (:uniprot/Mass_Spectrometry_Annotation :uniprot/Structure_Resource), + :obsolete (:uniprot/Protein :uniprot/Taxon), + :orientation (:uniprot/Cellular_Component :uniprot/cellularComponent), + :created (:uniprot/Protein :uniprot/Resource), + :structuredName (:uniprot/Part :uniprot/Protein), + :modified (:uniprot/Cluster :uniprot/Protein), + :cellularComponent (:uniprot/Cellular_Component :uniprot/cellularComponent), + :pages (:uniprot/Book_Citation :uniprot/Journal_Citation), + :volume (:uniprot/Book_Citation :uniprot/Journal_Citation), + :citation (:uniprot/Cellular_Component :uniprot/Database), + :alternativeName (:uniprot/Part :uniprot/Protein), + :conflictingSequence (:uniprot/Protein :uniprot/Sequence_Caution_Annotation), + :enzyme (:uniprot/Part :uniprot/Protein), + :attribution (:uniprot/Protein), + :recommendedName (:uniprot/Part :uniprot/Protein), + :sequence (:uniprot/Annotation :uniprot/Protein), + :replaces 
(:uniprot/Enzyme :uniprot/Protein), + :mappedAnnotation (:uniprot/Citation_Statement :uniprot/Protein), + :version (:uniprot/Protein :uniprot/Sequence), + :replacedBy (:uniprot/Enzyme :uniprot/Protein), + :topology (:uniprot/Cellular_Component :uniprot/cellularComponent)} + + + + From 526d25fa84d203ad40f4f41372939975de435bb2 Mon Sep 17 00:00:00 2001 From: Mike Travers Date: Mon, 30 Jan 2023 23:23:58 -0800 Subject: [PATCH 09/11] ontology generated cleanly, and multilevel queries kind of working --- resources/uniprot-alzabo.edn | 36 ++++++++---------- src/clj/org/parkerici/enflame/sparql.clj | 3 +- .../parkerici/enflame/uniprot_ontology.clj | 38 ++++++++++++------- .../parkerici/enflame/sparql/generate.cljc | 2 +- 4 files changed, 43 insertions(+), 36 deletions(-) diff --git a/resources/uniprot-alzabo.edn b/resources/uniprot-alzabo.edn index 7502bf1..3681d75 100644 --- a/resources/uniprot-alzabo.edn +++ b/resources/uniprot-alzabo.edn @@ -1,6 +1,3 @@ -;;; NOTE: hand edited, don't overwrite! - - {:title "UNIPROT", :kinds {:Organelle @@ -47,9 +44,7 @@ :scientificName {:type :string, :uri :uniprot/scientificName, - :attribute :uniprot/scientificName - :name? true - }, + :attribute :uniprot/scientificName}, :reviewed {:type :boolean, :uri :uniprot/reviewed, @@ -67,8 +62,10 @@ :attribute :uniprot/obsolete, :doc "True if this resource has been replaced or deleted."}, :host {:type :Taxon, :uri :uniprot/host, :attribute :uniprot/host}, - ;; Apparently not! 
- #_ :label #_ {:type :string, :uri :rdfs/label, :attribute :rdfs/label}, + :label + {:type :string, + :uri :uniprot/scientificName, + :attribute :uniprot/scientificName}, :synonym {:type :string, :uri :uniprot/synonym, @@ -135,7 +132,7 @@ :uri :uniprot/translatedFrom, :attribute :uniprot/translatedFrom}, :fragment - {:type :_934f2f4b1742160a7cabe73bfae412a8, + {:type :_6b6e68dc3dd2438642051e83c5d1bf84, :uri :uniprot/fragment, :attribute :uniprot/fragment, :doc @@ -149,7 +146,7 @@ :length {:type :int, :uri :uniprot/length, :attribute :uniprot/length}, :sequenceFor - {:type :_341fcbedbe860321d6a43b60b0bc5fe3, + {:type :_32ad4e65b5df61977d7de4210c121294, :uri :uniprot/sequenceFor, :attribute :uniprot/sequenceFor, :doc "A resource that describes this sequence."}, @@ -313,7 +310,7 @@ :doc "A publication from which data was by a extracted by a mapping from non UniProt origin, or which contains additional information."}, :replaces - {:type :_f1d2077fbda7736db042867774ef80f9, + {:type :_5714be8382f0d1adc921894c1fbb2dd1, :uri :uniprot/replaces, :attribute :uniprot/replaces, :doc "A resource that is replaced by this resource."}, @@ -331,7 +328,7 @@ :version {:type :int, :uri :uniprot/version, :attribute :uniprot/version}, :replacedBy - {:type :_8e5f52771e3b776839235d09ea0f4a4d, + {:type :_ce64111704c1e5e6af5a354e8540aeac, :uri :uniprot/replacedBy, :attribute :uniprot/replacedBy, :doc "A resource that replaces this resource."}, @@ -354,9 +351,8 @@ :Gene {:title "Gene", :fields - {:label {:type :string, :uri :rdfs/label, :attribute :rdfs/label}, - ;; Added by hand. 
Looks like at least some genes don't have :rdfs/label - :prefLabel {:type :string :uri :skos/prefLabel :attribute :skos/prefLabel} + {:label + {:type :string, :uri :skos/prefLabel, :attribute :skos/prefLabel}, :orfName {:type :string, :uri :uniprot/orfName, @@ -643,7 +639,7 @@ :doc "A rememberable string that can be used to find entries, not a stable identifier!"}, :someMembersClassifiedWith - {:type :_3c618eea3d8a02bf75309716a05b94be, + {:type :_0e18955f45d54a0023e960318b145869, :uri :uniprot/someMembersClassifiedWith, :attribute :uniprot/someMembersClassifiedWith, :doc @@ -654,7 +650,7 @@ :attribute :uniprot/modified, :doc "The date a resource was last modified."}, :identity - {:type :_fe11e2bb13384b4238907a7d7b640362, + {:type :_461878d3ae1962d08aa95249c92dce6f, :uri :uniprot/identity, :attribute :uniprot/identity, :doc "The level of sequence identity in a cluster."}, @@ -732,12 +728,12 @@ :fields {:label {:type :string, :uri :rdfs/label, :attribute :rdfs/label}, :replaces - {:type :_f1d2077fbda7736db042867774ef80f9, + {:type :_5714be8382f0d1adc921894c1fbb2dd1, :uri :uniprot/replaces, :attribute :uniprot/replaces, :doc "A resource that is replaced by this resource."}, :replacedBy - {:type :_8e5f52771e3b776839235d09ea0f4a4d, + {:type :_ce64111704c1e5e6af5a354e8540aeac, :uri :uniprot/replacedBy, :attribute :uniprot/replacedBy, :doc "A resource that replaces this resource."}, @@ -932,7 +928,7 @@ :uri :uniprot/disease, :attribute :uniprot/disease}, :locatedIn - {:type :_f1cd6d4fb521d72fcc3b9cadda2e2178, + {:type :_c425b8262b8c8f24983a3e71d90a3858, :uri :uniprot/locatedIn, :attribute :uniprot/locatedIn}, :measuredValue diff --git a/src/clj/org/parkerici/enflame/sparql.clj b/src/clj/org/parkerici/enflame/sparql.clj index 7c865d6..5bba559 100644 --- a/src/clj/org/parkerici/enflame/sparql.clj +++ b/src/clj/org/parkerici/enflame/sparql.clj @@ -12,7 +12,8 @@ ;;; Not clear what Aristotle does that isn't better handled by Jena SSE 
https://jena.apache.org/documentation/notes/sse.html -(def sparql-default-limit 2000) ;Sanity preservation. +;;; Warning: needs to be at lease 2700 for uniprot ontology query +(def sparql-default-limit 3000) ;Sanity preservation. (defn ->sparql [bgp & {:keys [limit] :or {limit sparql-default-limit}}] (let [query (-> bgp diff --git a/src/clj/org/parkerici/enflame/uniprot_ontology.clj b/src/clj/org/parkerici/enflame/uniprot_ontology.clj index 185cb44..91262ad 100644 --- a/src/clj/org/parkerici/enflame/uniprot_ontology.clj +++ b/src/clj/org/parkerici/enflame/uniprot_ontology.clj @@ -23,12 +23,9 @@ (eval (curried-api 'org.parkerici.enflame.sparql endpoint)) +;; Make Statement by a top classs, otherwise we get a lot of others like Endpoint_Statement (def external-ontology '{:rdf/Statement {:rdf/type (:owl/Class)} - ;; This way might be cleaner but more work - ;; :Taxon {:fields {:scientificName {:name? true}}} - :Taxon {:fields {:label {:type :string :uri :uniprot/scientificName :attribute :uniprot/scientificName}}} - :Gene {:fields {:label {:type :string :uri :skos/prefLable :attribute :skos/prefLable}}} }) ;;; ❖⟐❖ fixing blank-node domains ❖⟐❖⟐❖⟐❖⟐❖⟐❖⟐❖⟐❖⟐❖⟐❖⟐❖⟐❖⟐❖⟐❖⟐❖⟐❖⟐❖⟐❖⟐❖⟐❖⟐❖⟐❖⟐❖⟐❖⟐❖⟐❖⟐❖⟐❖⟐❖ @@ -54,7 +51,8 @@ ontology-in atts))) -;;; TODO shouldn't run on compile +;;; This is a map of all classes and attributes, and their properties +;;; incomplete though, it doesn't include skos properties eg (def uniprot-ontology (-> (sq/entify @@ -64,7 +62,7 @@ (merge external-ontology) ;; This one field comes back with an unserializable object, just patch it ;; Real thing (.-lexicalValue _) if need be - (assoc-in [:uniprot/Pathway :rdfs/label] '("Pathway")) +; (assoc-in [:uniprot/Pathway :rdfs/label] '("Pathway")) fix-domains)) (defn filtered-by @@ -228,28 +226,40 @@ ) )) +(def schema-extras + { + ;; This way might be cleaner but more work + ;; :Taxon {:fields {:scientificName {:name? 
true}}} + :Taxon {:fields {:label {:type :string :uri :uniprot/scientificName :attribute :uniprot/scientificName}}} + :Gene {:fields {:label {:type :string :uri :skos/prefLabel :attribute :skos/prefLabel}}} + }) + + + (defn alzabo [] (u/clean-walk {:title "UNIPROT" :kinds - (apply - merge - (for [[tc tc-def] (top-classes)] + (u/merge-recursive + (apply + merge + (for [[tc tc-def] (top-classes)] {(nons tc) {:doc (first (:rdfs/comment tc-def)) :title (first (:rdfs/label tc-def)) ;not actually used or defined :fields (or (class-alzabo-fields tc) {}) :uri tc - }}))} + }})) + schema-extras)} nil?)) +(defn write-ontology + [] + (ju/schppit "resources/uniprot-alzabo.edn" (alzabo))) + -#_ -(ju/schppit "uniprot-ontology.edn" uniprot-ontology) -#_ -(ju/schppit "resources/uniprot-alzabo.edn" (alzabo)) #_ (frequencies (map :rdfs/domain (vals (properties)))) diff --git a/src/cljc/org/parkerici/enflame/sparql/generate.cljc b/src/cljc/org/parkerici/enflame/sparql/generate.cljc index 751e70c..53e3119 100644 --- a/src/cljc/org/parkerici/enflame/sparql/generate.cljc +++ b/src/cljc/org/parkerici/enflame/sparql/generate.cljc @@ -166,7 +166,7 @@ [{:keys [current-var] :as query} blockspec] (let [{:keys [attribute] :as blockdef} (spec-block-def blockspec) value (query-value blockspec blockdef "V") - var (if (= attribute :rdfs/label) ;TODO kludge + var (if (= "label %2 %1" (:message0 blockdef)) ;TODO kludge – better to put :label in blockdef somewhere (label-var current-var) (?var (:attribute blockdef))) comp (keyword (query-value blockspec blockdef "comp"))] From 5a882dcbf9e1cf6cf479b33fe35ba81561b01c0d Mon Sep 17 00:00:00 2001 From: Mike Travers Date: Mon, 6 Feb 2023 13:31:39 -0800 Subject: [PATCH 10/11] Add card for native-format SPARQL display --- resources/uniprot-config.edn | 1 + src/clj/org/parkerici/enflame/server.clj | 9 ++++ .../parkerici/enflame/view/card/sparql.cljs | 43 +++++++++++++++++++ src/cljs/org/parkerici/enflame/views.cljs | 4 +- 4 files changed, 56 insertions(+), 1
deletion(-) create mode 100644 src/cljs/org/parkerici/enflame/view/card/sparql.cljs diff --git a/resources/uniprot-config.edn b/resources/uniprot-config.edn index 140fe67..739d4cd 100644 --- a/resources/uniprot-config.edn +++ b/resources/uniprot-config.edn @@ -16,6 +16,7 @@ :port 1992 :dev? true ;TODO control this somehow maybe aero/env :rh-cards [:query + :sparql #_ :share :compact ;debug only :browser ;someday diff --git a/src/clj/org/parkerici/enflame/server.clj b/src/clj/org/parkerici/enflame/server.clj index 0ed5712..8cd1d40 100644 --- a/src/clj/org/parkerici/enflame/server.clj +++ b/src/clj/org/parkerici/enflame/server.clj @@ -57,6 +57,14 @@ (response/response {:count (count results) :clipped (count clipped) :results clipped}))) +;;; Kludge because edn → sparql needs to be done on server +(defn handle-query-translate + [req config] + (let [{:keys [query]} (:params req)] + (response/response + (case (:type (:source config)) + :sparql (sparql/->sparql (read-string query)))))) + (defn handle-download [req config] (let [{:keys [query db]} (:params req) @@ -136,6 +144,7 @@ (GET "/schema" [version] (response/response (config/read-schema version))) (GET "/query" req (handle-query req config)) + (GET "/query-translate" req (handle-query-translate req config)) (context "/library" [] (GET "/get" [key] (handle-get key)) diff --git a/src/cljs/org/parkerici/enflame/view/card/sparql.cljs b/src/cljs/org/parkerici/enflame/view/card/sparql.cljs new file mode 100644 index 0000000..4012666 --- /dev/null +++ b/src/cljs/org/parkerici/enflame/view/card/sparql.cljs @@ -0,0 +1,43 @@ +(ns org.parkerici.enflame.view.card.sparql + (:require + [re-frame.core :as rf] + [org.parkerici.enflame.view.utils :as vu] + [reagent.dom.server] + [org.parkerici.multitool.core :as u] + [org.parkerici.enflame.api :as api] + ) + ) + +;;; This should all be bundled up in an abstraction, it's ridiculous to +;;; have to choreograph by hand this kind of thing over and over. TODO!
+ +(rf/reg-event-db + :native-fetch + (fn [db [_ q]] + (api/ajax-get "/api/query-translate" + {:url-params {:query (str q)} + :response-format :text + :handler (fn [resp] + (rf/dispatch [:got-translation q resp]))}) + (assoc-in db [:card :sparql :native-query q] "Pending..."))) + +;;; A small trick, using the reframe db for memoization. Might as well! +(rf/reg-event-db + :got-translation + (fn [db [_ q trans]] + (assoc-in db [:card :sparql :native-query q] trans))) + +(rf/reg-sub + :native-query + (fn [db [_ q]] + (or (get-in db [:card :sparql :native-query q]) + (do (rf/dispatch [:native-fetch q]) + "pending...")))) + +(defn card + [] + (let [query @(rf/subscribe [:query]) + native @(rf/subscribe [:native-query query])] + [vu/card "SPARQL" + [:pre + native]])) diff --git a/src/cljs/org/parkerici/enflame/views.cljs b/src/cljs/org/parkerici/enflame/views.cljs index dd2f6c6..16df0aa 100644 --- a/src/cljs/org/parkerici/enflame/views.cljs +++ b/src/cljs/org/parkerici/enflame/views.cljs @@ -14,6 +14,7 @@ [org.parkerici.enflame.candel.query :as query] ;TOODO [org.parkerici.enflame.datomic :as datomic] [org.parkerici.enflame.view.candel-cards :as candel] + [org.parkerici.enflame.view.card.sparql :as sparql] ;TODO other cards in this namespace [org.parkerici.enflame.view.library :as library] ) ) @@ -329,7 +330,8 @@ :xml xml-card :share library/share-card :browser obrowser/browser - :graph graph-pane}) + :graph graph-pane + :sparql sparql/card}) (defn rh-panel [] From 6fc8dfe41554b6690e80ea512d042a2a5c3dc015 Mon Sep 17 00:00:00 2001 From: Mike Travers Date: Wed, 8 Feb 2023 09:45:11 -0800 Subject: [PATCH 11/11] gen tweaks --- src/cljc/org/parkerici/enflame/blockdefs.cljc | 2 ++ .../org/parkerici/enflame/sparql/generate.cljc | 14 +++++++------- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/src/cljc/org/parkerici/enflame/blockdefs.cljc b/src/cljc/org/parkerici/enflame/blockdefs.cljc index 6bf9681..c2591fc 100644 --- 
a/src/cljc/org/parkerici/enflame/blockdefs.cljc +++ b/src/cljc/org/parkerici/enflame/blockdefs.cljc @@ -78,6 +78,8 @@ (defn kind-color [kind] (or (kind-defined-color kind) + ;; Crock, makes uniprot use Candel-compatible colors for common classes + (kind-defined-color (keyword (clojure.string/lower-case (name kind)))) ;; Will do something vaguely reasonable for unknown kinds (TODO would be good to control the color a bit) (str "#" (u/hex-string (mod (hash kind) 0xffffff))))) diff --git a/src/cljc/org/parkerici/enflame/sparql/generate.cljc b/src/cljc/org/parkerici/enflame/sparql/generate.cljc index 53e3119..5ebc9e2 100644 --- a/src/cljc/org/parkerici/enflame/sparql/generate.cljc +++ b/src/cljc/org/parkerici/enflame/sparql/generate.cljc @@ -94,7 +94,7 @@ ;;; Actual ;;; TODO copypasta from candel.query, could be abstracted up -(defmulti build-query (fn [_ blockspec] +(defmulti build-query (fn [x blockspec] (-> blockspec :type blockdefs/block-def @@ -107,7 +107,7 @@ (defn build-top-query [blockspec] - #_ (reset-vars) + (reset-vars) (when blockspec (let [{:keys [filter where find] :as built} (build-query {} (assoc blockspec :top? true)) @@ -131,7 +131,7 @@ (defn kind-label [kind] - :rdfs/label) + (get-in (schema/kind-def kind) [:fields :label :uri])) (defmethod build-query :query-builder-query [{:keys [current-var] :as _query} {:keys [top?] :as blockspec}] @@ -150,11 +150,11 @@ subquery-filters (mapcat :filter subqueries) base-query {:find (concat (select-terms output-var output-type) - subquery-selects) + subquery-selects) :where (if-let [label-attribute - (and top? - (empty? subquery-wheres) - (kind-label output))] + (and ;; top? + ;; (empty? subquery-wheres) + (kind-label output))] (cons [output-var label-attribute (label-var output-var)] base-wheres) base-wheres) :filter subquery-filters