diff --git a/src/rest_api/classes/cds.clj b/src/rest_api/classes/cds.clj index 7961d014..2f1b5206 100644 --- a/src/rest_api/classes/cds.clj +++ b/src/rest_api/classes/cds.clj @@ -16,7 +16,7 @@ {:overview overview/widget :location location/widget :feature feature/widget - ;:sequences sequences/widget + :sequences sequences/widget :reagents reagents/widget :external_links external-links/widget :references references/widget} diff --git a/src/rest_api/classes/cds/widgets/overview.clj b/src/rest_api/classes/cds/widgets/overview.clj index 08680f60..e74b0a05 100644 --- a/src/rest_api/classes/cds/widgets/overview.clj +++ b/src/rest_api/classes/cds/widgets/overview.clj @@ -7,7 +7,6 @@ (defn description [cds] {:data (:cds.detailed-description/text (first (:cds/detailed-description cds))) - :d (:db/id cds) :description (str "description of the CDS " (:cds/id cds))}) (defn partial-field [cds] diff --git a/src/rest_api/classes/cds/widgets/sequences.clj b/src/rest_api/classes/cds/widgets/sequences.clj index 0419d803..cfd139e6 100644 --- a/src/rest_api/classes/cds/widgets/sequences.clj +++ b/src/rest_api/classes/cds/widgets/sequences.clj @@ -7,21 +7,15 @@ [rest-api.classes.generic-functions :as generic-functions] [rest-api.formatters.object :as obj :refer [pack-obj]])) -(defn unspliced-sequence-context [c] - {:data (when-let [transcript (-> c +(defn cds-sequence [c] + {:data (when-let [transcript (or + (-> c :transcript.corresponding-cds/_cds first - :transcript/_corresponding-cds)] - (sequence-fns/transcript-sequence-features transcript 0 "unspliced")) - :description "the unpliced sequence of the sequence"}) - -(defn spliced-sequence-context [c] - {:data (when-let [transcript (-> c - :transcript.corresponding-cds/_cds - first - :transcript/_corresponding-cds)] - (sequence-fns/transcript-sequence-features transcript 0 "spliced")) - :description "the unpliced sequence of the sequence"}) + :transcript/_corresponding-cds) + c)] + (sequence-fns/transcript-sequence-features transcript 0 :cds)) + :description "the spliced sequence of the transcripts without UTR"}) (defn protein-sequence [c] {:data (when-let [peptide (some->> (:cds/corresponding-protein c) @@ -46,9 +40,6 @@ (def widget {:name generic/name-field - :predicted_exon_structure generic/predicted-exon-structure :print_blast print-blast :protein_sequence protein-sequence - :predicted_unit generic/predicted-units - :unspliced_sequence_context unspliced-sequence-context - :spliced_sequence_context spliced-sequence-context}) + :cds_sequence cds-sequence}) diff --git a/src/rest_api/classes/generic_fields.clj b/src/rest_api/classes/generic_fields.clj index 0c4f8c16..88067b81 100644 --- a/src/rest_api/classes/generic_fields.clj +++ b/src/rest_api/classes/generic_fields.clj @@ -89,9 +89,7 @@ [(some->> (:cds.corresponding-protein/_protein object) (map :cds/_corresponding-protein) (filter #(not= "history" (:method/id (:locatable/method %)))) - (map :gene.corresponding-cds/_cds) - first - (map :gene/_corresponding-cds)) + first) "gene"] :else @@ -535,19 +533,33 @@ (corresponding-all-gene gene object role nil)))) "cds" - (when-let [ths (:transcript.corresponding-cds/_cds object)] - (let [genes - (distinct - (flatten - (for [th ths - :let [ghs (:gene.corresponding-transcript/_transcript - (:transcript/_corresponding-cds th))]] - (for [gh ghs - :let [gene (:gene/_corresponding-transcript gh)]] - gene))))] - (flatten - (for [gene genes] - (corresponding-all-gene gene object role nil))))) + (or + (when-let [ths (:transcript.corresponding-cds/_cds object)] + (let [genes + (distinct + (flatten + (for [th ths + :let [ghs (:gene.corresponding-transcript/_transcript + (:transcript/_corresponding-cds th))]] + (for [gh ghs + :let [gene (:gene/_corresponding-transcript gh)]] + gene))))] + (flatten + (for [gene genes] + (corresponding-all-gene gene object role nil))))) + (when-let [ths (:transposon.corresponding-cds/_cds object)] + (let [genes + (distinct + (flatten + (for [th ths + :let [ghs (:gene.corresponding-transposon/_transposon + (:transposon/_corresponding-cds th))]] + (for [gh ghs + :let [gene (:gene/_corresponding-transposon gh)]] + gene))))] + (flatten + (for [gene genes] + (corresponding-all-gene gene object role nil)))))) "protein" ;; need to make it filter for only the row with the protein (when-let [cdshs (:cds.corresponding-protein/_protein object)] diff --git a/src/rest_api/classes/pseudogene.clj b/src/rest_api/classes/pseudogene.clj index fd76e382..f916b72b 100644 --- a/src/rest_api/classes/pseudogene.clj +++ b/src/rest_api/classes/pseudogene.clj @@ -17,7 +17,7 @@ :feature feature/widget :genetics genetics/widget :reagents reagents/widget - ;:sequences sequences/widget + :sequences sequences/widget :expression expression/widget :location location/widget } diff --git a/src/rest_api/classes/pseudogene/widgets/sequences.clj b/src/rest_api/classes/pseudogene/widgets/sequences.clj index 88fc2a64..0c4eaa2a 100644 --- a/src/rest_api/classes/pseudogene/widgets/sequences.clj +++ b/src/rest_api/classes/pseudogene/widgets/sequences.clj @@ -12,7 +12,7 @@ (name strand-kw)) :description "strand orientation of the sequence"}) -(defn print-sequence [p] +(defn sequence-context [p] {:data (when-let [refseqobj (sequence-fns/genomic-obj p)] (when-let [dna-sequence (sequence-fns/get-sequence refseqobj)] (let [strand (if-let [strand-kw (:locatable/strand p)] @@ -36,4 +36,4 @@ {:name generic/name-field :predicted_exon_structure generic/predicted-exon-structure :strand strand - :print_sequence print-sequence}) + :sequence_context sequence-context}) diff --git a/src/rest_api/classes/sequence/core.clj b/src/rest_api/classes/sequence/core.clj index 8595e47e..2560e97c 100644 --- a/src/rest_api/classes/sequence/core.clj +++ b/src/rest_api/classes/sequence/core.clj @@ -16,15 +16,15 @@ (defn get-g-species [object role] (when-let [species-name (:species/id - (or - ((keyword role "species") object) - (or - (:clone/species - (first - (:pcr-product/clone object))) - (:transcript/species - (first - (:transcript/_corresponding-pcr-product object))))))] + (or + ((keyword role "species") object) + (or + (:clone/species + (first + (:pcr-product/clone object))) + (:transcript/species + (first + (:transcript/_corresponding-pcr-product object))))))] (generic-functions/xform-species-name species-name))) (defn get-segments [object] @@ -35,12 +35,13 @@ (when sequence-database (sequence-features sequence-database (id-kw object) role))))) -(defn get-transcript-segments [object] - (let [g-species (get-g-species object "transcript") +(defn get-transcript-segments [object feature-id] + (let [g-species (or (get-g-species object "transcript") + (get-g-species object "cds")) sequence-database (seqdb/get-default-sequence-database g-species)] (when sequence-database (when-let [db ((keyword sequence-database) wb-seq/sequence-dbs)] - (wb-seq/get-seq-features db (:transcript/id object)))))) + (wb-seq/get-seq-features db feature-id))))) (defn longest-segment [segments] (first @@ -51,6 +52,11 @@ (if (seq segments) (longest-segment segments)))) +(defn get-transcript-segment [object] + (some->> (get-segments object) + (filter #(not (str/starts-with? (:type %) "Gene"))) + (first))) + (defn create-genomic-location-obj [start stop object segment tracks gbrowse] (let [id-kw (first (filter #(= (name %) "id") (keys object))) role (namespace id-kw) @@ -69,6 +75,7 @@ (pace-utils/vmap :class "genomic_location" :id id + :feature_id (id-kw object) :label label :pos_string id :seqname (:seqname segment) @@ -78,14 +85,19 @@ :tracks tracks))) (defn genomic-obj [object] - (when-let [segment (get-longest-segment object)] + (let [id-kw (first (filter #(= (name %) "id") (keys object))) + role (namespace id-kw)] + (when-let [segment (if (or (= "cds" role) + (= "transcript" role)) + (get-transcript-segment object) + (get-longest-segment object))] (let [[start stop] (->> segment ((juxt :start :end)) (sort-by +))] - (create-genomic-location-obj start stop object segment nil true)))) + (create-genomic-location-obj start stop object segment nil true))))) -(defn genomic-obj-child-positions [object] - (some->> (get-transcript-segments object) +(defn genomic-obj-child-positions [object feature-id] + (some->> (get-transcript-segments object feature-id) (map (fn [feature] (conj feature @@ -129,62 +141,112 @@ :stop feature-end :type (:type feature)})) -(defn transcript-sequence-features [transcript padding status] ; status can be spliced or unspliced +(defn- get-spliced-exon-positions [positive-features] + (let [last-stop (atom 0)] + (for [feature (sort-by :start positive-features) + :when (= (:type feature) :exon) + :let [last-stop-position @last-stop + new-stop-position (+ last-stop-position + (+ 1 + (- (:stop feature) (:start feature)))) + new-start-position (+ 1 last-stop-position)]] + (do (reset! last-stop new-stop-position) + {:start new-start-position + :stop new-stop-position + :type (:type feature)})))) + +(defn- add-introns [features] + (let [features-with-introns (atom ()) + intron-and-exon-features (filter #(or (= "exon" (:type %)) + (= "intron" (:type %))) features) + non-intron-and-exon-features (filter #(and (not= "exon" (:type %)) + (not= "intron" (:type %))) features)] + (do + (doseq [feature (sort-by :start intron-and-exon-features)] + (if (or (= (count @features-with-introns) 0) + (= (:stop (last @features-with-introns)) + (- (:start feature) 1))) + (swap! features-with-introns conj feature) + (do + (swap! features-with-introns conj (let [stop (- (:start feature) 1)] + {:start (+ (:stop (last + (filter + #(< (:stop %) stop) + (sort-by :start @features-with-introns)))) + 1) + :stop stop + :type "intron"})) + (swap! features-with-introns conj feature)))) + (doseq [feature (sort-by :start non-intron-and-exon-features)] + (swap! features-with-introns conj feature)) + @features-with-introns))) + +(defn- add-padding-to-feature-list [features padding length] + (when (> padding 0) + ((comp vec flatten conj) features + [{:type :padding + :start 1 + :stop padding} + {:type :padding + :start (+ (- length padding) 1) + :stop length}]))) + +(defn transcript-sequence-features [transcript padding status] (when-let [refseq-obj (genomic-obj transcript)] - (let [sequence-strand (if (> (:start refseq-obj) (:stop refseq-obj)) - "negative" - "positive") - seq-features (genomic-obj-child-positions transcript) - three-prime-utr (filter (comp #{"three_prime_UTR"} :type) seq-features) - five-prime-utr (filter (comp #{"five_prime_UTR"} :type) seq-features) - features-raw (when-let [features seq-features] - (some->> features - (map (fn [feature] - (when (not= "CDS" (:type feature)) - {:start (+ 1 - (+ padding - (if (= sequence-strand "positive") - (- (if-let [three-prime-utr-start (:start three-prime-utr)] - three-prime-utr-start - (:start feature)) - (:start refseq-obj)) - (- (:stop feature) - (if-let [three-prime-utr-start (:start three-prime-utr)] - three-prime-utr-start - (:stop refseq-obj)))))) - :stop (+ 1 - (+ padding - (if (= sequence-strand "positive") - (- (:stop feature) - (if-let [three-prime-utr-start (:start three-prime-utr)] - three-prime-utr-start - (:start refseq-obj))) - (- (:start feature) - (if-let [three-prime-utr-start (:start three-prime-utr)] - three-prime-utr-start - (:stop refseq-obj)))))) - :type (:type feature)}))) - (remove nil?))) + (let [seq-features (genomic-obj-child-positions transcript (:feature_id refseq-obj)) + status-parts (case status + :spliced + #{:exon :three_prime_UTR :five_prime_UTR} + + :cds + #{:exon} + + #{:intron :exon :three_prime_UTR :five_prime_UTR}) + three-prime-utr (first (filter (comp #{"three_prime_UTR"} :type) seq-features)) + five-prime-utr (first (filter (comp #{"five_prime_UTR"} :type) seq-features)) + cds (first (filter (comp #{"CDS"} :type) seq-features)) + mrna (first (filter (comp #{"mRNA"} :type) seq-features)) + sequence-strand (if (some nil? [three-prime-utr five-prime-utr]) + (when-let [strand (:locatable/strand transcript)] + (cond + (= strand :locatable.strand/negative) "-" + (= strand :locatable.strand/positive) "+")) + (if (< (:start five-prime-utr) (:stop three-prime-utr)) "+" "-")) + context-obj (if (and (= status :cds) (some? cds)) cds refseq-obj) + [context-left context-right] (if (neg? (- (:start context-obj) (:stop context-obj))) + [(- (:start context-obj) padding) (+ (:stop context-obj) padding)] + [(- (:stop context-obj) padding) (+ (:start context-obj) padding)]) + + seq-features-with-introns (add-introns seq-features) + positive-features (some->> seq-features-with-introns + (map (fn [feature] + (let [feature-type (keyword (:type feature)) + [left-position right-position] + (if (neg? (- (:start feature) (:stop feature))) + [(:start feature) (:stop feature)] + [(:stop feature) (:start feature)])] + (when (and (not= feature-type :CDS) + (and (not= feature-type :mRNA) + (not + (and (= status :cds) + (or (= feature-type :five_prime_UTR) + (= feature-type :three_prime_UTR)))))) + {:start (let [start (+ 1 (- left-position context-left))] + (if (neg? start) 1 start)) + :stop (let [stop (+ 1 (- right-position context-left))] + (let [length (+ 1 (- context-right context-left))] + (if (> stop length) length stop))) + :type feature-type})))) + (remove nil?)) sequence-positive-raw (get-sequence (conj refseq-obj - (if (= sequence-strand "positive") - {:start (if-let [three-prime-utr-start (:start three-prime-utr)] - (- three-prime-utr-start padding) - (- (:start refseq-obj) padding)) - :stop (if-let [five-prime-utr-stop (:stop five-prime-utr)] - (+ five-prime-utr-stop padding) - (+ (:stop refseq-obj) padding))} - {:start (if-let [five-prime-utr-start (:start five-prime-utr)] - (- five-prime-utr-start padding) - (- (:stop refseq-obj) padding)) - :stop (if-let [three-prime-utr-start (:start three-prime-utr)] - (+ three-prime-utr-start padding) - (+ (:start refseq-obj) padding))}))) + {:start context-left + :stop context-right})) sequence-positive (let [dna-sequence (atom {:seq sequence-positive-raw})] (do - (doseq [feature features-raw - :when (= "exon" (:type feature))] + (doseq [feature positive-features + :when (= :exon (:type feature))] (swap! dna-sequence assoc :seq @@ -195,9 +257,9 @@ (+ 1 (- (:stop feature) (:start feature)))))) - (doseq [feature features-raw - :when (or (= "three_prime_UTR" (:type feature)) - (= "five_prime_UTR" (:type feature)))] + (doseq [feature positive-features + :when (or (= :three_prime_UTR (:type feature)) + (= :five_prime_UTR (:type feature)))] (swap! dna-sequence assoc :seq @@ -208,9 +270,9 @@ (+ 1 (- (:stop feature) (:start feature)))))) - (if (= status "spliced-plus-utr") - (doseq [feature (reverse (sort-by :start features-raw)) - :when (contains? (set `("intron" "three_prime_UTR" "five_prime_UTR")) (:type feature))] + (if (contains? #{:cds :spliced} status) + (doseq [feature (reverse (sort-by :start positive-features)) + :when (not (some #(= (:type feature) %) status-parts))] (swap! dna-sequence assoc :seq @@ -220,52 +282,51 @@ (- (:start feature) 1) (+ 1 (- (:stop feature) - (:start feature))))))) - (if (= status "spliced") - (doseq [feature (reverse (sort-by :start features-raw)) - :when (= "intron" (:type feature))] - (swap! dna-sequence - assoc - :seq - (replace-in-str - "remove" - (:seq @dna-sequence) - (- (:start feature) 1) - (+ 1 - (- (:stop feature) - (:start feature))))))) + (:start feature)))))))) + (:seq @dna-sequence)) + modified-positive-features (case status + :unspliced + positive-features - (:seq @dna-sequence))) + :cds + (get-spliced-exon-positions positive-features) - positive-features (if (= status "unspliced") - features-raw - (let [pos-features (atom {:features ()}) - next-feature-start (atom 1)] - (do - (doseq [feature (sort-by :start features-raw) - :when (and - (not= "three_prime_UTR" (:type feature)) - (not= "five_prime_UTR" (:type feature)) - (not= "intron" (:type feature))) - :let [feature-end (+ (deref next-feature-start) (- (:stop feature) (:start feature)))]] - (swap! pos-features - assoc - :features - (add-feature (:features @pos-features) feature (deref next-feature-start) feature-end)) - (reset! next-feature-start (+ 1 (+ (deref next-feature-start) (- (:stop feature) (:start feature)))))) - (:features @pos-features))))] - {:positive-strand - {:features positive-features - :sequence sequence-positive} - :negative-strand - {:features (when-let [seq-length (count sequence-positive)] - (let [neg-features (atom {:features ()})] - (do - (doseq [feature positive-features] - (swap! neg-features - assoc - :features - (feature-complement (:features @neg-features) feature seq-length))) - (:features @neg-features)))) - :sequence (generic-functions/dna-reverse-complement sequence-positive)} - :sequence_strand sequence-strand}))) + :spliced + (remove nil? + (flatten + (conj + (get-spliced-exon-positions positive-features) + (if (= sequence-strand "+") + (first (filter #(= (:type %) :five_primeUTR) positive-features)) + (first (filter #(= (:type %) :three_prime_UTR) positive-features))) + (let [feature (if (= sequence-strand "+") + (first (filter #(= (:type %) :three_prime_UTR) positive-features)) + (first (filter #(= (:type %) :five_prime_UTR) positive-features))) + end (count sequence-positive)] + (if (some? feature) + (conj + feature + {:start (- end + (+ 1 (- (:stop feature) (:start feature)))) + :stop (count sequence-positive)}))))))) + modified-positive-features-with-padding (if (> padding 0) + (add-padding-to-feature-list + modified-positive-features + padding + (count sequence-positive)) + modified-positive-features)] + {:positive_strand + {:features modified-positive-features-with-padding + :sequence sequence-positive} + :negative_strand + {:features (when-let [seq-length (count sequence-positive)] + (let [neg-features (atom {:features ()})] + (do + (doseq [feature modified-positive-features-with-padding] + (swap! neg-features + assoc + :features + (feature-complement (:features @neg-features) feature seq-length))) + (:features @neg-features)))) + :sequence (generic-functions/dna-reverse-complement sequence-positive)} + :strand sequence-strand}))) diff --git a/src/rest_api/classes/transcript.clj b/src/rest_api/classes/transcript.clj index cbeb7127..66a22595 100644 --- a/src/rest_api/classes/transcript.clj +++ b/src/rest_api/classes/transcript.clj @@ -42,7 +42,7 @@ :external_links external-links/widget :location location/widget :feature feature/widget - ;:sequences sequences/widget + :sequences sequences/widget :references references/widget} :field {:fpkm_expression_summary_ls exp/fpkm-expression-summary-ls}}) diff --git a/src/rest_api/classes/transcript/widgets/sequences.clj b/src/rest_api/classes/transcript/widgets/sequences.clj index 62f2d33b..da2bf27c 100644 --- a/src/rest_api/classes/transcript/widgets/sequences.clj +++ b/src/rest_api/classes/transcript/widgets/sequences.clj @@ -13,20 +13,22 @@ :description "strand orientation of the sequence"}) (defn unspliced-sequence-context-with-padding [t] - {:data (sequence-fns/transcript-sequence-features t 2001 "unspliced") + {:data (sequence-fns/transcript-sequence-features t 2000 :unspliced) :description "the unpliced sequence of the sequence"}) (defn unspliced-sequence-context [t] - {:data (sequence-fns/transcript-sequence-features t 0 "unspliced") + {:data (sequence-fns/transcript-sequence-features t 0 :unspliced) :description "the unpliced sequence of the sequence"}) (defn spliced-sequence-context [t] - {:data (sequence-fns/transcript-sequence-features t 0 "spliced-plus-utr") + {:data (sequence-fns/transcript-sequence-features t 0 :spliced) :description "the spliced sequence of the sequence"}) (defn protein-sequence [t] - {:data (when-let [peptide (some->> (:transcript/corresponding-protein t) - (:transcript.corresponding-protein/protein) + {:data (when-let [peptide (some->> (->> t :transcript/corresponding-cds + :transcript.corresponding-cds/cds + :cds/corresponding-protein) + (:cds.corresponding-protein/protein) (:protein/peptide) (:protein.peptide/peptide) (:peptide/sequence))] diff --git a/src/rest_api/db/sequence.clj b/src/rest_api/db/sequence.clj index 5d559ce0..63edd484 100644 --- a/src/rest_api/db/sequence.clj +++ b/src/rest_api/db/sequence.clj @@ -61,10 +61,17 @@ (sequencesql/get-features-by-id db-spec attribute))))) (defn sequence-features-where-type [db-spec feature-name method] - (sequencesql/sequence-features-where-type - db-spec - {:name feature-name - :tag method})) + (let [features (sequencesql/sequence-features-where-type + db-spec + {:name feature-name + :tag method}) ] + (if (> (count features) 0) + features + (when (= method "CDS%") + (sequencesql/sequence-features-where-type + db-spec + {:name feature-name + :tag "mRNA%"}))))) (defn variation-features [db-spec variation-name] (sequencesql/variation-features diff --git a/src/rest_api/db/sql/sequence.sql b/src/rest_api/db/sql/sequence.sql index b2a03dc7..32336b46 100644 --- a/src/rest_api/db/sql/sequence.sql +++ b/src/rest_api/db/sql/sequence.sql @@ -65,14 +65,14 @@ AND s.offset = :offset -- :name get-seq-features :? :* -- :doc Retreive all sequence features from transcript -SELECT tc.tag,fc.start AS start,fc.end AS stop +SELECT t.tag,fc.start AS start,fc.end AS stop FROM feature as f LEFT OUTER JOIN parent2child as pc ON pc.id=f.id -LEFT OUTER JOIN typelist as t ON t.id=f.typeid LEFT OUTER JOIN feature as fc ON pc.child=fc.id -LEFT OUTER JOIN typelist as tc ON tc.id=fc.typeid +LEFT OUTER JOIN typelist as t ON t.id=fc.typeid LEFT OUTER JOIN name as n ON n.id=f.id WHERE n.name = :name AND (t.tag LIKE "transcript%" OR t.tag LIKE "CDS%" + OR t.tag LIKE "exon%" OR t.tag LIKE "mRNA%")