From fb866fdb74581597de80b40325b6f32dc3a8f3f9 Mon Sep 17 00:00:00 2001 From: Adam Wright Date: Mon, 15 Apr 2019 18:37:07 +0000 Subject: [PATCH 01/15] starting to make the cds sequence --- src/rest_api/classes/sequence/core.clj | 41 +++++++++++++------ .../classes/transcript/widgets/sequences.clj | 2 +- 2 files changed, 29 insertions(+), 14 deletions(-) diff --git a/src/rest_api/classes/sequence/core.clj b/src/rest_api/classes/sequence/core.clj index ea87a9f6..612057ee 100644 --- a/src/rest_api/classes/sequence/core.clj +++ b/src/rest_api/classes/sequence/core.clj @@ -138,32 +138,47 @@ :stop feature-end :type (:type feature)})) -(defn transcript-sequence-features [transcript padding status] ; status can be spliced or unspliced +(defn transcript-sequence-features [transcript padding status] ; status can be spliced, spliced-with-utr, and unspliced (when-let [refseq-obj (genomic-obj transcript)] - (let [sequence-strand (if (> (:start refseq-obj) (:stop refseq-obj)) - "negative" - "positive") + (let [ + dddd (println refseq-obj) seq-features (genomic-obj-child-positions transcript) - three-prime-utr (filter (comp #{"three_prime_UTR"} :type) seq-features) - five-prime-utr (filter (comp #{"five_prime_UTR"} :type) seq-features) + ddddd (println seq-features) + three-prime-utr (first (filter (comp #{"three_prime_UTR"} :type) seq-features)) + five-prime-utr (first (filter (comp #{"five_prime_UTR"} :type) seq-features)) + sequence-strand (if (> (:start three-prime-utr) (:start five-prime-utr)) "+" "-") + start (if (= "+" sequence-strand) + (if (= status "spliced") + (+ 1 (:stop five-prime-utr)) + (:start five-prime-utr)) + (if (= status "spliced") + (- (:start three-prime-utr) 1) + (:stop three-prime-utr))) + stop (if (= "+" sequence-strand) + (if (= status "spliced") + (- (:start three-prime-utr) 1) + (:stop three-prime-utr)) + (if (= status "spliced") + (+ (:stop five-prime-utr) 1) + (:start three-prime-utr))) features-raw (when-let [features seq-features] (some->> features (map (fn [feature] (when (not= "CDS" (:type feature)) {:start (+ 1 (+ padding - (if (= sequence-strand "positive") - (- (if-let [three-prime-utr-start (:start three-prime-utr)] - three-prime-utr-start + (if (= sequence-strand "+") + (- (if (= status "spliced-with-utr" (if-let [five-prime-utr-start (:start five-prime-utr)] + five-prime-utr-start (:start feature)) (:start refseq-obj)) (- (:stop feature) - (if-let [three-prime-utr-start (:start three-prime-utr)] + (if-let [three-prime-utr-start (:start three-prime-utr)] three-prime-utr-start (:stop refseq-obj)))))) :stop (+ 1 (+ padding - (if (= sequence-strand "positive") + (if (= sequence-strand "+") (- (:stop feature) (if-let [three-prime-utr-start (:start three-prime-utr)] three-prime-utr-start @@ -177,7 +192,7 @@ sequence-positive-raw (get-sequence (conj refseq-obj - (if (= sequence-strand "positive") + (if (= sequence-strand "+") {:start (if-let [three-prime-utr-start (:start three-prime-utr)] (- three-prime-utr-start padding) (- (:start refseq-obj) padding)) @@ -217,7 +232,7 @@ (+ 1 (- (:stop feature) (:start feature)))))) - (if (= status "spliced-plus-utr") + (if (= status "spliced-with-utr") (doseq [feature (reverse (sort-by :start features-raw)) :when (contains? (set `("intron" "three_prime_UTR" "five_prime_UTR")) (:type feature))] (swap! dna-sequence diff --git a/src/rest_api/classes/transcript/widgets/sequences.clj b/src/rest_api/classes/transcript/widgets/sequences.clj index 62f2d33b..ba659999 100644 --- a/src/rest_api/classes/transcript/widgets/sequences.clj +++ b/src/rest_api/classes/transcript/widgets/sequences.clj @@ -21,7 +21,7 @@ :description "the unpliced sequence of the sequence"}) (defn spliced-sequence-context [t] - {:data (sequence-fns/transcript-sequence-features t 0 "spliced-plus-utr") + {:data (sequence-fns/transcript-sequence-features t 0 "spliced-with-utr") :description "the spliced sequence of the sequence"}) (defn protein-sequence [t] From 487cf935c745b256f1b8c7d88f0fb1d6c453981d Mon Sep 17 00:00:00 2001 From: Adam Wright Date: Tue, 16 Apr 2019 22:52:50 +0000 Subject: [PATCH 02/15] got it working with just cds --- src/rest_api/classes/cds.clj | 2 +- .../classes/cds/widgets/sequences.clj | 17 +- src/rest_api/classes/sequence/core.clj | 171 +++++++----------- src/rest_api/classes/transcript.clj | 2 +- .../classes/transcript/widgets/sequences.clj | 6 +- 5 files changed, 71 insertions(+), 127 deletions(-) diff --git a/src/rest_api/classes/cds.clj b/src/rest_api/classes/cds.clj index 7961d014..2f1b5206 100644 --- a/src/rest_api/classes/cds.clj +++ b/src/rest_api/classes/cds.clj @@ -16,7 +16,7 @@ {:overview overview/widget :location location/widget :feature feature/widget - ;:sequences sequences/widget + :sequences sequences/widget :reagents reagents/widget :external_links external-links/widget :references references/widget} diff --git a/src/rest_api/classes/cds/widgets/sequences.clj b/src/rest_api/classes/cds/widgets/sequences.clj index 0419d803..3994a671 100644 --- a/src/rest_api/classes/cds/widgets/sequences.clj +++ b/src/rest_api/classes/cds/widgets/sequences.clj @@ -7,20 +7,12 @@ [rest-api.classes.generic-functions :as generic-functions] [rest-api.formatters.object :as obj :refer [pack-obj]])) -(defn unspliced-sequence-context [c] +(defn cds-sequence [c] {:data (when-let [transcript (-> c :transcript.corresponding-cds/_cds first :transcript/_corresponding-cds)] - (sequence-fns/transcript-sequence-features transcript 0 "unspliced")) - :description "the unpliced sequence of the sequence"}) - -(defn spliced-sequence-context [c] - {:data (when-let [transcript (-> c - :transcript.corresponding-cds/_cds - first - :transcript/_corresponding-cds)] - (sequence-fns/transcript-sequence-features transcript 0 "spliced")) + (sequence-fns/transcript-sequence-features transcript 0 :cds)) :description "the unpliced sequence of the sequence"}) (defn protein-sequence [c] @@ -46,9 +38,6 @@ (def widget {:name generic/name-field - :predicted_exon_structure generic/predicted-exon-structure :print_blast print-blast :protein_sequence protein-sequence - :predicted_unit generic/predicted-units - :unspliced_sequence_context unspliced-sequence-context - :spliced_sequence_context spliced-sequence-context}) + :cds_sequence cds-sequence}) diff --git a/src/rest_api/classes/sequence/core.clj b/src/rest_api/classes/sequence/core.clj index 612057ee..f8f3e99a 100644 --- a/src/rest_api/classes/sequence/core.clj +++ b/src/rest_api/classes/sequence/core.clj @@ -16,15 +16,15 @@ (defn get-g-species [object role] (when-let [species-name (:species/id - (or - ((keyword role "species") object) - (or - (:clone/species - (first - (:pcr-product/clone object))) - (:transcript/species - (first - (:transcript/_corresponding-pcr-product object))))))] + (or + ((keyword role "species") object) + (or + (:clone/species + (first + (:pcr-product/clone object))) + (:transcript/species + (first + (:transcript/_corresponding-pcr-product object))))))] (generic-functions/xform-species-name species-name))) (defn get-segments [object] @@ -55,19 +55,19 @@ (let [id-kw (first (filter #(= (name %) "id") (keys object))) role (namespace id-kw) calc-browser-pos (fn [x-op x y mult-offset] - (if gbrowse - (->> (reduce - (sort-by - [x y])) - (double) - (* mult-offset) - (int) - (x-op x)) - y)) + (if gbrowse + (->> (reduce - (sort-by - [x y])) + (double) + (* mult-offset) + (int) + (x-op x)) + y)) browser-start (calc-browser-pos - start stop 0.2) browser-stop (calc-browser-pos + stop start 0.5) id (str (:seqname segment) ":" browser-start ".." browser-stop) label (if (= img true) - id - (str (:seqname segment) ":" start ".." stop))] + id + (str (:seqname segment) ":" start ".." stop))] (pace-utils/vmap :class "genomic_location" :id id @@ -82,15 +82,15 @@ (defn genomic-obj [object] (when-let [segment (get-longest-segment object)] (let [[start stop] (->> segment - ((juxt :start :end)) - (sort-by +))] + ((juxt :start :end)) + (sort-by +))] (create-genomic-location-obj start stop object segment nil true true)))) (defn genomic-obj-position [object] (when-let [segment (get-longest-segment object)] (let [[start stop] (->> segment - ((juxt :start :end)) - (sort-by +))] + ((juxt :start :end)) + (sort-by +))] (create-genomic-location-obj start stop object segment nil true false)))) (defn genomic-obj-child-positions [object] @@ -138,76 +138,49 @@ :stop feature-end :type (:type feature)})) -(defn transcript-sequence-features [transcript padding status] ; status can be spliced, spliced-with-utr, and unspliced +(defn transcript-sequence-features [transcript padding status] ; status can be :cds, :spliced, and :unspliced (when-let [refseq-obj (genomic-obj transcript)] - (let [ - dddd (println refseq-obj) - seq-features (genomic-obj-child-positions transcript) - ddddd (println seq-features) + (let [seq-features (genomic-obj-child-positions transcript) + status :spliced three-prime-utr (first (filter (comp #{"three_prime_UTR"} :type) seq-features)) five-prime-utr (first (filter (comp #{"five_prime_UTR"} :type) seq-features)) - sequence-strand (if (> (:start three-prime-utr) (:start five-prime-utr)) "+" "-") - start (if (= "+" sequence-strand) - (if (= status "spliced") - (+ 1 (:stop five-prime-utr)) - (:start five-prime-utr)) - (if (= status "spliced") - (- (:start three-prime-utr) 1) - (:stop three-prime-utr))) - stop (if (= "+" sequence-strand) - (if (= status "spliced") - (- (:start three-prime-utr) 1) - (:stop three-prime-utr)) - (if (= status "spliced") - (+ (:stop five-prime-utr) 1) - (:start three-prime-utr))) - features-raw (when-let [features seq-features] - (some->> features - (map (fn [feature] - (when (not= "CDS" (:type feature)) - {:start (+ 1 - (+ padding - (if (= sequence-strand "+") - (- (if (= status "spliced-with-utr" (if-let [five-prime-utr-start (:start five-prime-utr)] - five-prime-utr-start - (:start feature)) - (:start refseq-obj)) - (- (:stop feature) - (if-let [three-prime-utr-start (:start three-prime-utr)] - three-prime-utr-start - (:stop refseq-obj)))))) - :stop (+ 1 - (+ padding - (if (= sequence-strand "+") - (- (:stop feature) - (if-let [three-prime-utr-start (:start three-prime-utr)] - three-prime-utr-start - (:start refseq-obj))) - (- (:start feature) - (if-let [three-prime-utr-start (:start three-prime-utr)] - three-prime-utr-start - (:stop refseq-obj)))))) - :type (:type feature)}))) - (remove nil?))) + cds (first (filter (comp #{"CDS"} :type) seq-features)) + sequence-strand (if (< (:start five-prime-utr) (:stop three-prime-utr)) "+" "-") + context-obj (if (= status :cds) cds refseq-obj) + [context-left context-right] (if (neg? (- (:start context-obj) (:stop context-obj))) + [(:start context-obj) (:stop context-obj)] + [(:stop context-obj) (:start context-obj)]) + positive-features (some->> seq-features + (map (fn [feature] + (let [feature-type (keyword (:type feature)) + [left-position right-position] + (if (< (:start feature) (:stop feature)) + [(:start feature) (:stop feature)] + [(:stop feature) (:start feature)])] + (when (and (not= feature-type :CDS) + (not + (and (= status :cds) + (or (= feature-type :five_prime_UTR) + (= feature-type :three_prime_UTR))))) + {:start (let [start (+ 1 + (+ padding + (- left-position context-left)))] + (if (neg? start) 0 start)) + :stop (let [stop (+ 1 + (+ padding + (- right-position context-left)))] + (let [length (+ 1 (- context-right context-left))] + (if (> stop length) length stop))) + :type feature-type})))) + (remove nil?)) sequence-positive-raw (get-sequence (conj refseq-obj - (if (= sequence-strand "+") - {:start (if-let [three-prime-utr-start (:start three-prime-utr)] - (- three-prime-utr-start padding) - (- (:start refseq-obj) padding)) - :stop (if-let [five-prime-utr-stop (:stop five-prime-utr)] - (+ five-prime-utr-stop padding) - (+ (:stop refseq-obj) padding))} - {:start (if-let [five-prime-utr-start (:start five-prime-utr)] - (- five-prime-utr-start padding) - (- (:stop refseq-obj) padding)) - :stop (if-let [three-prime-utr-start (:start three-prime-utr)] - (+ three-prime-utr-start padding) - (+ (:start refseq-obj) padding))}))) + {:start context-left + :stop context-right})) sequence-positive (let [dna-sequence (atom {:seq sequence-positive-raw})] (do - (doseq [feature features-raw + (doseq [feature positive-features :when (= "exon" (:type feature))] (swap! dna-sequence assoc @@ -219,7 +192,7 @@ (+ 1 (- (:stop feature) (:start feature)))))) - (doseq [feature features-raw + (doseq [feature positive-features :when (or (= "three_prime_UTR" (:type feature)) (= "five_prime_UTR" (:type feature)))] (swap! dna-sequence @@ -232,8 +205,8 @@ (+ 1 (- (:stop feature) (:start feature)))))) - (if (= status "spliced-with-utr") - (doseq [feature (reverse (sort-by :start features-raw)) + (if (= status :cds) + (doseq [feature (reverse (sort-by :start positive-features)) :when (contains? (set `("intron" "three_prime_UTR" "five_prime_UTR")) (:type feature))] (swap! dna-sequence assoc @@ -245,8 +218,8 @@ (+ 1 (- (:stop feature) (:start feature))))))) - (if (= status "spliced") - (doseq [feature (reverse (sort-by :start features-raw)) + (if (= status :spliced) + (doseq [feature (reverse (sort-by :start positive-features)) :when (= "intron" (:type feature))] (swap! dna-sequence assoc @@ -259,25 +232,7 @@ (- (:stop feature) (:start feature))))))) - (:seq @dna-sequence))) - - positive-features (if (= status "unspliced") - features-raw - (let [pos-features (atom {:features ()}) - next-feature-start (atom 1)] - (do - (doseq [feature (sort-by :start features-raw) - :when (and - (not= "three_prime_UTR" (:type feature)) - (not= "five_prime_UTR" (:type feature)) - (not= "intron" (:type feature))) - :let [feature-end (+ (deref next-feature-start) (- (:stop feature) (:start feature)))]] - (swap! pos-features - assoc - :features - (add-feature (:features @pos-features) feature (deref next-feature-start) feature-end)) - (reset! next-feature-start (+ 1 (+ (deref next-feature-start) (- (:stop feature) (:start feature)))))) - (:features @pos-features))))] + (:seq @dna-sequence)))] {:positive-strand {:features positive-features :sequence sequence-positive} diff --git a/src/rest_api/classes/transcript.clj b/src/rest_api/classes/transcript.clj index cbeb7127..66a22595 100644 --- a/src/rest_api/classes/transcript.clj +++ b/src/rest_api/classes/transcript.clj @@ -42,7 +42,7 @@ :external_links external-links/widget :location location/widget :feature feature/widget - ;:sequences sequences/widget + :sequences sequences/widget :references references/widget} :field {:fpkm_expression_summary_ls exp/fpkm-expression-summary-ls}}) diff --git a/src/rest_api/classes/transcript/widgets/sequences.clj b/src/rest_api/classes/transcript/widgets/sequences.clj index ba659999..3939f25c 100644 --- a/src/rest_api/classes/transcript/widgets/sequences.clj +++ b/src/rest_api/classes/transcript/widgets/sequences.clj @@ -13,15 +13,15 @@ :description "strand orientation of the sequence"}) (defn unspliced-sequence-context-with-padding [t] - {:data (sequence-fns/transcript-sequence-features t 2001 "unspliced") + {:data (sequence-fns/transcript-sequence-features t 2001 :unspliced) :description "the unpliced sequence of the sequence"}) (defn unspliced-sequence-context [t] - {:data (sequence-fns/transcript-sequence-features t 0 "unspliced") + {:data (sequence-fns/transcript-sequence-features t 0 :unspliced) :description "the unpliced sequence of the sequence"}) (defn spliced-sequence-context [t] - {:data (sequence-fns/transcript-sequence-features t 0 "spliced-with-utr") + {:data (sequence-fns/transcript-sequence-features t 0 :spliced) :description "the spliced sequence of the sequence"}) (defn protein-sequence [t] From 4357dcdc76a28922c857d0b47d2558ee7f5c9d57 Mon Sep 17 00:00:00 2001 From: Adam Wright Date: Wed, 17 Apr 2019 03:13:14 +0000 Subject: [PATCH 03/15] got both transcript and cds returning results based on whether or not the UTRs should be included --- src/rest_api/classes/sequence/core.clj | 29 +++++++++++--------------- 1 file changed, 12 insertions(+), 17 deletions(-) diff --git a/src/rest_api/classes/sequence/core.clj b/src/rest_api/classes/sequence/core.clj index f8f3e99a..68b89b9e 100644 --- a/src/rest_api/classes/sequence/core.clj +++ b/src/rest_api/classes/sequence/core.clj @@ -141,20 +141,19 @@ (defn transcript-sequence-features [transcript padding status] ; status can be :cds, :spliced, and :unspliced (when-let [refseq-obj (genomic-obj transcript)] (let [seq-features (genomic-obj-child-positions transcript) - status :spliced three-prime-utr (first (filter (comp #{"three_prime_UTR"} :type) seq-features)) five-prime-utr (first (filter (comp #{"five_prime_UTR"} :type) seq-features)) cds (first (filter (comp #{"CDS"} :type) seq-features)) sequence-strand (if (< (:start five-prime-utr) (:stop three-prime-utr)) "+" "-") context-obj (if (= status :cds) cds refseq-obj) [context-left context-right] (if (neg? (- (:start context-obj) (:stop context-obj))) - [(:start context-obj) (:stop context-obj)] - [(:stop context-obj) (:start context-obj)]) + [(- (:start context-obj) padding) (+ (:stop context-obj) padding)] + [(- (:stop context-obj) padding) (+ (:start context-obj) padding)]) positive-features (some->> seq-features (map (fn [feature] (let [feature-type (keyword (:type feature)) [left-position right-position] - (if (< (:start feature) (:stop feature)) + (if (neg? (- (:start feature) (:stop feature))) [(:start feature) (:stop feature)] [(:stop feature) (:start feature)])] (when (and (not= feature-type :CDS) @@ -162,13 +161,9 @@ (and (= status :cds) (or (= feature-type :five_prime_UTR) (= feature-type :three_prime_UTR))))) - {:start (let [start (+ 1 - (+ padding - (- left-position context-left)))] - (if (neg? start) 0 start)) - :stop (let [stop (+ 1 - (+ padding - (- right-position context-left)))] + {:start (let [start (+ 1 (- left-position context-left))] + (if (neg? start) 1 start)) + :stop (let [stop (+ 1 (- right-position context-left))] (let [length (+ 1 (- context-right context-left))] (if (> stop length) length stop))) :type feature-type})))) @@ -181,7 +176,7 @@ sequence-positive (let [dna-sequence (atom {:seq sequence-positive-raw})] (do (doseq [feature positive-features - :when (= "exon" (:type feature))] + :when (= :exon (:type feature))] (swap! dna-sequence assoc :seq @@ -193,8 +188,8 @@ (- (:stop feature) (:start feature)))))) (doseq [feature positive-features - :when (or (= "three_prime_UTR" (:type feature)) - (= "five_prime_UTR" (:type feature)))] + :when (or (= :three_prime_UTR (:type feature)) + (= :five_prime_UTR (:type feature)))] (swap! dna-sequence assoc :seq @@ -207,7 +202,7 @@ (:start feature)))))) (if (= status :cds) (doseq [feature (reverse (sort-by :start positive-features)) - :when (contains? (set `("intron" "three_prime_UTR" "five_prime_UTR")) (:type feature))] + :when (contains? (set '(:intron :three_prime_UTR :five_prime_UTR)) (:type feature))] (swap! dna-sequence assoc :seq @@ -220,7 +215,7 @@ (:start feature))))))) (if (= status :spliced) (doseq [feature (reverse (sort-by :start positive-features)) - :when (= "intron" (:type feature))] + :when (= :intron (:type feature))] (swap! dna-sequence assoc :seq @@ -237,7 +232,7 @@ {:features positive-features :sequence sequence-positive} :negative-strand - {:features (when-let [seq-length (count sequence-positive)] + {:features (when-let [seq-length (count sequence-positive-raw)] (let [neg-features (atom {:features ()})] (do (doseq [feature positive-features] From f8404340fbb7802c58267bad29475d1ea768416a Mon Sep 17 00:00:00 2001 From: Adam Wright Date: Wed, 17 Apr 2019 14:34:11 +0000 Subject: [PATCH 04/15] making it so that the protein sequence gets pulled for the trancript sequence widget --- src/rest_api/classes/transcript/widgets/sequences.clj | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/rest_api/classes/transcript/widgets/sequences.clj b/src/rest_api/classes/transcript/widgets/sequences.clj index 3939f25c..c52a9e8f 100644 --- a/src/rest_api/classes/transcript/widgets/sequences.clj +++ b/src/rest_api/classes/transcript/widgets/sequences.clj @@ -25,8 +25,10 @@ :description "the spliced sequence of the sequence"}) (defn protein-sequence [t] - {:data (when-let [peptide (some->> (:transcript/corresponding-protein t) - (:transcript.corresponding-protein/protein) + {:data (when-let [peptide (some->> (->> t :transcript/corresponding-cds + :transcript.corresponding-cds/cds + :cds/corresponding-protein) + (:cds.corresponding-protein/protein) (:protein/peptide) (:protein.peptide/peptide) (:peptide/sequence))] From 7197b3641843e40e11956a6330212d1e74436c51 Mon Sep 17 00:00:00 2001 From: Adam Wright Date: Fri, 19 Apr 2019 00:09:27 +0000 Subject: [PATCH 05/15] changing description for endpoint --- src/rest_api/classes/cds/widgets/sequences.clj | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rest_api/classes/cds/widgets/sequences.clj b/src/rest_api/classes/cds/widgets/sequences.clj index 3994a671..54940034 100644 --- a/src/rest_api/classes/cds/widgets/sequences.clj +++ b/src/rest_api/classes/cds/widgets/sequences.clj @@ -13,7 +13,7 @@ first :transcript/_corresponding-cds)] (sequence-fns/transcript-sequence-features transcript 0 :cds)) - :description "the unpliced sequence of the sequence"}) + :description "the spliced sequence of the transcripts without UTR"}) (defn protein-sequence [c] {:data (when-let [peptide (some->> (:cds/corresponding-protein c) From f47957282dba13dc9f039aabc977a54df9715155 Mon Sep 17 00:00:00 2001 From: Adam Wright Date: Fri, 19 Apr 2019 22:54:47 +0000 Subject: [PATCH 06/15] fixed the features positions for sequences that have been spliced --- src/rest_api/classes/sequence/core.clj | 70 +++++++++++++++++++------- 1 file changed, 51 insertions(+), 19 deletions(-) diff --git a/src/rest_api/classes/sequence/core.clj b/src/rest_api/classes/sequence/core.clj index 68b89b9e..fb636ec1 100644 --- a/src/rest_api/classes/sequence/core.clj +++ b/src/rest_api/classes/sequence/core.clj @@ -138,9 +138,31 @@ :stop feature-end :type (:type feature)})) +(defn- get-spliced-exon-positions [positive-features] + (let [last-stop (atom 0)] + (for [feature (sort-by :start positive-features) + :when (= (:type feature) :exon) + :let [last-stop-position @last-stop + new-stop-position (+ last-stop-position + (+ 1 + (- (:stop feature) (:start feature)))) + new-start-position (+ 1 last-stop-position)]] + (do (reset! last-stop new-stop-position) + {:start new-start-position + :stop new-stop-position + :type (:type feature)})))) + (defn transcript-sequence-features [transcript padding status] ; status can be :cds, :spliced, and :unspliced (when-let [refseq-obj (genomic-obj transcript)] (let [seq-features (genomic-obj-child-positions transcript) + status-parts (case status + :spliced + #{:exon :three_prime_UTR :five_prime_UTR} + + :cds + #{:exon} + + #{:intron :exon :three_prime_UTR :five_prime_UTR}) three-prime-utr (first (filter (comp #{"three_prime_UTR"} :type) seq-features)) five-prime-utr (first (filter (comp #{"five_prime_UTR"} :type) seq-features)) cds (first (filter (comp #{"CDS"} :type) seq-features)) @@ -200,22 +222,9 @@ (+ 1 (- (:stop feature) (:start feature)))))) - (if (= status :cds) + (if (contains? #{:cds :spliced} status) (doseq [feature (reverse (sort-by :start positive-features)) - :when (contains? (set '(:intron :three_prime_UTR :five_prime_UTR)) (:type feature))] - (swap! dna-sequence - assoc - :seq - (replace-in-str - "remove" - (:seq @dna-sequence) - (- (:start feature) 1) - (+ 1 - (- (:stop feature) - (:start feature))))))) - (if (= status :spliced) - (doseq [feature (reverse (sort-by :start positive-features)) - :when (= :intron (:type feature))] + :when (not (some #(= (:type feature) %) status-parts))] (swap! dna-sequence assoc :seq @@ -226,16 +235,39 @@ (+ 1 (- (:stop feature) (:start feature))))))) + (:seq @dna-sequence))) + modified-positive-features (case status + :unspliced + positive-features + + :cds + (get-spliced-exon-positions positive-features) - (:seq @dna-sequence)))] + :spliced + (flatten + (conj + (get-spliced-exon-positions positive-features) + (if (= sequence-strand "+") + (first (filter #(= (:type %) :five_primeUTR) positive-features)) + (first (filter #(= (:type %) :three_prime_UTR) positive-features))) + (let [feature (if (= sequence-strand "+") + (first (filter #(= (:type %) :three_prime_UTR) positive-features)) + (first (filter #(= (:type %) :five_prime_UTR) positive-features))) + end (count sequence-positive)] + (conj + feature + {:start (- end + (+ 1 (- (:stop feature) (:start feature)))) + :stop (count sequence-positive)})) + )))] {:positive-strand - {:features positive-features + {:features modified-positive-features :sequence sequence-positive} :negative-strand - {:features (when-let [seq-length (count sequence-positive-raw)] + {:features (when-let [seq-length (count sequence-positive)] (let [neg-features (atom {:features ()})] (do - (doseq [feature positive-features] + (doseq [feature modified-positive-features] (swap! neg-features assoc :features From 96f7dc3ae35f2ff4e739e67982aeb5c025cb017a Mon Sep 17 00:00:00 2001 From: Adam Wright Date: Tue, 23 Apr 2019 14:49:38 +0000 Subject: [PATCH 07/15] adding padding for sequences with padding --- src/rest_api/classes/sequence/core.clj | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/src/rest_api/classes/sequence/core.clj b/src/rest_api/classes/sequence/core.clj index fb636ec1..86fdac36 100644 --- a/src/rest_api/classes/sequence/core.clj +++ b/src/rest_api/classes/sequence/core.clj @@ -152,6 +152,16 @@ :stop new-stop-position :type (:type feature)})))) +(defn- add-padding-to-feature-list [features padding length] + (when (> padding 0) + ((comp vec flatten conj) features + [{:type :padding + :start 1 + :stop (- padding 1)} + {:type :padding + :start (+ 2 (- length padding)) + :stop length}]))) + (defn transcript-sequence-features [transcript padding status] ; status can be :cds, :spliced, and :unspliced (when-let [refseq-obj (genomic-obj transcript)] (let [seq-features (genomic-obj-child-positions transcript) @@ -261,7 +271,9 @@ :stop (count sequence-positive)})) )))] {:positive-strand - {:features modified-positive-features + {:features (if (> padding 0) + (add-padding-to-feature-list modified-positive-features padding (count sequence-positive)) + modified-positive-features) :sequence sequence-positive} :negative-strand {:features (when-let [seq-length (count sequence-positive)] From a1f7e70e5c32c764cb80f60405200608c34b92a1 Mon Sep 17 00:00:00 2001 From: Adam Wright Date: Tue, 23 Apr 2019 15:32:07 +0000 Subject: [PATCH 08/15] fixing issue with padding and changing name of sequence field for pseudogene --- src/rest_api/classes/pseudogene/widgets/sequences.clj | 4 ++-- src/rest_api/classes/sequence/core.clj | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/rest_api/classes/pseudogene/widgets/sequences.clj b/src/rest_api/classes/pseudogene/widgets/sequences.clj index 88fc2a64..0c4eaa2a 100644 --- a/src/rest_api/classes/pseudogene/widgets/sequences.clj +++ b/src/rest_api/classes/pseudogene/widgets/sequences.clj @@ -12,7 +12,7 @@ (name strand-kw)) :description "strand orientation of the sequence"}) -(defn print-sequence [p] +(defn sequence-context [p] {:data (when-let [refseqobj (sequence-fns/genomic-obj p)] (when-let [dna-sequence (sequence-fns/get-sequence refseqobj)] (let [strand (if-let [strand-kw (:locatable/strand p)] @@ -36,4 +36,4 @@ {:name generic/name-field :predicted_exon_structure generic/predicted-exon-structure :strand strand - :print_sequence print-sequence}) + :sequence_context sequence-context}) diff --git a/src/rest_api/classes/sequence/core.clj b/src/rest_api/classes/sequence/core.clj index 86fdac36..ab60f31a 100644 --- a/src/rest_api/classes/sequence/core.clj +++ b/src/rest_api/classes/sequence/core.clj @@ -165,6 +165,7 @@ (defn transcript-sequence-features [transcript padding status] ; status can be :cds, :spliced, and :unspliced (when-let [refseq-obj (genomic-obj transcript)] (let [seq-features (genomic-obj-child-positions transcript) + padding (if (> padding 0) (- padding 1) 0) status-parts (case status :spliced #{:exon :three_prime_UTR :five_prime_UTR} From fb2ffc494831d6280f07318b68f517adcf986e62 Mon Sep 17 00:00:00 2001 From: Adam Wright Date: Tue, 23 Apr 2019 16:49:56 +0000 Subject: [PATCH 09/15] standardizing field names for sequences CDS and transcript: #7074 --- src/rest_api/classes/sequence/core.clj | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/rest_api/classes/sequence/core.clj b/src/rest_api/classes/sequence/core.clj index ab60f31a..5ce2c8fe 100644 --- a/src/rest_api/classes/sequence/core.clj +++ b/src/rest_api/classes/sequence/core.clj @@ -162,7 +162,7 @@ :start (+ 2 (- length padding)) :stop length}]))) -(defn transcript-sequence-features [transcript padding status] ; status can be :cds, :spliced, and :unspliced +(defn transcript-sequence-features [transcript padding status] (when-let [refseq-obj (genomic-obj transcript)] (let [seq-features (genomic-obj-child-positions transcript) padding (if (> padding 0) (- padding 1) 0) @@ -271,12 +271,12 @@ (+ 1 (- (:stop feature) (:start feature)))) :stop (count sequence-positive)})) )))] - {:positive-strand + {:positive_strand {:features (if (> padding 0) (add-padding-to-feature-list modified-positive-features padding (count sequence-positive)) modified-positive-features) :sequence sequence-positive} - :negative-strand + :negative_strand {:features (when-let [seq-length (count sequence-positive)] (let [neg-features (atom {:features ()})] (do @@ -287,4 +287,4 @@ (feature-complement (:features @neg-features) feature seq-length))) (:features @neg-features)))) :sequence (generic-functions/dna-reverse-complement sequence-positive)} - :sequence_strand sequence-strand}))) + :strand sequence-strand}))) From d6bcb7c808d1cce7b452fd8a60d7a740405cccc7 Mon Sep 17 00:00:00 2001 From: Adam Wright Date: Tue, 23 Apr 2019 17:27:10 +0000 Subject: [PATCH 10/15] uncommenting pseudogene and fixing padding --- src/rest_api/classes/pseudogene.clj | 2 +- src/rest_api/classes/sequence/core.clj | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/rest_api/classes/pseudogene.clj b/src/rest_api/classes/pseudogene.clj index fd76e382..f916b72b 100644 --- a/src/rest_api/classes/pseudogene.clj +++ b/src/rest_api/classes/pseudogene.clj @@ -17,7 +17,7 @@ :feature feature/widget :genetics genetics/widget :reagents reagents/widget - ;:sequences sequences/widget + :sequences sequences/widget :expression expression/widget :location location/widget } diff --git a/src/rest_api/classes/sequence/core.clj b/src/rest_api/classes/sequence/core.clj index 5ce2c8fe..dd9711a2 100644 --- a/src/rest_api/classes/sequence/core.clj +++ b/src/rest_api/classes/sequence/core.clj @@ -157,9 +157,9 @@ ((comp vec flatten conj) features [{:type :padding :start 1 - :stop (- padding 1)} + :stop padding} {:type :padding - :start (+ 2 (- length padding)) + :start (- length padding) :stop length}]))) (defn transcript-sequence-features [transcript padding status] From e90d41d9590f2c6808c569b0491c3bbd874d8e78 Mon Sep 17 00:00:00 2001 From: Adam Wright Date: Tue, 23 Apr 2019 17:53:34 +0000 Subject: [PATCH 11/15] fixing padding :) --- src/rest_api/classes/sequence/core.clj | 18 ++++++++++-------- .../classes/transcript/widgets/sequences.clj | 2 +- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/src/rest_api/classes/sequence/core.clj b/src/rest_api/classes/sequence/core.clj index dd9711a2..75169cb5 100644 --- a/src/rest_api/classes/sequence/core.clj +++ b/src/rest_api/classes/sequence/core.clj @@ -159,13 +159,12 @@ :start 1 :stop padding} {:type :padding - :start (- length padding) + :start (+ (- length padding) 1) :stop length}]))) (defn transcript-sequence-features [transcript padding status] (when-let [refseq-obj (genomic-obj transcript)] (let [seq-features (genomic-obj-child-positions transcript) - padding (if (> padding 0) (- padding 1) 0) status-parts (case status :spliced #{:exon :three_prime_UTR :five_prime_UTR} @@ -269,18 +268,21 @@ feature {:start (- end (+ 1 (- (:stop feature) (:start feature)))) - :stop (count sequence-positive)})) - )))] + :stop (count sequence-positive)}))))) + modified-positive-features-with-padding (if (> padding 0) + (add-padding-to-feature-list + modified-positive-features + padding + (count sequence-positive)) + modified-positive-features)] {:positive_strand - {:features (if (> padding 0) - (add-padding-to-feature-list modified-positive-features padding (count sequence-positive)) - modified-positive-features) + {:features modified-positive-features-with-padding :sequence sequence-positive} :negative_strand {:features (when-let [seq-length (count sequence-positive)] (let [neg-features (atom {:features ()})] (do - (doseq [feature modified-positive-features] + (doseq [feature modified-positive-features-with-padding] (swap! neg-features assoc :features diff --git a/src/rest_api/classes/transcript/widgets/sequences.clj b/src/rest_api/classes/transcript/widgets/sequences.clj index c52a9e8f..da2bf27c 100644 --- a/src/rest_api/classes/transcript/widgets/sequences.clj +++ b/src/rest_api/classes/transcript/widgets/sequences.clj @@ -13,7 +13,7 @@ :description "strand orientation of the sequence"}) (defn unspliced-sequence-context-with-padding [t] - {:data (sequence-fns/transcript-sequence-features t 2001 :unspliced) + {:data (sequence-fns/transcript-sequence-features t 2000 :unspliced) :description "the unpliced sequence of the sequence"}) (defn unspliced-sequence-context [t] From cfee34f1b605ac78b404f6ac4af819d9a477b5dd Mon Sep 17 00:00:00 2001 From: Adam Wright Date: Thu, 2 May 2019 20:47:20 +0000 Subject: [PATCH 12/15] it is now working with tRNA --- src/rest_api/classes/sequence/core.clj | 247 +++++++++++++------------ src/rest_api/db/sequence.clj | 3 +- src/rest_api/db/sql/sequence.sql | 6 +- 3 files changed, 129 insertions(+), 127 deletions(-) diff --git a/src/rest_api/classes/sequence/core.clj b/src/rest_api/classes/sequence/core.clj index f7b25126..322d2cad 100644 --- a/src/rest_api/classes/sequence/core.clj +++ b/src/rest_api/classes/sequence/core.clj @@ -95,9 +95,7 @@ (create-genomic-location-obj start stop object segment nil true))))) (defn genomic-obj-child-positions [object feature-id] - (some->> (let [d (get-transcript-segments object feature-id)] - (do (println "before") (println d) (println "after") - d)) + (some->> (get-transcript-segments object feature-id) (map (fn [feature] (conj feature @@ -167,91 +165,94 @@ (defn transcript-sequence-features [transcript padding status] (when-let [refseq-obj (genomic-obj transcript)] - (let [d (println refseq-obj) - seq-features (genomic-obj-child-positions transcript (:feature_id refseq-obj)) - dd (println seq-features) - status-parts (case status - :spliced - #{:exon :three_prime_UTR :five_prime_UTR} + (let [seq-features (genomic-obj-child-positions transcript (:feature_id refseq-obj))] + (let [status-parts (case status + :spliced + #{:exon :three_prime_UTR :five_prime_UTR} - :cds - #{:exon} + :cds + #{:exon} - #{:intron :exon :three_prime_UTR :five_prime_UTR}) - three-prime-utr (first (filter (comp #{"three_prime_UTR"} :type) seq-features)) - five-prime-utr (first (filter (comp #{"five_prime_UTR"} :type) seq-features)) - cds (first (filter (comp #{"CDS"} :type) seq-features)) - sequence-strand (if (< (:start five-prime-utr) (:stop three-prime-utr)) "+" "-") - context-obj (if (= status :cds) cds refseq-obj) - [context-left context-right] (if (neg? (- (:start context-obj) (:stop context-obj))) - [(- (:start context-obj) padding) (+ (:stop context-obj) padding)] - [(- (:stop context-obj) padding) (+ (:start context-obj) padding)]) - positive-features (some->> seq-features - (map (fn [feature] - (let [feature-type (keyword (:type feature)) - [left-position right-position] - (if (neg? (- (:start feature) (:stop feature))) - [(:start feature) (:stop feature)] - [(:stop feature) (:start feature)])] - (when (and (not= feature-type :CDS) - (not - (and (= status :cds) - (or (= feature-type :five_prime_UTR) - (= feature-type :three_prime_UTR))))) - {:start (let [start (+ 1 (- left-position context-left))] - (if (neg? start) 1 start)) - :stop (let [stop (+ 1 (- right-position context-left))] - (let [length (+ 1 (- context-right context-left))] - (if (> stop length) length stop))) - :type feature-type})))) - (remove nil?)) - sequence-positive-raw (get-sequence - (conj - refseq-obj - {:start context-left - :stop context-right})) - sequence-positive (let [dna-sequence (atom {:seq sequence-positive-raw})] - (do - (doseq [feature positive-features - :when (= :exon (:type feature))] - (swap! dna-sequence - assoc - :seq - (replace-in-str - "uppercase" - (:seq @dna-sequence) - (- (:start feature) 1) - (+ 1 - (- (:stop feature) - (:start feature)))))) - (doseq [feature positive-features - :when (or (= :three_prime_UTR (:type feature)) - (= :five_prime_UTR (:type feature)))] - (swap! dna-sequence - assoc - :seq - (replace-in-str - "lowercase" - (:seq @dna-sequence) - (- (:start feature) 1) - (+ 1 - (- (:stop feature) - (:start feature)))))) - (if (contains? #{:cds :spliced} status) - (doseq [feature (reverse (sort-by :start positive-features)) - :when (not (some #(= (:type feature) %) status-parts))] - (swap! dna-sequence - assoc - :seq - (replace-in-str - "remove" - (:seq @dna-sequence) - (- (:start feature) 1) - (+ 1 - (- (:stop feature) - (:start feature))))))) - (:seq @dna-sequence))) - modified-positive-features (case status + #{:intron :exon :three_prime_UTR :five_prime_UTR}) + three-prime-utr (first (filter (comp #{"three_prime_UTR"} :type) seq-features)) + five-prime-utr (first (filter (comp #{"five_prime_UTR"} :type) seq-features)) + cds (first (filter (comp #{"CDS"} :type) seq-features)) + sequence-strand (if (some nil? [three-prime-utr five-prime-utr]) + (when-let [strand (:locatable/strand transcript)] + (cond + (= strand :locatable.strand/negative) "-" + (= strand :locatable.strand/positive) "+")) + (if (< (:start five-prime-utr) (:stop three-prime-utr)) "+" "-")) + context-obj (if (and (= status :cds) (some? cds))cds refseq-obj) + [context-left context-right] (if (neg? (- (:start context-obj) (:stop context-obj))) + [(- (:start context-obj) padding) (+ (:stop context-obj) padding)] + [(- (:stop context-obj) padding) (+ (:start context-obj) padding)]) + positive-features (some->> seq-features + (map (fn [feature] + (let [feature-type (keyword (:type feature)) + [left-position right-position] + (if (neg? (- (:start feature) (:stop feature))) + [(:start feature) (:stop feature)] + [(:stop feature) (:start feature)])] + (when (and (not= feature-type :CDS) + (not + (and (= status :cds) + (or (= feature-type :five_prime_UTR) + (= feature-type :three_prime_UTR))))) + {:start (let [start (+ 1 (- left-position context-left))] + (if (neg? start) 1 start)) + :stop (let [stop (+ 1 (- right-position context-left))] + (let [length (+ 1 (- context-right context-left))] + (if (> stop length) length stop))) + :type feature-type})))) + (remove nil?)) + sequence-positive-raw (get-sequence + (conj + refseq-obj + {:start context-left + :stop context-right})) + sequence-positive (let [dna-sequence (atom {:seq sequence-positive-raw})] + (do + (doseq [feature positive-features + :when (= :exon (:type feature))] + (swap! dna-sequence + assoc + :seq + (replace-in-str + "uppercase" + (:seq @dna-sequence) + (- (:start feature) 1) + (+ 1 + (- (:stop feature) + (:start feature)))))) + (doseq [feature positive-features + :when (or (= :three_prime_UTR (:type feature)) + (= :five_prime_UTR (:type feature)))] + (swap! dna-sequence + assoc + :seq + (replace-in-str + "lowercase" + (:seq @dna-sequence) + (- (:start feature) 1) + (+ 1 + (- (:stop feature) + (:start feature)))))) + (if (contains? #{:cds :spliced} status) + (doseq [feature (reverse (sort-by :start positive-features)) + :when (not (some #(= (:type feature) %) status-parts))] + (swap! dna-sequence + assoc + :seq + (replace-in-str + "remove" + (:seq @dna-sequence) + (- (:start feature) 1) + (+ 1 + (- (:stop feature) + (:start feature))))))) + (:seq @dna-sequence))) + modified-positive-features (case status :unspliced positive-features @@ -259,39 +260,41 @@ (get-spliced-exon-positions positive-features) :spliced - (flatten - (conj - (get-spliced-exon-positions positive-features) - (if (= sequence-strand "+") - (first (filter #(= (:type %) :five_primeUTR) positive-features)) - (first (filter #(= (:type %) :three_prime_UTR) positive-features))) - (let [feature (if (= sequence-strand "+") - (first (filter #(= (:type %) :three_prime_UTR) positive-features)) - (first (filter #(= (:type %) :five_prime_UTR) positive-features))) - end (count sequence-positive)] - (conj - feature - {:start (- end - (+ 1 (- (:stop feature) (:start feature)))) - :stop (count sequence-positive)}))))) - modified-positive-features-with-padding (if (> padding 0) - (add-padding-to-feature-list - modified-positive-features - padding - (count sequence-positive)) - modified-positive-features)] - {:positive_strand - {:features modified-positive-features-with-padding - :sequence sequence-positive} - :negative_strand - {:features (when-let [seq-length (count sequence-positive)] - (let [neg-features (atom {:features ()})] - (do - (doseq [feature modified-positive-features-with-padding] - (swap! neg-features - assoc - :features - (feature-complement (:features @neg-features) feature seq-length))) - (:features @neg-features)))) - :sequence (generic-functions/dna-reverse-complement sequence-positive)} - :strand sequence-strand}))) + (remove nil? + (flatten + (conj + (get-spliced-exon-positions positive-features) + (if (= sequence-strand "+") + (first (filter #(= (:type %) :five_primeUTR) positive-features)) + (first (filter #(= (:type %) :three_prime_UTR) positive-features))) + (let [feature (if (= sequence-strand "+") + (first (filter #(= (:type %) :three_prime_UTR) positive-features)) + (first (filter #(= (:type %) :five_prime_UTR) positive-features))) + end (count sequence-positive)] + (if (some? feature) + (conj + feature + {:start (- end + (+ 1 (- (:stop feature) (:start feature)))) + :stop (count sequence-positive)}))))))) + modified-positive-features-with-padding (if (> padding 0) + (add-padding-to-feature-list + modified-positive-features + padding + (count sequence-positive)) + modified-positive-features)] + {:positive_strand + {:features modified-positive-features-with-padding + :sequence sequence-positive} + :negative_strand + {:features (when-let [seq-length (count sequence-positive)] + (let [neg-features (atom {:features ()})] + (do + (doseq [feature modified-positive-features-with-padding] + (swap! neg-features + assoc + :features + (feature-complement (:features @neg-features) feature seq-length))) + (:features @neg-features)))) + :sequence (generic-functions/dna-reverse-complement sequence-positive)} + :strand sequence-strand})))) diff --git a/src/rest_api/db/sequence.clj b/src/rest_api/db/sequence.clj index 1b876560..5d559ce0 100644 --- a/src/rest_api/db/sequence.clj +++ b/src/rest_api/db/sequence.clj @@ -95,5 +95,4 @@ (- high low-offset)))) (defn get-seq-features [db-spec transcript] - (do (println "DDDD") (println transcript) (println "EEEE") - (sequencesql/get-seq-features db-spec {:name transcript}))) + (sequencesql/get-seq-features db-spec {:name transcript})) diff --git a/src/rest_api/db/sql/sequence.sql b/src/rest_api/db/sql/sequence.sql index b2a03dc7..32336b46 100644 --- a/src/rest_api/db/sql/sequence.sql +++ b/src/rest_api/db/sql/sequence.sql @@ -65,14 +65,14 @@ AND s.offset = :offset -- :name get-seq-features :? :* -- :doc Retreive all sequence features from transcript -SELECT tc.tag,fc.start AS start,fc.end AS stop +SELECT t.tag,fc.start AS start,fc.end AS stop FROM feature as f LEFT OUTER JOIN parent2child as pc ON pc.id=f.id -LEFT OUTER JOIN typelist as t ON t.id=f.typeid LEFT OUTER JOIN feature as fc ON pc.child=fc.id -LEFT OUTER JOIN typelist as tc ON tc.id=fc.typeid +LEFT OUTER JOIN typelist as t ON t.id=fc.typeid LEFT OUTER JOIN name as n ON n.id=f.id WHERE n.name = :name AND (t.tag LIKE "transcript%" OR t.tag LIKE "CDS%" + OR t.tag LIKE "exon%" OR t.tag LIKE "mRNA%") From 1d705650f29f0e9e556d09fc3b059d77c35a0657 Mon Sep 17 00:00:00 2001 From: Adam Wright Date: Fri, 3 May 2019 18:33:24 +0000 Subject: [PATCH 13/15] finished getting all endpoints pointed out by Paul Davis to work --- src/rest_api/classes/cds/widgets/overview.clj | 1 - .../classes/cds/widgets/sequences.clj | 6 +- src/rest_api/classes/generic_fields.clj | 40 ++- src/rest_api/classes/sequence/core.clj | 266 ++++++++++-------- src/rest_api/db/sequence.clj | 15 +- 5 files changed, 191 insertions(+), 137 deletions(-) diff --git a/src/rest_api/classes/cds/widgets/overview.clj b/src/rest_api/classes/cds/widgets/overview.clj index 08680f60..e74b0a05 100644 --- a/src/rest_api/classes/cds/widgets/overview.clj +++ b/src/rest_api/classes/cds/widgets/overview.clj @@ -7,7 +7,6 @@ (defn description [cds] {:data (:cds.detailed-description/text (first (:cds/detailed-description cds))) - :d (:db/id cds) :description (str "description of the CDS " (:cds/id cds))}) (defn partial-field [cds] diff --git a/src/rest_api/classes/cds/widgets/sequences.clj b/src/rest_api/classes/cds/widgets/sequences.clj index 54940034..cfd139e6 100644 --- a/src/rest_api/classes/cds/widgets/sequences.clj +++ b/src/rest_api/classes/cds/widgets/sequences.clj @@ -8,10 +8,12 @@ [rest-api.formatters.object :as obj :refer [pack-obj]])) (defn cds-sequence [c] - {:data (when-let [transcript (-> c + {:data (when-let [transcript (or + (-> c :transcript.corresponding-cds/_cds first - :transcript/_corresponding-cds)] + :transcript/_corresponding-cds) + c)] (sequence-fns/transcript-sequence-features transcript 0 :cds)) :description "the spliced sequence of the transcripts without UTR"}) diff --git a/src/rest_api/classes/generic_fields.clj b/src/rest_api/classes/generic_fields.clj index 006c712d..ec0b486f 100644 --- a/src/rest_api/classes/generic_fields.clj +++ b/src/rest_api/classes/generic_fields.clj @@ -536,19 +536,33 @@ (corresponding-all-gene gene object role nil)))) "cds" - (when-let [ths (:transcript.corresponding-cds/_cds object)] - (let [genes - (distinct - (flatten - (for [th ths - :let [ghs (:gene.corresponding-transcript/_transcript - (:transcript/_corresponding-cds th))]] - (for [gh ghs - :let [gene (:gene/_corresponding-transcript gh)]] - gene))))] - (flatten - (for [gene genes] - (corresponding-all-gene gene object role nil))))) + (or + (when-let [ths (:transcript.corresponding-cds/_cds object)] + (let [genes + (distinct + (flatten + (for [th ths + :let [ghs (:gene.corresponding-transcript/_transcript + (:transcript/_corresponding-cds th))]] + (for [gh ghs + :let [gene (:gene/_corresponding-transcript gh)]] + gene))))] + (flatten + (for [gene genes] + (corresponding-all-gene gene object role nil))))) + (when-let [ths (:transposon.corresponding-cds/_cds object)] + (let [genes + (distinct + (flatten + (for [th ths + :let [ghs (:gene.corresponding-transposon/_transposon + (:transposon/_corresponding-cds th))]] + (for [gh ghs + :let [gene (:gene/_corresponding-transposon gh)]] + gene))))] + (flatten + (for [gene genes] + (corresponding-all-gene gene object role nil)))))) "protein" ;; need to make it filter for only the row with the protein (when-let [cdshs (:cds.corresponding-protein/_protein object)] diff --git a/src/rest_api/classes/sequence/core.clj b/src/rest_api/classes/sequence/core.clj index 322d2cad..2560e97c 100644 --- a/src/rest_api/classes/sequence/core.clj +++ b/src/rest_api/classes/sequence/core.clj @@ -36,7 +36,8 @@ (sequence-features sequence-database (id-kw object) role))))) (defn get-transcript-segments [object feature-id] - (let [g-species (get-g-species object "transcript") + (let [g-species (or (get-g-species object "transcript") + (get-g-species object "cds")) sequence-database (seqdb/get-default-sequence-database g-species)] (when sequence-database (when-let [db ((keyword sequence-database) wb-seq/sequence-dbs)] @@ -86,7 +87,8 @@ (defn genomic-obj [object] (let [id-kw (first (filter #(= (name %) "id") (keys object))) role (namespace id-kw)] - (when-let [segment (if (= "transcript" role) + (when-let [segment (if (or (= "cds" role) + (= "transcript" role)) (get-transcript-segment object) (get-longest-segment object))] (let [[start stop] (->> segment @@ -153,6 +155,32 @@ :stop new-stop-position :type (:type feature)})))) +(defn- add-introns [features] + (let [features-with-introns (atom ()) + intron-and-exon-features (filter #(or (= "exon" (:type %)) + (= "intron" (:type %))) features) + non-intron-and-exon-features (filter #(and (not= "exon" (:type %)) + (not= "intron" (:type %))) features)] + (do + (doseq [feature (sort-by :start intron-and-exon-features)] + (if (or (= (count @features-with-introns) 0) + (= (:stop (last @features-with-introns)) + (- (:start feature) 1))) + (swap! features-with-introns conj feature) + (do + (swap! features-with-introns conj (let [stop (- (:start feature) 1)] + {:start (+ (:stop (last + (filter + #(< (:stop %) stop) + (sort-by :start @features-with-introns)))) + 1) + :stop stop + :type "intron"})) + (swap! features-with-introns conj feature)))) + (doseq [feature (sort-by :start non-intron-and-exon-features)] + (swap! features-with-introns conj feature)) + @features-with-introns))) + (defn- add-padding-to-feature-list [features padding length] (when (> padding 0) ((comp vec flatten conj) features @@ -165,124 +193,128 @@ (defn transcript-sequence-features [transcript padding status] (when-let [refseq-obj (genomic-obj transcript)] - (let [seq-features (genomic-obj-child-positions transcript (:feature_id refseq-obj))] - (let [status-parts (case status - :spliced - #{:exon :three_prime_UTR :five_prime_UTR} + (let [seq-features (genomic-obj-child-positions transcript (:feature_id refseq-obj)) + status-parts (case status + :spliced + #{:exon :three_prime_UTR :five_prime_UTR} + + :cds + #{:exon} - :cds - #{:exon} + #{:intron :exon :three_prime_UTR :five_prime_UTR}) + three-prime-utr (first (filter (comp #{"three_prime_UTR"} :type) seq-features)) + five-prime-utr (first (filter (comp #{"five_prime_UTR"} :type) seq-features)) + cds (first (filter (comp #{"CDS"} :type) seq-features)) + mrna (first (filter (comp #{"mRNA"} :type) seq-features)) + sequence-strand (if (some nil? [three-prime-utr five-prime-utr]) + (when-let [strand (:locatable/strand transcript)] + (cond + (= strand :locatable.strand/negative) "-" + (= strand :locatable.strand/positive) "+")) + (if (< (:start five-prime-utr) (:stop three-prime-utr)) "+" "-")) + context-obj (if (and (= status :cds) (some? cds)) cds refseq-obj) + [context-left context-right] (if (neg? (- (:start context-obj) (:stop context-obj))) + [(- (:start context-obj) padding) (+ (:stop context-obj) padding)] + [(- (:stop context-obj) padding) (+ (:start context-obj) padding)]) - #{:intron :exon :three_prime_UTR :five_prime_UTR}) - three-prime-utr (first (filter (comp #{"three_prime_UTR"} :type) seq-features)) - five-prime-utr (first (filter (comp #{"five_prime_UTR"} :type) seq-features)) - cds (first (filter (comp #{"CDS"} :type) seq-features)) - sequence-strand (if (some nil? [three-prime-utr five-prime-utr]) - (when-let [strand (:locatable/strand transcript)] - (cond - (= strand :locatable.strand/negative) "-" - (= strand :locatable.strand/positive) "+")) - (if (< (:start five-prime-utr) (:stop three-prime-utr)) "+" "-")) - context-obj (if (and (= status :cds) (some? cds))cds refseq-obj) - [context-left context-right] (if (neg? (- (:start context-obj) (:stop context-obj))) - [(- (:start context-obj) padding) (+ (:stop context-obj) padding)] - [(- (:stop context-obj) padding) (+ (:start context-obj) padding)]) - positive-features (some->> seq-features - (map (fn [feature] - (let [feature-type (keyword (:type feature)) - [left-position right-position] - (if (neg? (- (:start feature) (:stop feature))) - [(:start feature) (:stop feature)] - [(:stop feature) (:start feature)])] - (when (and (not= feature-type :CDS) - (not - (and (= status :cds) - (or (= feature-type :five_prime_UTR) - (= feature-type :three_prime_UTR))))) - {:start (let [start (+ 1 (- left-position context-left))] - (if (neg? start) 1 start)) - :stop (let [stop (+ 1 (- right-position context-left))] - (let [length (+ 1 (- context-right context-left))] - (if (> stop length) length stop))) - :type feature-type})))) - (remove nil?)) - sequence-positive-raw (get-sequence - (conj - refseq-obj - {:start context-left - :stop context-right})) - sequence-positive (let [dna-sequence (atom {:seq sequence-positive-raw})] - (do - (doseq [feature positive-features - :when (= :exon (:type feature))] - (swap! dna-sequence - assoc - :seq - (replace-in-str - "uppercase" - (:seq @dna-sequence) - (- (:start feature) 1) - (+ 1 - (- (:stop feature) - (:start feature)))))) - (doseq [feature positive-features - :when (or (= :three_prime_UTR (:type feature)) - (= :five_prime_UTR (:type feature)))] - (swap! dna-sequence - assoc - :seq - (replace-in-str - "lowercase" - (:seq @dna-sequence) - (- (:start feature) 1) - (+ 1 - (- (:stop feature) - (:start feature)))))) - (if (contains? #{:cds :spliced} status) - (doseq [feature (reverse (sort-by :start positive-features)) - :when (not (some #(= (:type feature) %) status-parts))] - (swap! dna-sequence - assoc - :seq - (replace-in-str - "remove" - (:seq @dna-sequence) - (- (:start feature) 1) - (+ 1 - (- (:stop feature) - (:start feature))))))) - (:seq @dna-sequence))) - modified-positive-features (case status - :unspliced - positive-features + seq-features-with-introns (add-introns seq-features) + positive-features (some->> seq-features-with-introns + (map (fn [feature] + (let [feature-type (keyword (:type feature)) + [left-position right-position] + (if (neg? (- (:start feature) (:stop feature))) + [(:start feature) (:stop feature)] + [(:stop feature) (:start feature)])] + (when (and (not= feature-type :CDS) + (and (not= feature-type :mRNA) + (not + (and (= status :cds) + (or (= feature-type :five_prime_UTR) + (= feature-type :three_prime_UTR)))))) + {:start (let [start (+ 1 (- left-position context-left))] + (if (neg? start) 1 start)) + :stop (let [stop (+ 1 (- right-position context-left))] + (let [length (+ 1 (- context-right context-left))] + (if (> stop length) length stop))) + :type feature-type})))) + (remove nil?)) + sequence-positive-raw (get-sequence + (conj + refseq-obj + {:start context-left + :stop context-right})) + sequence-positive (let [dna-sequence (atom {:seq sequence-positive-raw})] + (do + (doseq [feature positive-features + :when (= :exon (:type feature))] + (swap! dna-sequence + assoc + :seq + (replace-in-str + "uppercase" + (:seq @dna-sequence) + (- (:start feature) 1) + (+ 1 + (- (:stop feature) + (:start feature)))))) + (doseq [feature positive-features + :when (or (= :three_prime_UTR (:type feature)) + (= :five_prime_UTR (:type feature)))] + (swap! dna-sequence + assoc + :seq + (replace-in-str + "lowercase" + (:seq @dna-sequence) + (- (:start feature) 1) + (+ 1 + (- (:stop feature) + (:start feature)))))) + (if (contains? #{:cds :spliced} status) + (doseq [feature (reverse (sort-by :start positive-features)) + :when (not (some #(= (:type feature) %) status-parts))] + (swap! dna-sequence + assoc + :seq + (replace-in-str + "remove" + (:seq @dna-sequence) + (- (:start feature) 1) + (+ 1 + (- (:stop feature) + (:start feature)))))))) + (:seq @dna-sequence)) + modified-positive-features (case status + :unspliced + positive-features - :cds - (get-spliced-exon-positions positive-features) + :cds + (get-spliced-exon-positions positive-features) - :spliced - (remove nil? - (flatten - (conj - (get-spliced-exon-positions positive-features) - (if (= sequence-strand "+") - (first (filter #(= (:type %) :five_primeUTR) positive-features)) - (first (filter #(= (:type %) :three_prime_UTR) positive-features))) - (let [feature (if (= sequence-strand "+") - (first (filter #(= (:type %) :three_prime_UTR) positive-features)) - (first (filter #(= (:type %) :five_prime_UTR) positive-features))) - end (count sequence-positive)] - (if (some? feature) - (conj - feature - {:start (- end - (+ 1 (- (:stop feature) (:start feature)))) - :stop (count sequence-positive)}))))))) - modified-positive-features-with-padding (if (> padding 0) - (add-padding-to-feature-list - modified-positive-features - padding - (count sequence-positive)) - modified-positive-features)] + :spliced + (remove nil? + (flatten + (conj + (get-spliced-exon-positions positive-features) + (if (= sequence-strand "+") + (first (filter #(= (:type %) :five_primeUTR) positive-features)) + (first (filter #(= (:type %) :three_prime_UTR) positive-features))) + (let [feature (if (= sequence-strand "+") + (first (filter #(= (:type %) :three_prime_UTR) positive-features)) + (first (filter #(= (:type %) :five_prime_UTR) positive-features))) + end (count sequence-positive)] + (if (some? feature) + (conj + feature + {:start (- end + (+ 1 (- (:stop feature) (:start feature)))) + :stop (count sequence-positive)}))))))) + modified-positive-features-with-padding (if (> padding 0) + (add-padding-to-feature-list + modified-positive-features + padding + (count sequence-positive)) + modified-positive-features)] {:positive_strand {:features modified-positive-features-with-padding :sequence sequence-positive} @@ -297,4 +329,4 @@ (feature-complement (:features @neg-features) feature seq-length))) (:features @neg-features)))) :sequence (generic-functions/dna-reverse-complement sequence-positive)} - :strand sequence-strand})))) + :strand sequence-strand}))) diff --git a/src/rest_api/db/sequence.clj b/src/rest_api/db/sequence.clj index 5d559ce0..63edd484 100644 --- a/src/rest_api/db/sequence.clj +++ b/src/rest_api/db/sequence.clj @@ -61,10 +61,17 @@ (sequencesql/get-features-by-id db-spec attribute))))) (defn sequence-features-where-type [db-spec feature-name method] - (sequencesql/sequence-features-where-type - db-spec - {:name feature-name - :tag method})) + (let [features (sequencesql/sequence-features-where-type + db-spec + {:name feature-name + :tag method}) ] + (if (> (count features) 0) + features + (when (= method "CDS%") + (sequencesql/sequence-features-where-type + db-spec + {:name feature-name + :tag "mRNA%"}))))) (defn variation-features [db-spec variation-name] (sequencesql/variation-features From 752f095ad3ac62f4da897073b982e045f8764690 Mon Sep 17 00:00:00 2001 From: Adam Wright Date: Fri, 3 May 2019 18:37:21 +0000 Subject: [PATCH 14/15] taking out commented lines I missed earlier --- src/rest_api/classes/generic_fields.clj | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/rest_api/classes/generic_fields.clj b/src/rest_api/classes/generic_fields.clj index ec0b486f..a8890579 100644 --- a/src/rest_api/classes/generic_fields.clj +++ b/src/rest_api/classes/generic_fields.clj @@ -89,9 +89,7 @@ [(some->> (:cds.corresponding-protein/_protein object) (map :cds/_corresponding-protein) (filter #(not= "history" (:method/id (:locatable/method %)))) - ; (map :gene.corresponding-cds/_cds) first) - ; (map :gene/_corresponding-cds)) "gene"] :else @@ -157,8 +155,7 @@ (when-let [position (sequence-fns/genomic-obj object)] [position])) :description "The genomic location of the sequence"})) - ; (map :gene/_corresponding-cds) -; (map sequence-fns/genomic-obj)) + (defn microarray-assays [object] {:data (some->> (:locatable/_parent object) (map (fn [f] From 3e4280e6e275a9680bdc247a6442d89a580cb4ef Mon Sep 17 00:00:00 2001 From: Adam Wright Date: Fri, 3 May 2019 18:46:04 +0000 Subject: [PATCH 15/15] fix to merge conflict --- src/rest_api/classes/generic_fields.clj | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/rest_api/classes/generic_fields.clj b/src/rest_api/classes/generic_fields.clj index e5648da4..88067b81 100644 --- a/src/rest_api/classes/generic_fields.clj +++ b/src/rest_api/classes/generic_fields.clj @@ -151,8 +151,7 @@ (map :cds/_corresponding-protein) (filter #(not= "history" (:method/id (:locatable/method %)))) (first) - (sequence-fns/genomic-obj)) - + (sequence-fns/genomic-obj))] (when-let [position (sequence-fns/genomic-obj object)] [position])) :description "The genomic location of the sequence"}))