-
Notifications
You must be signed in to change notification settings - Fork 1
/
output_reordering.Rmd
75 lines (59 loc) · 3.29 KB
/
output_reordering.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
---
title: "Output Reordering script"
output: html_notebook
---
# by Gandhar Mahadeshwar
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```
```{r}
library(dplyr)
library(foreign)
```
```{r}
As <- read.arff("~/Desktop/23S Results/23S_As.arff")
Cs <- read.arff("~/Desktop/23S Results/23S_Cs.arff")
Gs <- read.arff("~/Desktop/23S Results/23S_Gs.arff")
Us <- read.arff("~/Desktop/23S Results/23S_Us.arff")
profile <- read.table("~/Desktop/23S Results/1mg2mn_ecoli_23s/1mg2mn_ecoli_23s_ecoli_23s_april23_2021_profile.txt", header = TRUE)
cumdf_adenine_nuc <- (profile %>% filter(Sequence == "A"))$Nucleotide
cumdf_cytosines_nuc <- (profile %>% filter(Sequence == "C"))$Nucleotide
cumdf_guanine_nuc <- (profile %>% filter(Sequence == "G"))$Nucleotide
cumdf_uracils_nuc <- (profile %>% filter(Sequence == "U"))$Nucleotide
#adenines:total Mn2+ mutation rate above 0.5 were classified as a “m1A”
wc_cutoff_A <- which(As$Mn_rate > 0.5)
#uridines:total Mn2+ mutation rate above 0.5 were classified as a “m3U or m1acp3Y”
wc_cutoff_U <- which(Us$Mn_rate > 0.5)
#guanosines: total Mn2+ mutation rate above 0.05 and total Mg2+ mutation rate above 0.0015 were classified as “m7G”
#wc_cutoff_G <- Gs %>% filter(Mn_rate > 0.05) %>% (Untreated_rate > 0.00)
wc_cutoff_G <- which((Gs$Mn_rate > 0.5) & (Gs$Untreated_rate > 0.5))
pred_As <- cbind(rep("A", length(cumdf_adenine_nuc)), cumdf_adenine_nuc, As %>%
select(starts_with("predict"))) %>% rename(cumdf_nuc = cumdf_adenine_nuc) %>%
rename(seq = 'rep("A", length(cumdf_adenine_nuc))')
pred_As$`predicted Modifications` <- as.character(pred_As$`predicted Modifications`)
pred_As$`predicted Modifications`[wc_cutoff_A] <- "m1A"
pred_As$`prediction margin`[wc_cutoff_A] <- "n/a"
pred_Cs <- cbind(rep("C", length(cumdf_cytosines_nuc)), cumdf_cytosines_nuc, Cs %>%
select(starts_with("predict"))) %>% rename(cumdf_nuc = cumdf_cytosines_nuc) %>%
rename(seq = 'rep("C", length(cumdf_cytosines_nuc))')
pred_Cs$`predicted Modifications` <- as.character(pred_Cs$`predicted Modifications`)
pred_Gs <- cbind(rep("G", length(cumdf_guanine_nuc)), cumdf_guanine_nuc, Gs %>%
select(starts_with("predict"))) %>% rename(cumdf_nuc = cumdf_guanine_nuc) %>%
rename(seq = 'rep("G", length(cumdf_guanine_nuc))')
pred_Gs$`predicted Modifications` <- as.character(pred_Gs$`predicted Modifications`)
pred_Gs$`predicted Modifications`[wc_cutoff_G] <- "m7G"
pred_Gs$`prediction margin`[wc_cutoff_G] <- "n/a"
pred_Us <- cbind(rep("U", length(cumdf_uracils_nuc)), cumdf_uracils_nuc, Us %>%
select(starts_with("predict"))) %>% rename(cumdf_nuc = cumdf_uracils_nuc) %>%
rename(seq = 'rep("U", length(cumdf_uracils_nuc))')
pred_Us$`predicted Modifications` <- as.character(pred_Us$`predicted Modifications`)
pred_Us$`predicted Modifications`[wc_cutoff_U] <- "m3U or m1acp3Y"
pred_Us$`prediction margin`[wc_cutoff_U] <- "n/a"
```
```{r}
unordered <- rbind(pred_As, pred_Cs, pred_Gs, pred_Us)
newpred <- unordered[order(as.numeric(unordered$cumdf_nuc)),]
newpred <- newpred %>% rename(Nucleotide = 'seq') %>% rename(Position = 'cumdf_nuc') %>% rename('Prediction Margin' = 'prediction margin') %>%
rename ('Predicted Modifications' = 'predicted Modifications')
write.csv(newpred, file = "ordered_predictions.csv", row.names = FALSE)
```