-
Notifications
You must be signed in to change notification settings - Fork 2
/
convert_sj_to_psi.R
70 lines (47 loc) · 2.24 KB
/
convert_sj_to_psi.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
#!/usr/bin/env Rscript
library("dasper")
library(GenomicRanges)
library(tidyverse)
library(data.table)
library("optparse")
#' Annotate, filter and normalise one sample's junctions and write them to CSV.
#'
#' Loads the junction file with dasper, annotates the junctions against a TxDb
#' built from `gtf`, drops junctions below the raw count threshold, normalises
#' the remainder, and writes the annotated normalised table to
#' `{output_folder}/{sample_name}_normalized_annotated.csv`.
#'
#' @param sample_name Name used for the output file and for the value column
#'   in the written table.
#' @param sample_file Path to the junction file, passed to
#'   `junction_load(junction_paths = ...)`.
#' @param gtf Path to the GTF annotation used to build the TxDb.
#' @param output_folder Folder the CSV is written into. It is not created
#'   here, so it must already exist.
#' @param mincount Raw count threshold passed to `junction_filter()` as
#'   `count_thresh = c(raw = mincount)`.
#' @return The value of the final `print()` call (the status string); the
#'   function is called for its side effect of writing the CSV.
output_the_psi_files <- function(sample_name,
                                 sample_file,
                                 gtf,
                                 output_folder,
                                 mincount = 1) {
  output_filepath_normed <- glue::glue(
    "{output_folder}/{sample_name}_normalized_annotated.csv"
  )

  ref <- GenomicFeatures::makeTxDbFromGFF(gtf, format = "gtf")

  junctions <- junction_load(junction_paths = sample_file)
  junctions <- junction_annot(junctions, ref)
  junctions_filtered <- junction_filter(
    junctions,
    count_thresh = c(raw = mincount)
  ) %>%
    junction_norm()

  # Per-junction annotation columns kept in the output.
  annotated_clustered <- as.data.table(junctions_filtered@rowRanges) %>%
    dplyr::select(
      seqnames, start, end, strand_junction, type,
      gene_id_junction, index, clusters
    )

  # The "norm" assay holds the normalised values, one row per junction.
  normed <- as.data.table(
    SummarizedExperiment::assays(junctions_filtered)[["norm"]]
  )
  # seq_len() (not 1:nrow()) so that an empty table gets an empty index
  # instead of c(1, 0).
  # NOTE(review): the join assumes the `index` column in rowRanges equals the
  # row position in the filtered object - confirm against dasper.
  normed$index <- seq_len(nrow(normed))

  annotated_clustered_normed <- annotated_clustered %>%
    left_join(normed, by = "index")
  # With a single input file the value column is expected to be named
  # "count_1"; rename it to the sample name.
  setnames(annotated_clustered_normed, "count_1", sample_name)

  fwrite(annotated_clustered_normed, output_filepath_normed)
  print("File all written!")
}
# Command-line interface ----
# Parses the options and runs output_the_psi_files() for one sample.
option_list <- list(
  make_option(c("-n", "--sample_name"), type = "character", default = NULL,
              help = "the final sample output name", metavar = "character"),
  make_option(c("-f", "--sample_file"), type = "character", default = NULL,
              help = "the final sample file path", metavar = "character"),
  make_option(c("-g", "--gtf"), type = "character", default = NULL,
              help = "GTF to annotated against", metavar = "character"),
  # NOTE(review): the default "out.txt" is used as a folder name; it is kept
  # unchanged for backward compatibility - confirm whether "." was intended.
  make_option(c("-o", "--output_folder"), type = "character",
              default = "out.txt",
              help = "folder to write the output CSV into",
              metavar = "character"),
  # Parsed as an integer so the count threshold is numeric, not a string.
  make_option(c("-m", "--mincount"), type = "integer", default = 1L,
              help = "mincount to consider a junction", metavar = "integer")
)
opt_parser <- OptionParser(option_list = option_list)
opt <- parse_args(opt_parser)

# Options whose default is NULL are required; fail early with usage help
# rather than with an obscure downstream error.
required_opts <- c("sample_name", "sample_file", "gtf")
missing_opts <- required_opts[
  vapply(required_opts, function(nm) is.null(opt[[nm]]), logical(1))
]
if (length(missing_opts) > 0) {
  print_help(opt_parser)
  stop(
    "Missing required option(s): ",
    paste0("--", missing_opts, collapse = ", "),
    call. = FALSE
  )
}

# Forward --mincount too; it was previously parsed but never passed on.
output_the_psi_files(
  opt$sample_name,
  opt$sample_file,
  opt$gtf,
  opt$output_folder,
  mincount = opt$mincount
)