-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathind_prof_origpat_local_sect.R
executable file
·91 lines (71 loc) · 3.32 KB
/
ind_prof_origpat_local_sect.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
# This program generates profiles by patient (ie: bp vs log2 ratio CGH) for a specific section provided inital and ending bp positions
# In one pdf you will have as many graphs as patients in an specific group
# Type in the data at the beginning with the chorm and arm for the graphs and phenotype
# Make sure you include all the patients you need in the group as this program will not read the phenotype
# modify the for loop for the name you used for the list of patients
# This program may also add 2 or 4 vertical lines to show where the segments start, you have to input the position for them at the end
# example in local: source("/Users/gina/Desktop/Gina/BioTec/Homology/Code Orig and Modified/myPaper/ind_prof_origpat_local.R");
# TODO: Pass arguments in an efficient way
begPath <- "~/Research/";
dataSet <- "horlings";
cghStart <- 6;
chrArmSect <- "17q.s2";
chr <- "4";
arm <- "q";
starting <- 32489785 #starting position for the section in bp
ending <- 43339849 #ending position for the section in bp
phenotype <- "HER2";
subdir <- "sect"
#for 17qs1
#starting <- 25440972
#ending <- 37812853
#for 17qs2
#starting <- 32489785
#ending <- 43339849
dataPath <- paste(begPath, '/Data/', dataSet, '/', dataSet, '_data_full.txt', sep='');
data <- read.table(dataPath, header=T, sep='\t');
cgh <- data[, cghStart:ncol(data)];
numPats <- ncol(cgh);
# getting the indices for each phenotype group
# Read the phenotype data
phenFile <- paste(dataSet, "phen.txt", sep="_");
phenPath <- paste(begPath, "Data", dataSet, phenFile, sep="/");
phenData <- read.table(phenPath, header=TRUE, sep="\t");
print(paste("phenPath",phenPath, sep=" "));
phen1indices <- which(phenData[,phenotype] == 1);
phen1num<-length(phen1indices);
print(paste("There are ", phen1num, " phenotype1", sep=""));
#caIndices <- intersect(which(data$Chrom == chr), which(data$Arm == arm));
dataSect <- data[(data$Chrom==chr & data$bp>=starting & data$bp<=ending),]
Mbp <- dataSect$bp/1000000
cgh <- dataSect[,cghStart:ncol(data)]
maxY <- max(cgh[,phen1indices])
minY <- min(cgh[,phen1indices])
# Write folder
writeDir = paste(begPath, '/Data/', dataSet,'/',subdir, '/vis/ind_profiles/',phenotype,'/', sep="", collaspe=NULL);
if(!file.exists(writeDir)) {
dir.create(writeDir, recursive=TRUE);
}
print(paste("On ", chrArmSect, sep=''));
profilePath <- paste(writeDir, phenotype,'_',chrArmSect,'.pdf', sep='');
pdf(profilePath, width=9, height=6);
par(mfrow=c(3,2), cex.main=1.5,cex.lab=1.2,mar=c(4,3,2,2) + 0.1);
# for only 2 plots
# pdf(profilePath, width=9, height=3.3);
# par(mfrow=c(1,2), cex.lab=1, cex.main=1.23, mar=c(4,3,2,2) + 0.1);
for(p in phen1indices) {
profile.title <- paste('Patient ', colnames(cgh[p]), ' on ', chrArmSect, sep='');
profile.xlab <- 'Mbp';
profile.ylab <- 'log2 ratio';
# bp <- data[caIndices, 5]; #kwek has bp on the 5th column
# bp <- data[caIndices, 4]; # bp is the 4th column in horlings
patData <- cgh[, p];
plot(Mbp, patData, main=profile.title, xlab="", ylab="", ylim = c(minY, maxY), pch=20, type="o");
# plot(bp, patData, main=profile.title, xlab="", ylab="", ylim = c(-10, 10), pch=20, type="o");
abline(h=c(0), lty=2, col="black");
# abline(v=72021300, lty=3, col="black");
# abline(v=90704922, lty=3, col="black");
# abline(v=81980978, lty=3, col="red");
# abline(v=101292561, lty=3, col="red");
}
dev.off();