diff --git a/Postre_app.zip b/Postre_app.zip index e2284c1..4b09d75 100644 Binary files a/Postre_app.zip and b/Postre_app.zip differ diff --git a/Postre_app/.Rhistory b/Postre_app/.Rhistory index b1b2e24..134a286 100644 --- a/Postre_app/.Rhistory +++ b/Postre_app/.Rhistory @@ -1,512 +1,512 @@ -genesWithSignal<-genesWithSignal$longitudPicos -nroObservacion_limiteSuperior_RegionKneePoint<-elbowPoint[1,1] ##Pq va en decreasing -nroObservacion_limiteInferior_RegionKneePoint<-elbowPoint[1,2] ##Pq va en decreasing -kneePoint_ejeX<-elbowPoint[1,3] -elbowPoint<-elbowPoint[1,1]##ESE -genesWithSignal[elbowPoint] -targetLongitudPico<-genesWithSignal[elbowPoint] -plot(x=1:length(genesWithSignal), y=genesWithSignal, type = "l", lwd=3, main = "H3K27me3 peak size associated to TSS \n in ESCs", xlab = "TSS", ylab="H3K27me3 peak size") -abline(h=c(targetLongitudPico),col="red", lwd=2, lty=2) -abline(v=nroObservacion_limiteSuperior_RegionKneePoint,col="red", lwd=2, lty=2) -abline(v=kneePoint_ejeX, col="blue", lwd=2, lty=2) -abline(v=nroObservacion_limiteInferior_RegionKneePoint, col="green", lwd=2, lty=2) -text(x= 20000, y=12000, paste0(targetLongitudPico, " bp"), col="red") -thresh<-targetLongitudPico -##################################################################### -##### seleccion genes (TSS) de desarrollo, aquellos cuyo overalapped H3K27me3 peak > thresh -##################################################################### -genesDesarrollo_allInfo<-developForPlot[developForPlot$longitudPicos>=thresh,] -selectedDevelopGenes<-sort(unique(as.character(genesDesarrollo_allInfo$gene))) -######################################################################## -##### no nos interesan ni los LINC ni los LOC asi que vamos a quitarlos -######################################################################## -AllGenes<-as.character(unique(developForPlot$gene))#27090 -LINCs<-AllGenes[grep("^LINC.*", AllGenes)] -LOCs<-AllGenes[grep("^LOC[0-9].*", AllGenes)] 
-NotRelevant<-c(LINCs,LOCs)#LINCs and LOCs -########################################################################## -###### seleccion de los TSS de interes -########################################################################## -selectedDevelopGenes<-selectedDevelopGenes[!(selectedDevelopGenes %in% NotRelevant)] -print(thresh) -print(length(selectedDevelopGenes)) -#################### -## Saving results -#################### -hESC_polyCombGenes<-selectedDevelopGenes -save(hESC_polyCombGenes, file = "~/Dropbox/Cantabria/PhD_Project/Resultados/genesDesarrollo/Robjects/hESC_polyCombGenes.RData") -tiff("/home/victor/Documentos/phD/ArticlesWritting/Postre_manuscript/Revision_NAR/Manuscrito_New_Version/SupplementaryMaterial/SupplementaryFigures/Supplementary_Figure_1/ESC.tiff", units="in", +for(pheno in allPhenos){ +print(pheno) +for(tMode in runningMode_options){ +print(tMode) +##subset() does not work well with a==a, important to know... +filt_AllInfo<-subset(All_InfoPredictions, runMode==tMode & Phenotype==pheno) +iterInfo<-backBoneMat +#Get statistics +iterInfo$N_Patients<-nrow(filt_AllInfo) +iterInfo$PercentagePredictions<-round((sum(filt_AllInfo$PathogenicScore>=0.8)/nrow(filt_AllInfo))*100, digits = 0) +iterInfo$Phenotype<-pheno +iterInfo$Mode<-tMode +#Adding info +summaryInfo<-rbind(summaryInfo, iterInfo) +} +} +summaryInfo$Phenotype<-factor(summaryInfo$Phenotype, +levels=allPhenos, +ordered = TRUE)##For the ggplot +lapply(summaryInfo, class) +##Quiero el de multiple groups +# http://www.sthda.com/english/wiki/ggplot2-barplots-quick-start-guide-r-software-and-data-visualization +library(ggplot2) +# x11()##TO AVOID GGPLOT2 MASSIVE COLLAPSE, OR USE GGSAVE +##He hecho un recorte del grafico, luego al insertarlo en office, he quitado las labels y las he puesto manualmente con mucha mejor resolución, asi como la leyenda 
+tiff("/home/victor/Documentos/phD/ArticlesWritting/Postre_manuscript/Revision_NAR/Manuscrito_New_Version/figures_and_tables/Figures/Figure4/Fig4a.tiff", units="in", +width=10, height=5, res=300) +p<-ggplot(data=summaryInfo, aes(x=Phenotype, y=PercentagePredictions, fill=Mode)) + +geom_bar(stat="identity", position=position_dodge()) +coord_flip() +p + theme_classic() +dev.off() +rm(list=ls())##Cleaning variables +#################################################################### +## Analyisng frequency of Long-Range vs Direct +# panel b) +#################################################################### +load(file = "~/Dropbox/Cantabria/PhD_Project/Resultados/softwareObjects/multiplePatientAnalysis/StandardMode_ParsedResults_MultiplePatientAnalyses.RData") +InfoPredictions<-cohort_results_standard$candidateGenesInfo +##Withouth Filt +allPhenoAllSV<-prop.table(colSums(InfoPredictions[,c("N_LR_Mech","N_Direct_Mech")])) +##Filt for Inv and Transloc +filt_InfoPred<-subset(InfoPredictions, TypeSV=="Inversion" | TypeSV=="Translocation") +allPheno_InvTrans<-prop.table(colSums(filt_InfoPred[,c("N_LR_Mech","N_Direct_Mech")])) +##Filt for Deletions Duplications +filt_InfoPred<-subset(InfoPredictions, TypeSV=="Deletion" | TypeSV=="Duplication") +allPheno_DelDup<-prop.table(colSums(filt_InfoPred[,c("N_LR_Mech","N_Direct_Mech")])) +########################### +## Hacer Stacked Barplots +########################### +library(ggplot2) +#Differences between phenos probably arised due to differenceson the depth of enhancer maps annotation +#Stacked barplot for all +df_stacked<-data.frame("percentages"=c(allPhenoAllSV,allPheno_DelDup, allPheno_InvTrans), +"typeMech"=factor(rep.int(x=c("LongRange","Direct"),times = 3), +levels = c("LongRange","Direct"), ordered = TRUE), +"groupObserv"=factor(c(rep.int(x="allPhenoAllSV", times=2), +rep.int(x="allPhenoDelDup", times=2), +rep.int(x="allPhenoInvTrans", times=2)), +levels = c("allPhenoAllSV","allPhenoDelDup","allPhenoInvTrans"), 
ordered = TRUE) +) +###Generating StackedBarplot +library(ggplot2) +tiff("/home/victor/Documentos/phD/ArticlesWritting/Postre_manuscript/Revision_NAR/Manuscrito_New_Version/figures_and_tables/Figures/Figure4/Fig4c.tiff", units="in", +width=5, height=5, res=300) +ggplot(df_stacked, aes(x=groupObserv, y=percentages, fill=typeMech))+ +geom_bar(stat = "identity", width = .7) + +ggtitle("Distribution Pathological mechanisms") + +theme(axis.text=element_text(size=12)) + +scale_fill_manual(values = c("#e699ff","#fce94f")) + theme_classic() +dev.off() +# 660066 +#fce94f +###e699ff +rm(list = ls()) +#################################################################################### +## New panel, comparing % pathogenic predictions +#################################################################################### +##Pathogenic SVs 270 patients +load(file = "~/Dropbox/Cantabria/PhD_Project/Resultados/softwareObjects/multiplePatientAnalysis/StandardMode_ParsedResults_MultiplePatientAnalyses.RData") +InfoPredictions_Standard<-cohort_results_standard$candidateGenesInfo +load(file = "~/Dropbox/Cantabria/PhD_Project/Resultados/softwareObjects/multiplePatientAnalysis/HighSpecificityMode_ParsedResults_MultiplePatientAnalyses.RData") +InfoPredictions_HighSpecificity<-cohort_results_hiSpe$candidateGenesInfo +#For Standard Mode +sum(InfoPredictions_Standard$PathogenicScore>=0.8)/nrow(InfoPredictions_Standard) +#For High Specificity Mode +sum(InfoPredictions_HighSpecificity$PathogenicScore>=0.8)/nrow(InfoPredictions_HighSpecificity) +##################################### +##Loading healthy svs predictions +##################################### +##Standard Mode +load(file = "~/Dropbox/Cantabria/PhD_Project/Resultados/softwareObjects/Robjects/candidateGenesInfo_10000_HealthyOriginal_SVs.RData") +postre_10000SVs<-originalHealthy_candidateGenesInfo +load(file = "~/Dropbox/Cantabria/PhD_Project/Resultados/softwareObjects/Robjects/candidateGenesInfo_2980_HealthyOriginal_SVs.RData") 
+postre_2980SVs<-originalHealthy_candidateGenesInfo +postre_healthy_standard<-unique(rbind(postre_10000SVs, postre_2980SVs))##unique to avoid duplicated info counted twice +rm(postre_10000SVs, postre_2980SVs) +sum(postre_healthy_standard$PathogenicScore>=0.8)/nrow(postre_healthy_standard)*100#4.3% +##High specificity mode +load(file = "~/Dropbox/Cantabria/PhD_Project/Resultados/softwareObjects/Robjects/candidateGenesInfo_10000_HighSpecificity_HealthyOriginal_SVs.RData") +postre_10000SVs<-originalHealthy_candidateGenesInfo +load(file = "~/Dropbox/Cantabria/PhD_Project/Resultados/softwareObjects/Robjects/candidateGenesInfo_2980_HighSpecificity_HealthyOriginal_SVs.RData") +postre_2980SVs<-originalHealthy_candidateGenesInfo +postre_healthy_standard<-unique(rbind(postre_10000SVs, postre_2980SVs))##unique to avoid duplicated info counted twice +rm(postre_10000SVs, postre_2980SVs) +sum(postre_healthy_standard$PathogenicScore>=0.8)/nrow(postre_healthy_standard)*100#1% +# For standard mode +# 59% and 4% +# For high specificity mode +# 23% and 1% +dataPlot<-data.frame("percentages"=c(59,4, +23,1), +"Mode"=factor(x=c("St","St","Hs","Hs"), levels = c("St","Hs"), ordered = TRUE), +"groupSV"=factor(x=c("Patho","Healthy", "Patho","Healthy"), +levels = c("Patho","Healthy"), ordered = TRUE)) +library(ggplot2) +ggplot(data = dataPlot, aes(x = Mode, y = percentages, fill = groupSV )) + +geom_bar(stat = "identity", width = 0.4, +position=position_dodge(width = 0.5))+ +scale_fill_manual(values = c("#00ccff","#cc66ff"))+theme(legend.position = "none") + theme_classic() +# theme_set(theme_classic()) +tiff("/home/victor/Documentos/phD/ArticlesWritting/Postre_manuscript/FiguresPresentationTool/FigPercentagesPathoVsHealthy.tiff", units="in", width=5, height=5, res=300) -plot(x=1:length(genesWithSignal), y=genesWithSignal, type = "l", lwd=3, main = "H3K27me3 peak size associated to TSS \n in ESCs", xlab = "TSS", ylab="H3K27me3 peak size") -abline(h=c(targetLongitudPico),col="red", lwd=2, 
lty=2) -abline(v=nroObservacion_limiteSuperior_RegionKneePoint,col="red", lwd=2, lty=2) -abline(v=kneePoint_ejeX, col="blue", lwd=2, lty=2) -abline(v=nroObservacion_limiteInferior_RegionKneePoint, col="green", lwd=2, lty=2) -text(x= 20000, y=12000, paste0(targetLongitudPico, " bp"), col="red") +ggplot(data = dataPlot, aes(x = Mode, y = percentages, fill = groupSV )) + +geom_bar(stat = "identity", width = 0.4, +position=position_dodge(width = 0.5))+ +scale_fill_manual(values = c("#00ccff","#cc66ff"))+theme(legend.position = "none") + theme_classic() + coord_flip() +dev.off() +# theme_set(theme_classic()) +tiff("/home/victor/Documentos/phD/ArticlesWritting/Postre_manuscript/FiguresPresentationTool/FigPercentagesPathoVsHealthy.tiff", units="in", +width=10, height=10, res=300) +ggplot(data = dataPlot, aes(x = Mode, y = percentages, fill = groupSV )) + +geom_bar(stat = "identity", width = 0.4, +position=position_dodge(width = 0.5))+ +scale_fill_manual(values = c("#00ccff","#cc66ff"))+theme(legend.position = "none") + theme_classic() +dev.off() +# theme_set(theme_classic()) +tiff("/home/victor/Documentos/phD/ArticlesWritting/Postre_manuscript/FiguresPresentationTool/FigPercentagesPathoVsHealthy.tiff", units="in", +width=10, height=6, res=300) +ggplot(data = dataPlot, aes(x = Mode, y = percentages, fill = groupSV )) + +geom_bar(stat = "identity", width = 0.4, +position=position_dodge(width = 0.5))+ +scale_fill_manual(values = c("#00ccff","#cc66ff"))+theme(legend.position = "none") + theme_classic() dev.off() -tiff("/home/victor/Documentos/phD/ArticlesWritting/Postre_manuscript/Revision_NAR/Manuscrito_New_Version/SupplementaryMaterial/SupplementaryFigures/Supplementary_Figure_1/ESC.tiff", units="in", +# theme_set(theme_classic()) +tiff("/home/victor/Documentos/phD/ArticlesWritting/Postre_manuscript/FiguresPresentationTool/FigPercentagesPathoVsHealthy.tiff", units="in", +width=10, height=8, res=300) +ggplot(data = dataPlot, aes(x = Mode, y = percentages, fill = groupSV )) + 
+geom_bar(stat = "identity", width = 0.4, +position=position_dodge(width = 0.5))+ +scale_fill_manual(values = c("#00ccff","#cc66ff"))+theme(legend.position = "none") + theme_classic() +dev.off() +# theme_set(theme_classic()) +tiff("/home/victor/Documentos/phD/ArticlesWritting/Postre_manuscript/FiguresPresentationTool/FigPercentagesPathoVsHealthy.tiff", units="in", +width=10, height=8, res=300) +ggplot(data = dataPlot, aes(x = Mode, y = percentages, fill = groupSV )) + +geom_bar(stat = "identity", width = 0.4, +position=position_dodge(width = 0.5))+ +scale_fill_manual(values = c("#00ccff","#cc66ff"))+theme(legend.position = "none") + theme_classic() +dev.off() +# theme_set(theme_classic()) +tiff("/home/victor/Documentos/phD/ArticlesWritting/Postre_manuscript/FiguresPresentationTool/FigPercentagesPathoVsHealthy.tiff", units="in", +width=10, height=8, res=300) +ggplot(data = dataPlot, aes(x = Mode, y = percentages, fill = groupSV )) + +geom_bar(stat = "identity", width = 0.2, +position=position_dodge(width = 0.5))+ +scale_fill_manual(values = c("#00ccff","#cc66ff"))+theme(legend.position = "none") + theme_classic() +dev.off() +# theme_set(theme_classic()) +tiff("/home/victor/Documentos/phD/ArticlesWritting/Postre_manuscript/FiguresPresentationTool/FigPercentagesPathoVsHealthy.tiff", units="in", width=8, height=4, res=300) -plot(x=1:length(genesWithSignal), y=genesWithSignal, type = "l", lwd=3, main = "H3K27me3 peak size associated to TSS \n in ESCs", xlab = "TSS", ylab="H3K27me3 peak size") -abline(h=c(targetLongitudPico),col="red", lwd=2, lty=2) -abline(v=nroObservacion_limiteSuperior_RegionKneePoint,col="red", lwd=2, lty=2) -abline(v=kneePoint_ejeX, col="blue", lwd=2, lty=2) -abline(v=nroObservacion_limiteInferior_RegionKneePoint, col="green", lwd=2, lty=2) -text(x= 20000, y=12000, paste0(targetLongitudPico, " bp"), col="red") +ggplot(data = dataPlot, aes(x = Mode, y = percentages, fill = groupSV )) + +geom_bar(stat = "identity", width = 0.2, 
+position=position_dodge(width = 0.5))+ +scale_fill_manual(values = c("#00ccff","#cc66ff"))+theme(legend.position = "none") + theme_classic() dev.off() -tiff("/home/victor/Documentos/phD/ArticlesWritting/Postre_manuscript/Revision_NAR/Manuscrito_New_Version/SupplementaryMaterial/SupplementaryFigures/Supplementary_Figure_1/ESC.tiff", units="in", -width=7, height=4, res=300) -plot(x=1:length(genesWithSignal), y=genesWithSignal, type = "l", lwd=3, main = "H3K27me3 peak size associated to TSS \n in ESCs", xlab = "TSS", ylab="H3K27me3 peak size") -abline(h=c(targetLongitudPico),col="red", lwd=2, lty=2) -abline(v=nroObservacion_limiteSuperior_RegionKneePoint,col="red", lwd=2, lty=2) -abline(v=kneePoint_ejeX, col="blue", lwd=2, lty=2) -abline(v=nroObservacion_limiteInferior_RegionKneePoint, col="green", lwd=2, lty=2) -text(x= 20000, y=12000, paste0(targetLongitudPico, " bp"), col="red") +# theme_set(theme_classic()) +tiff("/home/victor/Documentos/phD/ArticlesWritting/Postre_manuscript/FiguresPresentationTool/FigPercentagesPathoVsHealthy.tiff", units="in", +width=8, height=4, res=300) +ggplot(data = dataPlot, aes(x = Mode, y = percentages, fill = groupSV )) + +geom_bar(stat = "identity", width = 0.2, +position=position_dodge(width = 0.5))+ +scale_fill_manual(values = c("#00ccff","#cc66ff"))+theme(legend.position = "none") + theme_classic() + coord_flip() dev.off() -tiff("/home/victor/Documentos/phD/ArticlesWritting/Postre_manuscript/Revision_NAR/Manuscrito_New_Version/SupplementaryMaterial/SupplementaryFigures/Supplementary_Figure_1/ESC.tiff", units="in", -width=6, height=3.8, res=300) -plot(x=1:length(genesWithSignal), y=genesWithSignal, type = "l", lwd=3, main = "H3K27me3 peak size associated to TSS \n in ESCs", xlab = "TSS", ylab="H3K27me3 peak size") -abline(h=c(targetLongitudPico),col="red", lwd=2, lty=2) -abline(v=nroObservacion_limiteSuperior_RegionKneePoint,col="red", lwd=2, lty=2) -abline(v=kneePoint_ejeX, col="blue", lwd=2, lty=2) 
-abline(v=nroObservacion_limiteInferior_RegionKneePoint, col="green", lwd=2, lty=2) -text(x= 20000, y=12000, paste0(targetLongitudPico, " bp"), col="red") +# theme_set(theme_classic()) +tiff("/home/victor/Documentos/phD/ArticlesWritting/Postre_manuscript/FiguresPresentationTool/FigPercentagesPathoVsHealthy.tiff", units="in", +width=8, height=4, res=300) +ggplot(data = dataPlot, aes(x = Mode, y = percentages, fill = groupSV )) + +geom_bar(stat = "identity", width = 0.8, +position=position_dodge(width = 0.5))+ +scale_fill_manual(values = c("#00ccff","#cc66ff"))+theme(legend.position = "none") + theme_classic() + coord_flip() dev.off() -############################################################################## -##Master Regulators, those above the inflection point of the H3K27me3 curve -############################################################################## -###### filtering OverlapResults -## si los transcritos comparten TSS nos quedaremos con los 2 como un único -## pero si no lo comparten los trataremos por separado -load(file = "~/Dropbox/Cantabria/PhD_Project/Resultados/genesDesarrollo/bofsMetilacion/pi2_findingDevelopGenes.RData") -#vamos a trabajar con cada TSS por separado -#de forma que si los dos tienen un pico de H3K27me3, trabajaremos con los 2 -# y si solo hay uno, nos quedaremos con ese solo -#no obstante si que nos quedaremos con una única fila, si los dos transcritos presentan el mismo TSS y estan mapeados con el mismo pico -dim(developmentMatrix) -developmentMatrix<-unique(developmentMatrix) # de 34513 a 34513 (expected) -dim(developmentMatrix) -developmentMatrix<-as.data.frame(developmentMatrix) -#### anyadir columna lengths -#las pos picos estan como factores, las pasamos a numerico -developmentMatrix$peakStart<-as.numeric(as.character(developmentMatrix$peakStart)) -developmentMatrix$peakEnd<-as.numeric(as.character(developmentMatrix$peakEnd)) -developmentMatrix$longitudPicos<-developmentMatrix$peakEnd - developmentMatrix$peakStart 
-min(developmentMatrix$longitudPicos) #0 as expected -developForPlot<-developmentMatrix -rm(developmentMatrix) -developForPlot<-developForPlot[order(developForPlot$longitudPicos,decreasing = F),] -################################### -## Computing line inflexion point -################################### -library(inflection) -######################### -##con el metodo ESE -##Ahora si quitamos todos los que tienen 0 bases de H3K27me3 -##Reordenamos de mayor a menor para tenerlo como un scree plot -genesWithSignal<-developForPlot[order(developForPlot$longitudPicos, decreasing = TRUE),] -#To compute inflection point excluding the 0 values -genesWithSignal<-subset(genesWithSignal, longitudPicos>0)$longitudPicos -##Tal y como indican en el paper para el scree plot, que es el mismo concepto -elbowPoint<-findiplist(x = 1:length(genesWithSignal), y = genesWithSignal, -index = 0## -) -##Check if method EDE is applicable -if(is.na(elbowPoint[2,3])){ -print("method EDE not applicable") -} -##Loading again data of genes without exluding those without signal (peak length 0) -genesWithSignal<-developForPlot[order(developForPlot$longitudPicos, decreasing = TRUE),] -genesWithSignal<-genesWithSignal$longitudPicos -nroObservacion_limiteSuperior_RegionKneePoint<-elbowPoint[1,1] ##Pq va en decreasing -nroObservacion_limiteInferior_RegionKneePoint<-elbowPoint[1,2] ##Pq va en decreasing -kneePoint_ejeX<-elbowPoint[1,3] -elbowPoint<-elbowPoint[1,1]##ESE -genesWithSignal[elbowPoint] -targetLongitudPico<-genesWithSignal[elbowPoint] -plot(x=1:length(genesWithSignal), y=genesWithSignal, type = "l", lwd=3, main = "H3K27me3 peak size associated to TSS \n in NCCs", xlab = "TSS", ylab="H3K27me3 peak size") -abline(h=c(targetLongitudPico),col="red", lwd=2, lty=2) -abline(v=nroObservacion_limiteSuperior_RegionKneePoint,col="red", lwd=2, lty=2) -abline(v=kneePoint_ejeX, col="blue", lwd=2, lty=2) -abline(v=nroObservacion_limiteInferior_RegionKneePoint, col="green", lwd=2, lty=2) -text(x= 20000, 
y=16000, paste0(targetLongitudPico, " bp"), col="red") -thresh<-targetLongitudPico -##################################################################### -##### seleccion genes (TSS) de desarrollo, aquellos cuyo overalapped H3K27me3 peak > thresh -##################################################################### -genesDesarrollo_allInfo<-developForPlot[developForPlot$longitudPicos>=thresh,] -selectedDevelopGenes<-sort(unique(as.character(genesDesarrollo_allInfo$gene))) -######################################################################## -##### no nos interesan ni los LINC ni los LOC asi que vamos a quitarlos -######################################################################## -AllGenes<-as.character(unique(developForPlot$gene))#27090 -LINCs<-AllGenes[grep("^LINC.*", AllGenes)] -LOCs<-AllGenes[grep("^LOC[0-9].*", AllGenes)] -NotRelevant<-c(LINCs,LOCs)#LINCs and LOCs -########################################################################## -###### seleccion de los TSS de interes -########################################################################## -selectedDevelopGenes<-selectedDevelopGenes[!(selectedDevelopGenes %in% NotRelevant)] -print(thresh) -print(length(selectedDevelopGenes)) -#################### -## Saving results -#################### -hs_pi2_polyCombGenes<-selectedDevelopGenes -save(hs_pi2_polyCombGenes, file = "~/Dropbox/Cantabria/PhD_Project/Resultados/genesDesarrollo/Robjects/hs_pi2_polyCombGenes.RData") -tiff("/home/victor/Documentos/phD/ArticlesWritting/Postre_manuscript/Revision_NAR/Manuscrito_New_Version/SupplementaryMaterial/SupplementaryFigures/Supplementary_Figure_1/NCCs.tiff", units="in", -width=6, height=3.8, res=300) -plot(x=1:length(genesWithSignal), y=genesWithSignal, type = "l", lwd=3, main = "H3K27me3 peak size associated to TSS \n in NCCs", xlab = "TSS", ylab="H3K27me3 peak size") -abline(h=c(targetLongitudPico),col="red", lwd=2, lty=2) -abline(v=nroObservacion_limiteSuperior_RegionKneePoint,col="red", lwd=2, lty=2) 
-abline(v=kneePoint_ejeX, col="blue", lwd=2, lty=2) -abline(v=nroObservacion_limiteInferior_RegionKneePoint, col="green", lwd=2, lty=2) -text(x= 20000, y=16000, paste0(targetLongitudPico, " bp"), col="red") +# theme_set(theme_classic()) +tiff("/home/victor/Documentos/phD/ArticlesWritting/Postre_manuscript/FiguresPresentationTool/FigPercentagesPathoVsHealthy.tiff", units="in", +width=8, height=4, res=300) +ggplot(data = dataPlot, aes(x = Mode, y = percentages, fill = groupSV )) + +geom_bar(stat = "identity", width = 0.5, +position=position_dodge(width = 0.5))+ +scale_fill_manual(values = c("#00ccff","#cc66ff"))+theme(legend.position = "none") + theme_classic() + coord_flip() dev.off() -############################################################################## -##Master Regulators, those above the inflection point of the H3K27me3 curve -############################################################################## -###### filtering OverlapResults -## si los transcritos comparten TSS nos quedaremos con los 2 como un único -## pero si no lo comparten los trataremos por separado -load(file = "~/Dropbox/Cantabria/PhD_Project/Resultados/genesDesarrollo/cardiomiocitos/heart_findingDevelopGenes.RData") -#vamos a trabajar con cada TSS por separado -#de forma que si los dos tienen un pico de H3K27me3, trabajaremos con los 2 -# y si solo hay uno, nos quedaremos con ese solo -#no obstante si que nos quedaremos con una única fila, si los dos transcritos presentan el mismo TSS y estan mapeados con el mismo pico -dim(developmentMatrix) -developmentMatrix<-unique(developmentMatrix) # de 34513 a 34513 (expected) -dim(developmentMatrix) -developmentMatrix<-as.data.frame(developmentMatrix) -#### anyadir columna lengths -#las pos picos estan como factores, las pasamos a numerico -developmentMatrix$peakStart<-as.numeric(as.character(developmentMatrix$peakStart)) -developmentMatrix$peakEnd<-as.numeric(as.character(developmentMatrix$peakEnd)) 
-developmentMatrix$longitudPicos<-developmentMatrix$peakEnd - developmentMatrix$peakStart -min(developmentMatrix$longitudPicos) #0 as expected -developForPlot<-developmentMatrix -rm(developmentMatrix) -developForPlot<-developForPlot[order(developForPlot$longitudPicos,decreasing = F),] -################################### -## Computing line inflexion point -################################### -library(inflection) -######################### -##con el metodo ESE -##Ahora si quitamos todos los que tienen 0 bases de H3K27me3 -##Reordenamos de mayor a menor para tenerlo como un scree plot -genesWithSignal<-developForPlot[order(developForPlot$longitudPicos, decreasing = TRUE),] -#To compute inflection point excluding the 0 values -genesWithSignal<-subset(genesWithSignal, longitudPicos>0)$longitudPicos -##Tal y como indican en el paper para el scree plot, que es el mismo concepto -elbowPoint<-findiplist(x = 1:length(genesWithSignal), y = genesWithSignal, -index = 0## -) -##Check if method EDE is applicable -if(is.na(elbowPoint[2,3])){ -print("method EDE not applicable") -} -##Loading again data of genes without exluding those without signal (peak length 0) -genesWithSignal<-developForPlot[order(developForPlot$longitudPicos, decreasing = TRUE),] -genesWithSignal<-genesWithSignal$longitudPicos -nroObservacion_limiteSuperior_RegionKneePoint<-elbowPoint[1,1] ##Pq va en decreasing -nroObservacion_limiteInferior_RegionKneePoint<-elbowPoint[1,2] ##Pq va en decreasing -kneePoint_ejeX<-elbowPoint[1,3] -elbowPoint<-elbowPoint[1,1]##ESE -genesWithSignal[elbowPoint] -targetLongitudPico<-genesWithSignal[elbowPoint] -plot(x=1:length(genesWithSignal), y=genesWithSignal, type = "l", lwd=3, main = "H3K27me3 peak size associated to TSS \n in Cardiomyocites", xlab = "TSS", ylab="H3K27me3 peak size") -abline(h=c(targetLongitudPico),col="red", lwd=2, lty=2) -abline(v=nroObservacion_limiteSuperior_RegionKneePoint,col="red", lwd=2, lty=2) -abline(v=kneePoint_ejeX, col="blue", lwd=2, lty=2) 
-abline(v=nroObservacion_limiteInferior_RegionKneePoint, col="green", lwd=2, lty=2) -text(x= 20000, y=13000, paste0(targetLongitudPico, " bp"), col="red") -thresh<-targetLongitudPico -##################################################################### -##### seleccion genes (TSS) de desarrollo, aquellos cuyo overalapped H3K27me3 peak > thresh -##################################################################### -genesDesarrollo_allInfo<-developForPlot[developForPlot$longitudPicos>=thresh,] -selectedDevelopGenes<-sort(unique(as.character(genesDesarrollo_allInfo$gene))) -######################################################################## -##### no nos interesan ni los LINC ni los LOC asi que vamos a quitarlos -######################################################################## -AllGenes<-as.character(unique(developForPlot$gene))#27090 -LINCs<-AllGenes[grep("^LINC.*", AllGenes)] -LOCs<-AllGenes[grep("^LOC[0-9].*", AllGenes)] -NotRelevant<-c(LINCs,LOCs)#LINCs and LOCs -########################################################################## -###### seleccion de los TSS de interes -########################################################################## -selectedDevelopGenes<-selectedDevelopGenes[!(selectedDevelopGenes %in% NotRelevant)] -print(thresh) -print(length(selectedDevelopGenes)) -#################### -## Saving results -#################### -hs_cardiomiocytes_polyCombGenes<-selectedDevelopGenes -save(hs_cardiomiocytes_polyCombGenes, file = "~/Dropbox/Cantabria/PhD_Project/Resultados/genesDesarrollo/Robjects/hs_cardiomiocytes_polyCombGenes.RData") -tiff("/home/victor/Documentos/phD/ArticlesWritting/Postre_manuscript/Revision_NAR/Manuscrito_New_Version/SupplementaryMaterial/SupplementaryFigures/Supplementary_Figure_1/Cardiomyocytes.tiff", units="in", -width=6, height=3.8, res=300) -plot(x=1:length(genesWithSignal), y=genesWithSignal, type = "l", lwd=3, main = "H3K27me3 peak size associated to TSS \n in Cardiomyocites", xlab = "TSS", 
ylab="H3K27me3 peak size") -abline(h=c(targetLongitudPico),col="red", lwd=2, lty=2) -abline(v=nroObservacion_limiteSuperior_RegionKneePoint,col="red", lwd=2, lty=2) -abline(v=kneePoint_ejeX, col="blue", lwd=2, lty=2) -abline(v=nroObservacion_limiteInferior_RegionKneePoint, col="green", lwd=2, lty=2) -text(x= 20000, y=13000, paste0(targetLongitudPico, " bp"), col="red") +# theme_set(theme_classic()) +tiff("/home/victor/Documentos/phD/ArticlesWritting/Postre_manuscript/FiguresPresentationTool/FigPercentagesPathoVsHealthy.tiff", units="in", +width=8, height=4, res=300) +ggplot(data = dataPlot, aes(x = Mode, y = percentages, fill = groupSV )) + +geom_bar(stat = "identity", width = 0.4, +position=position_dodge(width = 0.5))+ +scale_fill_manual(values = c("#00ccff","#cc66ff"))+theme(legend.position = "none") + theme_classic() + coord_flip() +dev.off() +View(dataPlot) +subset(dataPlot,Mode=="St") +gg<-dataPlot +gg<-dataPlot +subset(gg,Mode=="St") +gg +gg<-subset(gg,Mode=="St") +gg +gg<-subset(gg,Mode=="St") +gg$Mode<-NULL +gg[2,1]<-3.2 +gg +gg<-dataPlot +gg<-subset(gg,Mode=="St") +gg$Mode<-NULL +gg[2,1]<-3.2 +ggplot(data = gg, aes(x = group, y = percentages, fill = groupSV )) + +geom_bar(stat = "identity", width = 0.4, +position=position_dodge(width = 0.5))+ +scale_fill_manual(values = c("#00ccff","#cc66ff"))+theme(legend.position = "none") + theme_classic() + coord_flip() +gg +ggplot(data = gg, aes(x = groupSV, y = percentages, fill = groupSV )) + +geom_bar(stat = "identity", width = 0.4, +position=position_dodge(width = 0.5))+ +scale_fill_manual(values = c("#00ccff","#cc66ff"))+theme(legend.position = "none") + theme_classic() + coord_flip() +# theme_set(theme_classic()) +tiff("/home/victor/Documentos/phD/ArticlesWritting/Postre_manuscript/FiguresPresentationTool/FigPercentagesPathoVsHealthy.tiff", units="in", +width=8, height=4, res=300) +gg<-dataPlot +gg<-subset(gg,Mode=="St") +gg$Mode<-NULL +gg[2,1]<-3.2 +ggplot(data = gg, aes(x = groupSV, y = percentages, fill = 
groupSV )) + +geom_bar(stat = "identity", width = 0.4, +position=position_dodge(width = 0.5))+ +scale_fill_manual(values = c("#00ccff","#cc66ff"))+theme(legend.position = "none") + theme_classic() + coord_flip() +dev.off() +##For presentations plot +gg<-dataPlot +gg<-subset(gg,Mode=="St") +gg$Mode<-NULL +gg +##For presentations plot +gg<-dataPlot +gg<-subset(gg,Mode=="St") +gg$Mode<-NULL +gg[2,1]<-3.2 +gg +tiff("/home/victor/Documentos/phD/ArticlesWritting/Postre_manuscript/FiguresPresentationTool/FigPercentagesPathoVsHealthy.tiff", units="in", +width=8, height=4, res=300) +ggplot(data = gg, aes(x = groupSV, y = percentages, fill = groupSV )) + +geom_bar(stat = "identity", width = 0.4, +position=position_dodge(width = 0.5))+ +scale_fill_manual(values = c("#00ccff","#cc66ff"))+theme(legend.position = "none") + theme_classic() + coord_flip() +dev.off() +tiff("/home/victor/Documentos/phD/ArticlesWritting/Postre_manuscript/FiguresPresentationTool/FigPercentagesPathoVsHealthy.tiff", units="in", +width=8, height=4, res=300) +ggplot(data = gg, aes(x = groupSV, y = percentages, fill = groupSV )) + +geom_bar(stat = "identity", width = 0.8, +position=position_dodge(width = 0.5))+ +scale_fill_manual(values = c("#00ccff","#cc66ff"))+theme(legend.position = "none") + theme_classic() + coord_flip() +dev.off() +tiff("/home/victor/Documentos/phD/ArticlesWritting/Postre_manuscript/FiguresPresentationTool/FigPercentagesPathoVsHealthy.tiff", units="in", +width=8, height=2, res=300) +ggplot(data = gg, aes(x = groupSV, y = percentages, fill = groupSV )) + +geom_bar(stat = "identity", width = 0.8, +position=position_dodge(width = 0.5))+ +scale_fill_manual(values = c("#00ccff","#cc66ff"))+theme(legend.position = "none") + theme_classic() + coord_flip() +dev.off() +tiff("/home/victor/Documentos/phD/ArticlesWritting/Postre_manuscript/FiguresPresentationTool/FigPercentagesPathoVsHealthy.tiff", units="in", +width=8, height=2, res=300) +ggplot(data = gg, aes(x = groupSV, y = percentages, fill 
= groupSV )) + +geom_bar(stat = "identity", width = 0.8, +position=position_dodge(width = 0.5))+ +scale_fill_manual(values = c("#00ccff","#cc66ff"))+theme(legend.position = "none") + theme_classic() + coord_flip() +dev.off() +gg +gg$groupSV +gg$groupSV<-factor(as.character(gg$groupSV), levels = c("Healthy","Patho"), ordered = TRUE) +gg +gg$groupSV +##For presentations plot +gg<-dataPlot +gg<-subset(gg,Mode=="St") +gg$Mode<-NULL +gg[2,1]<-3.2 +gg$groupSV<-factor(as.character(gg$groupSV), levels = c("Healthy","Patho"), ordered = TRUE) +gg +tiff("/home/victor/Documentos/phD/ArticlesWritting/Postre_manuscript/FiguresPresentationTool/FigPercentagesPathoVsHealthy.tiff", units="in", +width=8, height=2, res=300) +ggplot(data = gg, aes(x = groupSV, y = percentages, fill = groupSV )) + +geom_bar(stat = "identity", width = 0.8, +position=position_dodge(width = 0.5))+ +scale_fill_manual(values = c("#00ccff","#cc66ff"))+theme(legend.position = "none") + theme_classic() + coord_flip() dev.off() -runApp('Dropbox/Cantabria/PhD_Project/ScriptsPhd/ScriptsParaUsoLocal/Postre/Postre_app') -runApp('Dropbox/Cantabria/PhD_Project/ScriptsPhd/ScriptsParaUsoLocal/Postre/Postre_app') -shiny::runApp('Dropbox/Cantabria/PhD_Project/ScriptsPhd/ScriptsParaUsoLocal/Postre/Postre_app') -runApp('Dropbox/Cantabria/PhD_Project/ScriptsPhd/ScriptsParaUsoLocal/Postre/Postre_app') -shiny::runApp('Dropbox/Cantabria/PhD_Project/ScriptsPhd/ScriptsParaUsoLocal/Postre/Postre_app') -runApp('Dropbox/Cantabria/PhD_Project/ScriptsPhd/ScriptsParaUsoLocal/Postre/Postre_app') -runApp('Dropbox/Cantabria/PhD_Project/ScriptsPhd/ScriptsParaUsoLocal/Postre/Postre_app') -source("https://raw.githubusercontent.com/vicsanga/Postre/main/Postre_wrapper.R") -runApp('Dropbox/Cantabria/PhD_Project/ScriptsPhd/ScriptsParaUsoLocal/Postre/Postre_app') -source("https://raw.githubusercontent.com/vicsanga/Postre/main/Postre_wrapper.R") -source("https://raw.githubusercontent.com/vicsanga/Postre/main/Postre_wrapper.R") 
-source("https://raw.githubusercontent.com/vicsanga/Postre/main/Postre_wrapper.R") -source("https://raw.githubusercontent.com/vicsanga/Postre/main/Postre_wrapper.R") -shiny::runApp('Dropbox/Cantabria/PhD_Project/ScriptsPhd/ScriptsParaUsoLocal/Postre/Postre_app') -runApp('Dropbox/Cantabria/PhD_Project/ScriptsPhd/ScriptsParaUsoLocal/Postre/Postre_app') -runApp('Dropbox/Cantabria/PhD_Project/ScriptsPhd/ScriptsParaUsoLocal/Postre/Postre_app') -source("https://raw.githubusercontent.com/vicsanga/Postre/main/Postre_wrapper.R") -source("https://raw.githubusercontent.com/vicsanga/Postre/main/Postre_wrapper.R") -runApp('Dropbox/Cantabria/PhD_Project/ScriptsPhd/ScriptsParaUsoLocal/Postre/Postre_app') -############################# -## PCA-Initial exploration -## FOR fpkms -############################# -load(file = "/home/victor/Documentos/phD/manyFolders/colaboraciones/maria/rna_seq/processedFiles/maria_rnaSeq_2021_fpkm.RData") +tiff("/home/victor/Documentos/phD/ArticlesWritting/Postre_manuscript/FiguresPresentationTool/FigPercentagesPathoVsHealthy.tiff", units="in", +width=8, height=2, res=300) +ggplot(data = gg, aes(x = groupSV, y = percentages, fill = groupSV )) + +geom_bar(stat = "identity", width = 0.8, +position=position_dodge(width = 0.5))+ +scale_fill_manual(values = c("#cc66ff","#00ccff"))+theme(legend.position = "none") + theme_classic() + coord_flip() +dev.off() +##Loading Table, Manually Generated +##Info obtained from script: ~/Dropbox/Cantabria/PhD_Project/ScriptsPhd/ScriptsParaUsoLocal/SV_LocalAnalysis/MultiplePatientAnalysis/evaluating_Sensitivity_OnPatientsData/3_ParsingPatientsPredictions.R +infoPredictions<-read.delim(file = "~/Dropbox/Cantabria/PhD_Project/DatosPHD/software_SV/dataManuscript/PredictionsPercentages.tab", +sep="\t", +header = TRUE, +stringsAsFactors = TRUE) ##This true, buecause for GGPLOT better +infoPredictions<-subset(infoPredictions, Mode=="Standard") +infoPredictions$Mode<-"Pathogenic" +################################################ 
+##Meter datos de %s predichos para controles ############################################### -##Loading relevant genes in counts script -#load(file = "/home/victor/Documentos/phD/colaboraciones/maria/rna_seq/processedFiles/test_genesSelectedCountsAnalyses.RData") -#fpkm_masterMatrix<-fpkm_masterMatrix[fpkm_masterMatrix$gene_id %in% genesCountsAnalyses,] -#################### -##remvoing character columns -num_fpkm<-fpkm_masterMatrix[,-c(1,2)] -num_fpkm<-log2(num_fpkm+1) ##log2 transformation essential to get meaningful results on plots, reducing effect of hugely expressed genes -filteringCols<-c("WT_1A_D0","WT_1B_D0","WT_2A_D0","WT_2B_D0", -"KO_1A_D0", "KO_1B_D0", "KO_2A_D0", "KO_2B_D0", "WT_1A_D3", "WT_1B_D3", -"WT_2A_D3", "WT_2B_D3", "KO_1A_D3", "KO_1B_D3", "KO_2A_D3", "KO_2B_D3" , -"RNA_WT_D6_1", "RNA_WT_D6_2", "RNA_WT_D6_3", "RNA_Z2KO_1_D6_1", -"RNA_Z2KO_1_D6_2", "RNA_Z2KO_1_D6_3") -num_fpkm<-num_fpkm[,filteringCols] -# ################################## -# ##Doing some clustering analyses -# -# library(rafalib) -# -# ##En primer lugar vamosa calcular al distancia entre cada par de observaciones -# ##La funcion dist, calcula la distancia entre las filas de la matriz. Por tanto, pensad si teneis que transponer o no la matriz... 
-# ##Por defecto calcula la distancia euclidea -# d <- dist(t(num_fpkm)) -# -# #View(as.matrix(d)) -# -# ##Realizamos el clustering jerarquico atendiendo a las distancias calculadas entre cada par de observaciones -# hc_analysis <- hclust(d) -# hc_analysis -# -# ##Representamos el arbol pintado por colores -# # plot(hc_analysis,labels=colnames(num_fpkm),cex=0.5) -# -# -# ## -# tissuesColors<-c(rep.int(x = "red",times=4), -# rep.int(x = "darkred",times=4), -# rep.int(x = "green",times=4), -# rep.int(x = "darkgreen",times=4), -# rep.int(x = "blue",times=4), -# rep.int(x = "darkblue",times=4), -# rep.int(x = "orange",times=3) -# ) -# -# -# ##Plot pintando por colores -# myplclust(hc_analysis, labels=colnames(num_fpkm), lab.col=tissuesColors, cex=1) -# -# -# ###Remove for pca study -# # fpkm_masterMatrix<-fpkm_masterMatrix[!genesNotExpNever,] -# # num_fpkm<-num_fpkm[!genesNotExpNever,] -# -# ##doing pca -# fun_doingPca<-function(targetMatrix, center, scale, PC_tag1, PC_tag2){ -# targetRes<-prcomp(x=t(targetMatrix), center = center, scale. 
= scale) -# -# plot(x=targetRes$x[,PC_tag1], -# y=targetRes$x[,PC_tag2], -# pch = 19, -# col=tissuesColors, -# -# xlab=PC_tag1, -# ylab=PC_tag2 -# -# ) -# -# # text(x=targetRes$x[,PC_tag1], -# # y=targetRes$x[,PC_tag2], -# # labels = colnames(targetMatrix), -# # col=tissuesColors) -# } -# -# fun_doingPca(targetMatrix = num_fpkm, center = TRUE, scale = FALSE, -# PC_tag1 = "PC1", PC_tag2 = "PC2") -############# -## PCA NICE -tissuesColors<-c(rep.int(x = "red",times=4), -rep.int(x = "darkred",times=4), -rep.int(x = "green",times=4), -rep.int(x = "darkgreen",times=4), -rep.int(x = "blue",times=4), -rep.int(x = "darkblue",times=4), -rep.int(x = "dodgerblue",times=3), -rep.int(x = "dodgerblue3",times=3), -rep.int(x = "orange",times=3) -) -tissuesColorsUnique<-c(rep.int(x = "red",times=1), -rep.int(x = "darkred",times=1), -rep.int(x = "green",times=1), -rep.int(x = "darkgreen",times=1), -rep.int(x = "blue",times=1), -rep.int(x = "darkblue",times=1), -rep.int(x = "dodgerblue",times=1), -rep.int(x = "dodgerblue3",times=1), -rep.int(x = "orange",times=1) -) -expDesignGroups<-c(rep.int(x = "WT_D0",times=4), -rep.int(x = "KO_D0",times=4), -rep.int(x = "WT_D3",times=4), -rep.int(x = "KO_D3",times=4), -rep.int(x = "WT_D6",times=3), -rep.int(x = "KO_D6",times=3) -) -##Metadatos, expDesignMatrix, en este caso un dataframe -expDesignMat<-data.frame("condition"= expDesignGroups) -rownames(expDesignMat)<-colnames(num_fpkm)##Mismo da el 1 que el 2 -##Check expDesign object -expDesignMat -##PCA -library(PCAtools) -x11() -data1_pca<-PCAtools::pca(mat = num_fpkm, -transposed = FALSE, -center = TRUE, -scale = FALSE, -metadata = expDesignMat -)#Se espera que las variables, en este caso los genes esten en las filas, como ya estan, no transponemos la matriz -screeplot(data1_pca, axisLabSize = 18, titleLabSize = 22, components = 1:10) -##Plot per a components 1 i 2 -biplot(data1_pca, colby = "condition", x="PC1", y="PC2", labSize = 0, title = "ZIC2 PCA") -ggg<-read.delim(file = 
"/home/victor/Documentos/phD/testing_CaptureC/GSE129378_Human_Capture.txt") -View(ggg) -227156 - 226254 -228156 - 225254 -223352 - 222450 -224352 - 221450 -128748439 - 128748253 -128749439 - 128747253 -227156 - 226254 -23386686 - 23385780 -5256556 - 5255391 -source("https://raw.githubusercontent.com/vicsanga/Postre/main/Postre_wrapper.R") -shiny::runApp('Dropbox/Cantabria/PhD_Project/ScriptsPhd/ScriptsParaUsoLocal/Postre/Postre_app') -runApp('Dropbox/Cantabria/PhD_Project/ScriptsPhd/ScriptsParaUsoLocal/Postre/Postre_app') -runApp('Dropbox/Cantabria/PhD_Project/ScriptsPhd/ScriptsParaUsoLocal/Postre/Postre_app') -runApp('Dropbox/Cantabria/PhD_Project/ScriptsPhd/ScriptsParaUsoLocal/Postre/Postre_app') -runApp('Dropbox/Cantabria/PhD_Project/ScriptsPhd/ScriptsParaUsoLocal/Postre/Postre_app') -runApp('Dropbox/Cantabria/PhD_Project/ScriptsPhd/ScriptsParaUsoLocal/Postre/Postre_app') -runApp('Dropbox/Cantabria/PhD_Project/ScriptsPhd/ScriptsParaUsoLocal/Postre/Postre_app') -runApp('Dropbox/Cantabria/PhD_Project/ScriptsPhd/ScriptsParaUsoLocal/Postre/Postre_app') -runApp('Dropbox/Cantabria/PhD_Project/ScriptsPhd/ScriptsParaUsoLocal/Postre/Postre_app') -source("https://raw.githubusercontent.com/vicsanga/Postre/main/Postre_wrapper.R") +info_Healthy<-infoPredictions +info_Healthy$N_Patients<-10000 +info_Healthy$Mode<-"Healthy" +info_Healthy$PercentagePredictions<-c(5.32,3.41,1.47,2.76)#c(2.7,2.6,1.5,2.2) +infoPredictions<-rbind(infoPredictions, info_Healthy) +lapply(infoPredictions, class) +p<-ggplot(data=infoPredictions, aes(x=Phenotype, y=PercentagePredictions, fill=Mode)) + +geom_bar(stat="identity", position=position_dodge()) +coord_flip() #+ theme_classic() +p ######################################### -## Performing Predictions Approach 2 +## Parsing Predictions Patients ######################################### -##Loading controls data -load(file = "~/Dropbox/Cantabria/PhD_Project/DatosPHD/fichasPacientes/healthyControls_approach2.RData") 
-View(healthyControls_readyForSoftware) -source("https://raw.githubusercontent.com/vicsanga/Postre/main/Postre_wrapper.R") -source("https://raw.githubusercontent.com/vicsanga/Postre/main/Postre_wrapper.R") -source("https://raw.githubusercontent.com/vicsanga/Postre/main/Postre_wrapper.R") -#################################################################### -## Script for HTML generation for Web with Patients Cohort Study -#################################################################### -##Use maybe the Standard info only -##Loading Standard Mode PARSED results -load(file = "~/Dropbox/Cantabria/PhD_Project/Resultados/softwareObjects/multiplePatientAnalysis/StandardMode_ParsedResults_MultiplePatientAnalyses.RData") -######################################## -##HTML generation with cohort results -######################################## -###Setwd in the folder where all the app info is hosted -setwd("~/Dropbox/Cantabria/PhD_Project/ScriptsPhd/ScriptsParaUsoLocal/Postre/Postre_app/") -##Required functions -source("functions/multiple_SV_Functions/multipleStats_ExplorePreviousPat_htmlGeneration.R") -##AllPatientsInfo table, required for the Patients Info section +##CHECK PATIENTS WITH ERRORS RISED IF ANY +##TAKE AS REFERENCE SCRIPT: ~/Documentos/phD/SV_app_backup/SV_LocalAnalysis_backup_21Dic_2021/MultiplePatientAnalysis/evaluatingSpecificity_OnHealthyIndividuals/Considering_Most_Of_SVs/3_ParsingResults.R +############ +## Loading prediction results +load("~/Dropbox/Cantabria/PhD_Project/Resultados/softwareObjects/multiplePatientAnalysis/results_MultiplePatientAnalyses.RData") +## Loading SVs information load("~/Dropbox/Cantabria/PhD_Project/DatosPHD/fichasPacientes/AllPatientsInfo.RData") +######################## +# NOT TOUCHING FROM HERE. 
Use the remaining script above for all kind of prediction, either controls or pathogenic +# When doing them locally +######################## ##Phenotypes to be considered consideredPheno<-c("head_neck", "cardiovascular", "limbs", "neurodevelopmental")##As more phenos considered they will appear here -ExplorePreviousPatients_html<-multipleStats_htmlGeneration(cohort_results = cohort_results_standard, -consideredPheno = consideredPheno, -ids_append="PreviousPat",##to avoid conflicts with tables generated in multiples submission option -AllPatientsInfo=AllPatientsInfo, -explPreviousPatSection = TRUE) -########################################################################################### -##This is the html that is going to be loaded on the Explore Previous Patients Section -##Wrapp in html tags to avoid issues AND save html -########################################################################################### -ExplorePreviousPatients_html<-paste("
", -ExplorePreviousPatients_html, -"", -sep="") -fileConn<-file("~/Dropbox/Cantabria/PhD_Project/ScriptsPhd/ScriptsParaUsoLocal/Postre/Postre_app/html_scripts/ExplorePreviousPatients.html") -writeLines(ExplorePreviousPatients_html, fileConn) -close(fileConn) -#################################################################### -## Script for HTML generation for Web with Patients Cohort Study -#################################################################### -##Use maybe the Standard info only -##Loading Standard Mode PARSED results -load(file = "~/Dropbox/Cantabria/PhD_Project/Resultados/softwareObjects/multiplePatientAnalysis/StandardMode_ParsedResults_MultiplePatientAnalyses.RData") -######################################## -##HTML generation with cohort results -######################################## ###Setwd in the folder where all the app info is hosted setwd("~/Dropbox/Cantabria/PhD_Project/ScriptsPhd/ScriptsParaUsoLocal/Postre/Postre_app/") +############################################## +##Loading Required Function +source("functions/multiple_SV_Functions/cohortResults_Parser.R") +############# +#Parsing predictions +cohort_results_hiSpe<-cohortResults_Parser(minScore = 0.8, all_patientResults = resultsPerMode$`High-Specificity`, +consideredPheno =consideredPheno,##Improve as more Phenos can be processed##consideredPheno,##Multiple Option, for now stick with head_neck +discardRelevantByBrokenGene = FALSE, +AllPatientsInfo = AllPatientsInfo ) +cohort_results_standard<-cohortResults_Parser(minScore = 0.8, all_patientResults = resultsPerMode$Standard, +consideredPheno =consideredPheno,##Improve as more Phenos can be processed##consideredPheno,##Multiple Option, for now stick with head_neck +discardRelevantByBrokenGene = FALSE, +AllPatientsInfo = AllPatientsInfo ) +################################################### +## Saving parsed results, Used to generate HTML +################################################### +save(cohort_results_standard, +file = 
"~/Dropbox/Cantabria/PhD_Project/Resultados/softwareObjects/multiplePatientAnalysis/StandardMode_ParsedResults_MultiplePatientAnalyses.RData") +save(cohort_results_hiSpe, +file = "~/Dropbox/Cantabria/PhD_Project/Resultados/softwareObjects/multiplePatientAnalysis/HighSpecificityMode_ParsedResults_MultiplePatientAnalyses.RData") +##Save info candidate genes for attempt of matching +originalPathogenic_candidateGenesInfo<-cohort_results_standard$candidateGenesInfo +save(originalPathogenic_candidateGenesInfo, +file = "~/Dropbox/Cantabria/PhD_Project/Resultados/softwareObjects/Robjects/candidateGenesInfo_Pathogenic_SVs.RData") +################################### +## Exploring cohort Parsed results +################################### +targetPhenos<-c("head_neck", +"cardiovascular", +"limbs", +"neurodevelopmental")##As more phenos considered they will appear here ##Required functions -source("functions/multiple_SV_Functions/multipleStats_ExplorePreviousPat_htmlGeneration.R") -##AllPatientsInfo table, required for the Patients Info section -load("~/Dropbox/Cantabria/PhD_Project/DatosPHD/fichasPacientes/AllPatientsInfo.RData") +source("/home/victor/Dropbox/Cantabria/PhD_Project/ScriptsPhd/ScriptsParaUsoLocal/SV_LocalAnalysis/MultiplePatientAnalysis/functions/multiSV_analysis_stats_functions.R", +local = TRUE) +##Para High specificity +multiSV_analysis_stats(targetPhenos = targetPhenos, targetResults = cohort_results_hiSpe) +#Para Standard +multiSV_analysis_stats(targetPhenos = targetPhenos, targetResults = cohort_results_standard) +######################################### +## Parsing Predictions Approach 2 +######################################### +##CHECK PATIENTS WITH ERRORS RISED IF ANY +##TAKE AS REFERENCE SCRIPT: ~/Documentos/phD/SV_app_backup/SV_LocalAnalysis_backup_21Dic_2021/MultiplePatientAnalysis/evaluatingSpecificity_OnHealthyIndividuals/Considering_Most_Of_SVs/3_ParsingResults.R +############ +## Loading prediction results approach 2 
+load("~/Dropbox/Cantabria/PhD_Project/Resultados/softwareObjects/Robjects/results_PredictionsHealthyControls_approach1.RData") +## Loading SVs information +load(file = "~/Dropbox/Cantabria/PhD_Project/DatosPHD/fichasPacientes/healthyControls_approach1.RData") +######## +#Renaming input data +AllPatientsInfo<-healthyControls_readyForSoftware +rm(healthyControls_readyForSoftware) +################################################################ +## Applying filtering for DelDup or INv to stratify results +################################################################ +##Required functions +source("/home/victor/Dropbox/Cantabria/PhD_Project/ScriptsPhd/ScriptsParaUsoLocal/SV_LocalAnalysis/MultiplePatientAnalysis/functions/multiSV_analysis_stats_functions.R", +local = TRUE) +## It can be "DELDUP" "INV" or "NONE" (doing nothing) +filteringCriteria<-"NONE" ##Also to analyze with TADA +# filteredInfo<-filtering_PatientsInfo(filteringCriteria = filteringCriteria, +# AllPatientsInfo = AllPatientsInfo, +# resultsPerMode = resultsPerMode) +# +# AllPatientsInfo<-filteredInfo$AllPatientsInfo +# resultsPerMode<-filteredInfo$resultsPerMode +length(resultsPerMode$Standard)==nrow(AllPatientsInfo) +length(resultsPerMode$`High-Specificity`)==nrow(AllPatientsInfo) +######################## +# NOT TOUCHING FROM HERE. 
Use the remaining script above for all kind of prediction, either controls or pathogenic +# When doing them locally +######################## ##Phenotypes to be considered consideredPheno<-c("head_neck", "cardiovascular", "limbs", "neurodevelopmental")##As more phenos considered they will appear here -ExplorePreviousPatients_html<-multipleStats_htmlGeneration(cohort_results = cohort_results_standard, -consideredPheno = consideredPheno, -ids_append="PreviousPat",##to avoid conflicts with tables generated in multiples submission option -AllPatientsInfo=AllPatientsInfo, -explPreviousPatSection = TRUE) -########################################################################################### -##This is the html that is going to be loaded on the Explore Previous Patients Section -##Wrapp in html tags to avoid issues AND save html -########################################################################################### -ExplorePreviousPatients_html<-paste("", -ExplorePreviousPatients_html, -"", -sep="") -fileConn<-file("~/Dropbox/Cantabria/PhD_Project/ScriptsPhd/ScriptsParaUsoLocal/Postre/Postre_app/html_scripts/ExplorePreviousPatients.html") -writeLines(ExplorePreviousPatients_html, fileConn) -close(fileConn) +###Setwd in the folder where all the app info is hosted +setwd("~/Dropbox/Cantabria/PhD_Project/ScriptsPhd/ScriptsParaUsoLocal/Postre/Postre_app/") +############################################## +##Loading Required Function +source("functions/multiple_SV_Functions/cohortResults_Parser.R") +############# +#Parsing predictions +# cohort_results_hiSpe<-cohortResults_Parser(minScore = 0.8, all_patientResults = resultsPerMode$`High-Specificity`, +# consideredPheno =consideredPheno,##Improve as more Phenos can be processed##consideredPheno,##Multiple Option, for now stick with head_neck +# discardRelevantByBrokenGene = FALSE, +# AllPatientsInfo = AllPatientsInfo ) +cohort_results_standard<-cohortResults_Parser(minScore = 0.8, all_patientResults = 
resultsPerMode$Standard, +consideredPheno =consideredPheno,##Improve as more Phenos can be processed##consideredPheno,##Multiple Option, for now stick with head_neck +discardRelevantByBrokenGene = FALSE, +AllPatientsInfo = AllPatientsInfo ) +shiny::runApp() +?tags +runApp() +runApp() +runApp() +runApp() +runApp() +runApp() +runApp() +runApp() +runApp() +runApp() +runApp() +runApp() runApp() +masterWrapperSinglePrediction(patientInfo = patientData , minScore = minScore, +highScore = highScore, runMode = runMode_single, +user_tadMapInfo = user_tadMapInfo, +MultiDataList = MultiDataList) runApp() +tagEnhancersLabel +yPos_chr_WT<-13 ##variable to hold the position of the chr text in the WT situation in the Y axis +##Adjust image, to exclude spaces outside canvas (drawing area) +par(mar = c(0,0,0,0)) +##OPTION REMOVING AXIS +#UPON COMPLETION USING THIS +plot(x=0:20, y=0:20, type = "n", +ylim = yAxisLim, +xlim = xAxisLim, +xaxt = 'n', yaxt = 'n', bty = 'n', pch = '', ylab = '', xlab = '') ##Upon completion, REMOVE AXIS +texSizeChr<-2 +#Adding plot header +text(x=20, y=30, label=paste0("Graphical summary of ", gene, " regulatory domain in ", phase), cex = 1.7) +text(x=20,y=26, label="Simplification",cex=1.7) +#WT allelle label +text(x=20, y=20, label="Control Scenario", cex = 1.5) +##Coord for tads over X axis (Depending on the situation either two TADs or just one will be painted) +#Let's try make them wider two cm per side to ensure everything fits on the re-arrangement plots +tad_XCoord_OnLeftSide<-c(3,17,10)#c(5,15,10) ##c(3,17,10) +tad_XCoord_OnRightSide<-c(tad_XCoord_OnLeftSide[1]+10+11, +tad_XCoord_OnLeftSide[2]+10+11, +tad_XCoord_OnLeftSide[3]+10+11) +tad_XCoord_OnCenter<-c(13,27,20) +#Over Y axis, WT line +tad_YCoord_WildTypeLine<-c(0,0,15) +situation +text(x=5, y=yPos_chr_WT, label=chr_gene, cex = texSizeChr) +tad_X_cord<-tad_XCoord_OnLeftSide ##c(5,15,10) +info_drawingGENE_TAD<-paintGene_WT_TAD(tad_X_cord = tad_X_cord, +tad_Y_cord = tad_YCoord_WildTypeLine, 
+nEnh_initial_left = nEnh_initial_left, +nEnh_initial_right = nEnh_initial_right, +gene = gene, +gene_breakp_line_type = gene_breakp_line_type, +situation = situation, +patientResults = patientResults) runApp() runApp() +################################################################ +##Test en mi usuario antiguo, para que sea la zona de pruebas +################################################################ +rsconnect::deployApp('/home/victor/Dropbox/Cantabria/PhD_Project/ScriptsPhd/ScriptsParaUsoLocal/Postre/Postre_app/', +account = 'vicsanga') runApp() +################################################################ +##Test en mi usuario antiguo, para que sea la zona de pruebas +################################################################ +rsconnect::deployApp('/home/victor/Dropbox/Cantabria/PhD_Project/ScriptsPhd/ScriptsParaUsoLocal/Postre/Postre_app/', +account = 'vicsanga') runApp() -source("https://raw.githubusercontent.com/vicsanga/Postre/main/Postre_wrapper.R") -source("https://raw.githubusercontent.com/vicsanga/Postre/main/Postre_wrapper.R") -source("https://raw.githubusercontent.com/vicsanga/Postre/main/Postre_wrapper.R") diff --git a/Postre_app/app.R b/Postre_app/app.R index 910acf3..f4cbfba 100644 --- a/Postre_app/app.R +++ b/Postre_app/app.R @@ -2,13 +2,6 @@ ### POSTRE (Prediction of STRuctural variant Effects) ######################################################## -##Do not repeat devStages or phases Names! Even for different phenotypes!! 
- -##To consider bioconductor repos -##To avoid this error: https://community.rstudio.com/t/deployment-error-unable-to-determine-the-location-for-some-packages/102312 -# options(repos = BiocManager::repositories()) -#options('repos') - library(shiny) library(waiter) library(shinybusy) @@ -19,6 +12,13 @@ library(plotrix)##For enhancers, ellipse shape representation library(shape)##For curve arrows representation library(diagram)##For curve arrows representation +##To consider bioconductor repos +##To avoid this error: https://community.rstudio.com/t/deployment-error-unable-to-determine-the-location-for-some-packages/102312 +# options(repos = BiocManager::repositories()) +#options('repos') + + +########################################################### ###Setwd in the folder where all the app info is hosted # setwd("~/Dropbox/Cantabria/PhD_Project/ScriptsPhd/ScriptsParaUsoLocal/Postre/Postre_app") @@ -53,188 +53,12 @@ relevantChr<-c(paste("chr",1:22,sep = ""), "chrX")##chrY excluded not all data a #To avoid navbar collapse in smaller screens # https://stackoverflow.com/questions/21738417/bootstrap-remove-responsive-from-navbar -navbar_js<-"@media (max-width: 768px) { - .navbar-header { - float: left; - } - - .navbar { - border-radius: 4px; - min-width: 400px; - } - - .nav-tabs-justified > li > a { - border-bottom: 1px solid #ddd; - border-radius: 4px 4px 0 0; - } - .nav-tabs-justified > .active > a, - .nav-tabs-justified > .active > a:hover, - .nav-tabs-justified > .active > a:focus { - border-bottom-color: #fff; - } - - .nav-justified > li { - display: table-cell; - width: 1%; - } - .nav-justified > li > a { - margin-bottom: 0; - } - - .nav-tabs.nav-justified > li > a { - border-bottom: 1px solid #ddd; - border-radius: 4px 4px 0 0; - } - .nav-tabs.nav-justified > .active > a, - .nav-tabs.nav-justified > .active > a:hover, - .nav-tabs.nav-justified > .active > a:focus { - border-bottom-color: #fff; - } - - .nav-tabs.nav-justified > li { - display: table-cell; - 
width: 1%; - } - .nav-tabs.nav-justified > li > a { - margin-bottom: 0; - } - - .navbar-right .dropdown-menu { - right: 0; - left: auto; - } - .navbar-right .dropdown-menu-left { - right: auto; - left: 0; - } - .container { - min-width: 400px; - } - - .navbar-collapse { - width: auto; - border-top: 0; - box-shadow: none; - } - .navbar-collapse.collapse { - display: block !important; - height: auto !important; - padding-bottom: 0; - overflow: visible !important; - } - .navbar-collapse.in { - overflow-y: visible; - } - .navbar-fixed-top .navbar-collapse, - .navbar-static-top .navbar-collapse, - .navbar-fixed-bottom .navbar-collapse { - padding-right: 0; - padding-left: 0; - } - - .container > .navbar-header, - .container-fluid > .navbar-header, - .container > .navbar-collapse, - .container-fluid > .navbar-collapse { - margin-right: 0; - margin-left: 0; - } - - .navbar-static-top { - border-radius: 0; - } - - .navbar-fixed-top, - .navbar-fixed-bottom { - border-radius: 0; - } - - .navbar-toggle { - display: none; - } - - .navbar-nav { - float: left; - margin: 0; - } - .navbar-nav > li { - float: left; - } - .navbar-nav > li > a { - padding-top: 15px; - padding-bottom: 15px; - } - .navbar-nav.navbar-right:last-child { - margin-right: -15px; - } - - .navbar-left { - float: left !important; - } - .navbar-right { - float: right !important; - } - - .navbar-form .form-group { - display: inline-block; - margin-bottom: 0; - vertical-align: middle; - } - .navbar-form .form-control { - display: inline-block; - width: auto; - vertical-align: middle; - } - .navbar-form .control-label { - margin-bottom: 0; - vertical-align: middle; - } - .navbar-form .radio, - .navbar-form .checkbox { - display: inline-block; - padding-left: 0; - margin-top: 0; - margin-bottom: 0; - vertical-align: middle; - } - .navbar-form .radio input[type='radio'], - .navbar-form .checkbox input[type='checkbox'] { - float: none; - margin-left: 0; - } - .navbar-form .has-feedback .form-control-feedback { - top: 
0; - } - - .navbar-form { - width: auto; - padding-top: 0; - padding-bottom: 0; - margin-right: 0; - margin-left: 0; - border: 0; - -webkit-box-shadow: none; - box-shadow: none; - } - .navbar-form.navbar-right:last-child { - margin-right: -15px; - } - - .navbar-text { - float: left; - margin-right: 15px; - margin-left: 15px; - } - .navbar-text.navbar-right:last-child { - margin-right: 0; - } -}" +# navbar_js<-"" ui <-function(req){ return(div( class="container", - ##Mirar esto del title que no me acaba lo de meterlo como si no existiera nada div(class="titleBrowser", titlePanel(title="POSTRE: Prediction Of STRuctural variant Effects") ), @@ -271,24 +95,7 @@ ui <-function(req){ div(class="inp2", ##Patient Phenotype wellPanel( - # selectInput( - # inputId = "phenoPatient", - # label = "Phenotype", - # ##Choices full name is then matched & renamed in GenomicData_Loader.R - # ##Alphabet order - # choices = c("Cardiovascular", - # "Head & Neck", - # "Limbs", - # "Neurodevelopmental" - # ), - # selected ="Head & Neck" ) - - # https://shiny.rstudio.com/reference/shiny/1.6.0/checkboxGroupInput.html checkboxGroupInput(inputId = "phenoPatient", label = "Phenotype", - # c("Head & Neck" = "head_neck", - # "Cardiovascular" = "cardiovascular", - # "Limbs" = "limbs", - # "Neurodevelopmental" = "neurodevelopmental"), c("Cardiovascular" = "cardiovascular", "Head & Neck" = "head_neck", "Limbs" = "limbs", @@ -345,14 +152,6 @@ ui <-function(req){ ################################# ##Adding Advanced Features Menu ################################# - ##Selecting Running Mode - ##Drop down menu shiny - #https://rdrr.io/cran/shinyWidgets/man/dropdown.html - ##No me acaba, hacer tipo webGestalt - # https://www.w3schools.com/howto/howto_js_collapsible.asp - ##https://www.w3schools.com/howto/tryit.asp?filename=tryhow_js_collapsible - ##Con la parte de AddIcons - ##Y pillar el html que genera Shiny, yeah div(class="inp6", wellPanel(HTML(' @@ -494,12 +293,8 @@ for (i = 0; i < coll.length; i++) { 
## Multiple SV Input Panel ###################### div(class="sideBarClassMultiple", - ##If I put here on the html, h2 tag it gets styled as the header, so style features - ##are connected - - #START MULTIPLE SUBMISSION div(class="formAndTitle_patient_Multiple_Input", - ##Patient SV type + HTML('Phenotype | -Developmental Stages Considered | -Stage Additional Information | -Chip-Seq Source (for enhancer annotation) | -Enhancer annotation, Main Procedure | -RNA-Seq Source | -HiC-Source – TAD maps | -
Cardiovascular | -Day 5 | -Day 5 cardiac mesodermal cells, from 80 days ESC differentiation towards ventricular cardiomocytes | -https://doi.org/10.1038/s41588-019-0479-7 | -H3K27ac peak with distance > 10Kb to protein coding TSS | -https://doi.org/10.1038/s41588-019-0479-7 | -TAD map generated upon processing Heart day 5 HiC data from: https://doi.org/10.1038/s41588-019-0479-7 | -
Day 7 | -Day 7 cardiac progenitors, from 80 days ESC differentiation towards ventricular cardiomocytes | -https://doi.org/10.1038/s41588-019-0479-7 | -H3K27ac peak with distance > 10Kb to protein coding TSS | -https://doi.org/10.1038/s41588-019-0479-7 | -TAD map generated upon processing Heart day 7 HiC data from: https://doi.org/10.1038/s41588-019-0479-7 | -|
Day 15 | -Day 15 primitive cardiomyocytes, from 80 days ESC differentiation towards ventricular cardiomocytes | -https://doi.org/10.1038/s41588-019-0479-7 | -H3K27ac peak with distance > 10Kb to protein coding TSS | -https://doi.org/10.1038/s41588-019-0479-7 | -TAD map generated upon processing Heart day 15 HiC data from: https://doi.org/10.1038/s41588-019-0479-7 | -|
Day 80 | -Day 80 ventricular cardiomiocytes, from 80 days ESC differentiation towards ventricular cardiomocytes | -https://doi.org/10.1038/s41588-019-0479-7 | -H3K27ac peak with distance > 10Kb to protein coding TSS | -https://doi.org/10.1038/s41588-019-0479-7 | -TAD map generated upon processing Heart day 80 HiC data from: https://doi.org/10.1038/s41588-019-0479-7 | -|
Head-Neck | -Neural Crest Early | -Neural Crest Data of hNCC differentiation | -https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE28876 | -p300 peak, intersecting H3K27ac peak, with distance > 10Kb to protein coding TSS | -https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE28876 | -ESC TAD map provided in 3D genome browser: http://3dgenome.fsm.northwestern.edu/ TAD maps: http://3dgenome.fsm.northwestern.edu/downloads/hg19.TADs.zip |
-
Neural Crest Late | -Neural Crest Data of hNCC differentiation , check GEO links for detailed explanation?¿ | -https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE70751 | -p300 peak, intersecting H3K27ac peak, with distance > 10Kb to protein coding TSS | -https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE70751 | -ESC TAD map provided in 3D genome browser: http://3dgenome.fsm.northwestern.edu/ TAD maps: http://3dgenome.fsm.northwestern.edu/downloads/hg19.TADs.zip |
- |
PalateCS20 | -Embryonic Palate Carnagie Stage 20 | -https://pubmed.ncbi.nlm.nih.gov/29719267/ | -H3K27ac peak with distance > 10Kb to protein coding TSS | -https://elifesciences.org/articles/15657 | -ESC TAD map provided in 3D genome browser: http://3dgenome.fsm.northwestern.edu/ TAD maps: http://3dgenome.fsm.northwestern.edu/downloads/hg19.TADs.zip |
- |
Limbs | -EmbryonicLimb1 | -Corresponds with embryonic Lower Limb data | -https://www.nature.com/articles/s41467-020-17305-2 | -H3K27ac peak with distance > 10Kb to protein coding TSS | -https://elifesciences.org/articles/15657 |
- ESC TAD map provided in 3D genome browser: http://3dgenome.fsm.northwestern.edu/ TAD maps: http://3dgenome.fsm.northwestern.edu/downloads/hg19.TADs.zip |
-
EmbryonicLimb2 | -Corresponds with embryonic Upper Limb data | -https://www.nature.com/articles/s41467-020-17305-2 | -H3K27ac peak with distance > 10Kb to protein coding TSS | -https://elifesciences.org/articles/15657 |
- ESC TAD map provided in 3D genome browser: http://3dgenome.fsm.northwestern.edu/ TAD maps: http://3dgenome.fsm.northwestern.edu/downloads/hg19.TADs.zip |
- |
Neurodevelopmental | -PfcGw15 | -Brain prefrontal cortex Gestational Week 15 | -https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE149268 | -H3K27ac peak with distance > 10Kb to protein coding TSS | -From database: https://www.brainspan.org/static/home Expression file considered (summarized to genes file) Expression value as the average of the pcw13 samples: VFC (ventrolateral prefrontal cortex), MFC[anterior (rostral) cingulate (medial prefrontal) cortex] and DFC(dorsolateral prefrontal cortex) |
- Brain Prefrontal Cortex TAD map, generated from CO (prefrontal cortex) boundary map provided in: https://www.cell.com/cell-reports/fulltext/S2211-1247(16)31481-4?_returnURL=https%3A%2F%2Flinkinghub.elsevier.com%2Fretrieve%2Fpii%2FS2211124716314814%3Fshowall%3Dtrue Excel file containing boundary maps: https://www.cell.com/cms/10.1016/j.celrep.2016.10.061/attachment/c3954472-3378-4fd7-aa33-9a2d085f7bbd/mmc4.xlsx |
-
PfcGw18 | -Brain prefrontal cortex Gestation Week 18 | -https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE149268 | -H3K27ac peak with distance > 10Kb to protein coding TSS | -From database: https://www.brainspan.org/static/home Expression file considered (summarized to genes file) Expression value as the average of the pcw16 samples: VFC (ventrolateral prefrontal cortex), MFC[anterior (rostral) cingulate (medial prefrontal) |
- Brain Prefrontal Cortex TAD map, generated from CO (prefrontal cortex) boundary map provided in: https://www.cell.com/cell-reports/fulltext/S2211-1247(16)31481-4?_returnURL=https%3A%2F%2Flinkinghub.elsevier.com%2Fretrieve%2Fpii%2FS2211124716314814%3Fshowall%3Dtrue Excel file containing boundary maps: https://www.cell.com/cms/10.1016/j.celrep.2016.10.061/attachment/c3954472-3378-4fd7-aa33-9a2d085f7bbd/mmc4.xlsx |
-
Phenotype | Cell Types/Tissues | Cell Types/Tissues additional information | Chip-Seq Source for enhancer annotation | Enhancer annotation (based on Chip-Seq), Main Procedure | RNA-Seq Source | Gene Expression quantification main procedure | HiC – TAD maps Source | TAD maps main procedure | ||
Cardiovascular | Day 5 | Day 5 cardiac mesodermal cells, from 80 days ESC differentiation towards ventricular cardiomocytes For + info: https://www.nature.com/articles/s41588-019-0479-7 | H3K27ac peak with distance > 10Kb to protein coding TSS (Peaks were called after reads mapping with Bowtie2 with Macs2) | FPKMs directly obtained from source | TAD map generated upon processing Day 5 cardiomyocyte differentiation HiC data. TADs were called with the usage of the tool DomainCaller by considering the 50kb contact matrices in the .hic files | |||||
Day 7 | Day 7 cardiac progenitors, from 80 days ESC differentiation towards ventricular cardiomocytes For + info: https://www.nature.com/articles/s41588-019-0479-7 | H3K27ac peak with distance > 10Kb to protein coding TSS (Peaks were called after reads mapping with Bowtie2 with Macs2) | FPKMs directly obtained from source | TAD map generated upon processing Day 7 cardiomyocyte differentiation HiC data. TADs were called with the usage of the tool DomainCaller by considering the 50kb contact matrices in the .hic files | ||||||
Day 15 | Day 15 primitive cardiomyocytes, from 80 days ESC differentiation towards ventricular cardiomocytes For + info: https://www.nature.com/articles/s41588-019-0479-7 | H3K27ac peak with distance > 10Kb to protein coding TSS (Peaks were called after reads mapping with Bowtie2 with Macs2) | FPKMs directly obtained from source | TAD map generated upon processing Day 15 cardiomyocyte differentiation HiC data. TADs were called with the usage of the tool DomainCaller by considering the 50kb contact matrices in the .hic files | ||||||
Day 80 | Day 80 ventricular cardiomiocytes, from 80 days ESC differentiation towards ventricular cardiomocytes For + info: https://www.nature.com/articles/s41588-019-0479-7 | H3K27ac peak with distance > 10Kb to protein coding TSS (Peaks were called after reads mapping with Bowtie2 with Macs2) | FPKMs directly obtained from source | TAD map generated upon processing Day 80 cardiomyocyte differentiation HiC data. TADs were called with the usage of the tool DomainCaller by considering the 50kb contact matrices in the .hic files | ||||||
Head-Neck | Neural Crest Early | Neural Crest Data of hNCC differentiation This NC data corresponds with an early and heterogeneous stage of NC differentiation For +info: https://pubmed.ncbi.nlm.nih.gov/29719267/ | p300 peak, intersecting H3K27ac peak, with distance > 10Kb to protein coding TSS (Peaks were called upon reads mapping with Bowtie2 with Macs2) | FPKMs obtained upon reads mapping with Tophat2 and gene expression quantification with Cufflinks | 3D genome browser: http://3dgenome.fsm.northwestern.edu/ TAD maps: http://3dgenome.fsm.northwestern.edu/downloads/hg19.TADs.zip | ESC TAD map directly obtained from: http://3dgenome.fsm.northwestern.edu/downloads/hg19.TADs.zip | ||||
Neural Crest Late | Neural Crest Data of hNCC differentiation P4 differentiation stage, it corresponds with a later and more homogeneous stage of NC differentation in comparison with Neural Crest Early For +info: https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE70751 | p300 peak, intersecting H3K27ac peak, with distance > 10Kb to protein coding TSS (Peaks were called upon reads mapping with Bowtie2 with Macs2) | FPKMs obtained upon reads mapping with Tophat2 and gene expression quantification with Cufflinks | 3D genome browser: http://3dgenome.fsm.northwestern.edu/ TAD maps: http://3dgenome.fsm.northwestern.edu/downloads/hg19.TADs.zip | ESC TAD map directly obtained from: http://3dgenome.fsm.northwestern.edu/downloads/hg19.TADs.zip | |||||
PalateCS20 | Embryonic Palate Carnagie Stage 20 For +info: https://pubmed.ncbi.nlm.nih.gov/29719267/ | H3K27ac peak with distance > 10Kb to protein coding TSS (Peaks were called with Macs2 using the provided alignment files (tagAlign)) | To obtain the FPKMs, first, the bedfiles containing the reads mapping coordinates (.unique.bed12) for Palate1 sample were converted into bam files with bedToBam tool. Next FPKMs were computed with Cufflinks. | 3D genome browser: http://3dgenome.fsm.northwestern.edu/ TAD maps: http://3dgenome.fsm.northwestern.edu/downloads/hg19.TADs.zip | ESC TAD map directly obtained from: http://3dgenome.fsm.northwestern.edu/downloads/hg19.TADs.zip | |||||
Limbs | EmbryonicLimb1 | Embryonic Lower Limb data of entire limb buds, Carnegie Stages 14-19 For +info: https://www.nature.com/articles/s41467-020-17305-2 | H3K27ac peak with distance > 10Kb to protein coding TSS (Peaks were called after reads mapping with Bowtie2 with Macs2) | https://elifesciences.org/articles/15657
| To obtain the FPKMs, first, the bedfiles containing the reads mapping coordinates (.unique.bed12) for Lower Limb samples were converted into bam files with bedToBam tool. Next FPKMs were computed with Cufflinks. | 3D genome browser: http://3dgenome.fsm.northwestern.edu/ TAD maps: http://3dgenome.fsm.northwestern.edu/downloads/hg19.TADs.zip | ESC TAD map directly obtained from: http://3dgenome.fsm.northwestern.edu/downloads/hg19.TADs.zip | |||
EmbryonicLimb2 | Embryonic Upper Limb data of entire limb buds, Carnegie Stages 14-19 For +info: https://www.nature.com/articles/s41467-020-17305-2 | H3K27ac peak with distance > 10Kb to protein coding TSS (Peaks were called after reads mapping with Bowtie2 with Macs2) | https://elifesciences.org/articles/15657
| To obtain the FPKMs, first, the bedfiles containing the reads mapping coordinates (.unique.bed12) for Upper Limb samples were converted into bam files with bedToBam tool. Next FPKMs were computed with Cufflinks. | 3D genome browser: http://3dgenome.fsm.northwestern.edu/ TAD maps: http://3dgenome.fsm.northwestern.edu/downloads/hg19.TADs.zip | ESC TAD map directly obtained from: http://3dgenome.fsm.northwestern.edu/downloads/hg19.TADs.zip | ||||
Neurodevelopmental | PfcGw15 | Brain prefrontal cortex Gestation Week 15 For +info: https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE149268 | https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE149268 | H3K27ac peak with distance > 10Kb to protein coding TSS (Peaks were directly obtained from source) | From database: https://www.brainspan.org/static/home
| Expression values obtained from file: summarized to genes file Given that gestational age is 2 weeks longer than conceptional age, post conception week 13 (pcw13) samples information was considered to match with chip-seq gestation week 15 data Expression value computed as the average of the pcw13 samples: VFC (ventrolateral prefrontal cortex), MFC[anterior (rostral) cingulate (medial prefrontal) cortex] and DFC(dorsolateral prefrontal cortex)
| Brain Prefrontal Cortex TAD map, generated from CO (prefrontal cortex) boundary map provided in the referenced source. The Excelfile containing the boundary maps can be found here: | |||
PfcGw18 | Brain prefrontal cortex Gestation Week 18 For +info: https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE149268 | https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE149268 | H3K27ac peak with distance > 10Kb to protein coding TSS (Peaks were directly obtained from source) | From database: https://www.brainspan.org/static/home
| Expression values obtained from file: summarized to genes file Given that gestational age is 2 weeks longer than conceptional age, post conception week 16 (pcw16) samples information was considered to match with chip-seq gestation week 18 data Expression value computed as the average of the pcw16 samples: VFC (ventrolateral prefrontal cortex), MFC[anterior (rostral) cingulate (medial prefrontal) cortex] and DFC(dorsolateral prefrontal cortex)
| Brain Prefrontal Cortex TAD map, generated from CO (prefrontal cortex) boundary map provided in the referenced source. The Excelfile containing the boundary maps can be found here: | ||||
Phenotype | Cell Types/Tissues | Cell Types/Tissues additional information | Chip-Seq Source for enhancer annotation | Enhancer annotation (based on Chip-Seq), Main Procedure | RNA-Seq Source | Gene Expression quantification main procedure | HiC – TAD maps Source | TAD maps main procedure | ||
Cardiovascular | Day 5 | Day 5 cardiac mesodermal cells, from 80 days ESC differentiation towards ventricular cardiomyocytes For + info: https://www.nature.com/articles/s41588-019-0479-7 | H3K27ac peak with distance > 10Kb to protein coding TSS (Peaks were called after reads mapping with Bowtie2 with Macs2) | FPKMs directly obtained from source | TAD map generated upon processing Day 5 cardiomyocyte differentiation HiC data. TADs were called with the usage of the tool DomainCaller by considering the 50kb contact matrices in the .hic files | |||||
Day 7 | Day 7 cardiac progenitors, from 80 days ESC differentiation towards ventricular cardiomyocytes For + info: https://www.nature.com/articles/s41588-019-0479-7 | H3K27ac peak with distance > 10Kb to protein coding TSS (Peaks were called after reads mapping with Bowtie2 with Macs2) | FPKMs directly obtained from source | TAD map generated upon processing Day 7 cardiomyocyte differentiation HiC data. TADs were called with the usage of the tool DomainCaller by considering the 50kb contact matrices in the .hic files | ||||||
Day 15 | Day 15 primitive cardiomyocytes, from 80 days ESC differentiation towards ventricular cardiomyocytes For + info: https://www.nature.com/articles/s41588-019-0479-7 | H3K27ac peak with distance > 10Kb to protein coding TSS (Peaks were called after reads mapping with Bowtie2 with Macs2) | FPKMs directly obtained from source | TAD map generated upon processing Day 15 cardiomyocyte differentiation HiC data. TADs were called with the usage of the tool DomainCaller by considering the 50kb contact matrices in the .hic files | ||||||
Day 80 | Day 80 ventricular cardiomyocytes, from 80 days ESC differentiation towards ventricular cardiomyocytes For + info: https://www.nature.com/articles/s41588-019-0479-7 | H3K27ac peak with distance > 10Kb to protein coding TSS (Peaks were called after reads mapping with Bowtie2 with Macs2) | FPKMs directly obtained from source | TAD map generated upon processing Day 80 cardiomyocyte differentiation HiC data. TADs were called with the usage of the tool DomainCaller by considering the 50kb contact matrices in the .hic files | ||||||
Head-Neck | Neural Crest Early | Neural Crest Data of hNCC differentiation This NC data corresponds with an early and heterogeneous stage of NC differentiation For +info: https://pubmed.ncbi.nlm.nih.gov/22981823/ | p300 peak, intersecting H3K27ac peak, with distance > 10Kb to protein coding TSS (Peaks were called upon reads mapping with Bowtie2 with Macs2) | FPKMs obtained upon reads mapping with Tophat2 and gene expression quantification with Cufflinks | 3D genome browser: http://3dgenome.fsm.northwestern.edu/ TAD maps: http://3dgenome.fsm.northwestern.edu/downloads/hg19.TADs.zip | ESC TAD map directly obtained from: http://3dgenome.fsm.northwestern.edu/downloads/hg19.TADs.zip | ||||
Neural Crest Late | Neural Crest Data of hNCC differentiation P4 differentiation stage, it corresponds with a later and more homogeneous stage of NC differentiation in comparison with Neural Crest Early For +info: https://pubmed.ncbi.nlm.nih.gov/26365491/ | p300 peak, intersecting H3K27ac peak, with distance > 10Kb to protein coding TSS (Peaks were called upon reads mapping with Bowtie2 with Macs2) | FPKMs obtained upon reads mapping with Tophat2 and gene expression quantification with Cufflinks | 3D genome browser: http://3dgenome.fsm.northwestern.edu/ TAD maps: http://3dgenome.fsm.northwestern.edu/downloads/hg19.TADs.zip | ESC TAD map directly obtained from: http://3dgenome.fsm.northwestern.edu/downloads/hg19.TADs.zip | |||||
PalateCS20 | Embryonic Palate Carnegie Stage 20 For +info: https://pubmed.ncbi.nlm.nih.gov/29719267/ | H3K27ac peak with distance > 10Kb to protein coding TSS (Peaks were called with Macs2 using the provided alignment files (tagAlign)) | To obtain the FPKMs, first, the bedfiles containing the reads mapping coordinates (.unique.bed12) for Palate1 sample were converted into bam files with bedToBam tool. Next FPKMs were computed with Cufflinks. | 3D genome browser: http://3dgenome.fsm.northwestern.edu/ TAD maps: http://3dgenome.fsm.northwestern.edu/downloads/hg19.TADs.zip | ESC TAD map directly obtained from: http://3dgenome.fsm.northwestern.edu/downloads/hg19.TADs.zip | |||||
Limbs | EmbryonicLimb1 | Embryonic Lower Limb data of entire limb buds, Carnegie Stages 14-19 For +info: https://www.nature.com/articles/s41467-020-17305-2 | H3K27ac peak with distance > 10Kb to protein coding TSS (Peaks were called after reads mapping with Bowtie2 with Macs2) | https://elifesciences.org/articles/15657
| To obtain the FPKMs, first, the bedfiles containing the reads mapping coordinates (.unique.bed12) for Lower Limb samples were converted into bam files with bedToBam tool. Next FPKMs were computed with Cufflinks. | 3D genome browser: http://3dgenome.fsm.northwestern.edu/ TAD maps: http://3dgenome.fsm.northwestern.edu/downloads/hg19.TADs.zip | ESC TAD map directly obtained from: http://3dgenome.fsm.northwestern.edu/downloads/hg19.TADs.zip | |||
EmbryonicLimb2 | Embryonic Upper Limb data of entire limb buds, Carnegie Stages 14-19 For +info: https://www.nature.com/articles/s41467-020-17305-2 | H3K27ac peak with distance > 10Kb to protein coding TSS (Peaks were called after reads mapping with Bowtie2 with Macs2) | https://elifesciences.org/articles/15657
| To obtain the FPKMs, first, the bedfiles containing the reads mapping coordinates (.unique.bed12) for Upper Limb samples were converted into bam files with bedToBam tool. Next FPKMs were computed with Cufflinks. | 3D genome browser: http://3dgenome.fsm.northwestern.edu/ TAD maps: http://3dgenome.fsm.northwestern.edu/downloads/hg19.TADs.zip | ESC TAD map directly obtained from: http://3dgenome.fsm.northwestern.edu/downloads/hg19.TADs.zip | ||||
Neurodevelopmental | PfcGw15 | Brain prefrontal cortex Gestation Week 15 For +info: https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE149268 | https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE149268 | H3K27ac peak with distance > 10Kb to protein coding TSS (Peaks were directly obtained from source) | From database: https://www.brainspan.org/static/home
| Expression values obtained from file: summarized to genes file Given that gestational age is 2 weeks longer than conceptional age, post conception week 13 (pcw13) samples information was considered to match with chip-seq gestation week 15 data Expression value computed as the average of the pcw13 samples: VFC (ventrolateral prefrontal cortex), MFC[anterior (rostral) cingulate (medial prefrontal) cortex] and DFC(dorsolateral prefrontal cortex)
| Brain Prefrontal Cortex TAD map, generated from CO (prefrontal cortex) boundary map provided in the referenced source. The Excel file containing the boundary maps can be found here: | |||
PfcGw18 | Brain prefrontal cortex Gestation Week 18 For +info: https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE149268 | https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE149268 | H3K27ac peak with distance > 10Kb to protein coding TSS (Peaks were directly obtained from source) | From database: https://www.brainspan.org/static/home
| Expression values obtained from file: summarized to genes file Given that gestational age is 2 weeks longer than conceptional age, post conception week 16 (pcw16) samples information was considered to match with chip-seq gestation week 18 data Expression value computed as the average of the pcw16 samples: VFC (ventrolateral prefrontal cortex), MFC[anterior (rostral) cingulate (medial prefrontal) cortex] and DFC(dorsolateral prefrontal cortex)
| Brain Prefrontal Cortex TAD map, generated from CO (prefrontal cortex) boundary map provided in the referenced source. The Excel file containing the boundary maps can be found here: | ||||