diff --git a/README.md b/README.md index 70ae474..db2c375 100755 --- a/README.md +++ b/README.md @@ -41,16 +41,16 @@ jump-diffusion propagator. It can be used for scale-free smoothing, fuzzy data segmentation and data filtering. ![cna gof](/images/cna.gof.png "CNA goodness of fit") -![cna gof](/images/cna.post.png "CNA posterior") ![baf gof](/images/baf.gof.png "BAF goodness of fit") -![cna gof](/images/baf.post.png "BAF posterior") +![cna post](/images/cna.post.png "CNA posterior") +![baf post](/images/baf.post.png "BAF posterior") +![snv gof](/images/snv.gof.png "SNV goodness of fit") Visualization of the cloneHD output for the simulated data set. From top to bottom: (i) the bias corrected read depth data and the cloneHD -posterior mean emission rate (ii) the total copy number posterior -distribution for subclone 1 with f1=0.52 and subclone 2 with f2=0.07 -(iii) the BAF and (iv) the minor allele posterior. (Plots created with Wolfram -[Mathematica](http://www.wolfram.com/mathematica/).) +posterior mean emission rate (ii) the BAF (iii-iv) the total and minor copy number posterior +distribution for subclone 1 with f1=0.52 and subclone 2 with f2=0.07 and (v) the SNV goodness of fit, where each SNV is assigned to a genotype according to the its posterior, and the observed SNV freequency is scaled by one half of the local mean total copynumber. +(All plots are created with Wolfram [Mathematica](http://www.wolfram.com/mathematica/).) # filterHD command line arguments diff --git a/images/baf.gof.png b/images/baf.gof.png index 0dd3a0a..3b7a908 100644 Binary files a/images/baf.gof.png and b/images/baf.gof.png differ diff --git a/images/baf.post.png b/images/baf.post.png index 927587e..138eb34 100644 Binary files a/images/baf.post.png and b/images/baf.post.png differ diff --git a/images/cna.gof.png b/images/cna.gof.png index 4837eac..2bdc690 100644 Binary files a/images/cna.gof.png and b/images/cna.gof.png differ diff --git a/images/cna.post.png b/images/cna.post.png index d0ac07f..a76f7a9 100644 Binary files a/images/cna.post.png and b/images/cna.post.png differ diff --git a/run-example.sh b/run-example.sh index cd50516..f763e28 100644 --- a/run-example.sh +++ b/run-example.sh @@ -6,16 +6,17 @@ export OMP_NUM_THREADS=4; part=$1 # input data -#data="./test/data/" -#results="./test/results/" -data="/Users/af7/Projects/cloneHD/Test/data/" -results="/Users/af7/Projects/cloneHD/Test/results/" +data="./test/data/" +results="./test/results/" +#data="/Users/af7/Projects/cloneHD/Test/data/" +#results="/Users/af7/Projects/cloneHD/Test/results/" filterHD="./build/filterHD" cloneHD="./build/cloneHD" normalCNA="${data}/normal.cna.txt" tumorCNA="${data}/tumor.cna.txt" tumorBAF="${data}/tumor.baf.txt" +tumorSNV="${data}/tumor.snv.txt" bias="${results}/normal.cna.posterior-1.txt" tumorCNAjumps="${results}/tumor.cna.bias.jumps.txt" tumorBAFjumps="${results}/tumor.baf.jumps.txt" @@ -59,9 +60,14 @@ then echo "*** cloneHD ***" echo cmd="$cloneHD --cna $tumorCNA --baf $tumorBAF --pre ${results}/tumor --bias $bias --seed 123 --trials 2\ - --nmax 3 --force --max-tcn 5 --cna-jumps $tumorCNAjumps --baf-jumps $tumorBAFjumps --min-jump 0.01 --restarts 10" + --nmax 3 --force --max-tcn 4 --cna-jumps $tumorCNAjumps --baf-jumps $tumorBAFjumps --min-jump 0.01 --restarts 10" echo $cmd $cmd echo cat ${results}/tumor.summary.txt + cmd="$cloneHD --snv $tumorSNV --pre ${results}/tumor --seed 123 --trials 2\ + --nmax 3 --force --max-tcn 4 --restarts 10 --mean-tcn ${results}/tumor.mean-tcn.txt --avail-cn ${results}/tumor.avail-cn.txt" + echo $cmd + $cmd + echo fi \ No newline at end of file diff --git a/src/filterHD.cpp b/src/filterHD.cpp index 7ef5193..ea03577 100755 --- a/src/filterHD.cpp +++ b/src/filterHD.cpp @@ -179,7 +179,7 @@ int main (int argc, const char * argv[]){ FILE * total_fp = fopen(buff,"w"); fprintf(total_fp, "#sample site mean std-dev jump-prob"); fprintf(total_fp, " posterior %.5e %.5e\n", myJD.myEmit->xmin, myJD.myEmit->xmax); - double gof=0, xobs=0,gofNorm=0; + double gof=0, xobs=0, gofNorm=0; for (int s=0; s < myJD.nSamples; s++){//get posterior distribution with the ML parameters myJD.get_posterior(s); double mstd = 0.0; @@ -202,6 +202,21 @@ int main (int argc, const char * argv[]){ if (opts.jumps==1){ jumps[s][l] *= exp(myJD.pnojump[s][l]); } + //goodness of fit... + if (mask==NULL || mask[s][l] == 1){ + xobs = double(myEmit.reads[t][s][l]) / double(myEmit.depths[t][s][l]); + if (opts.reflect) xobs = min(xobs,1.0-xobs); + double x=0,g=0,dg=0; + double b = (myEmit.bias == NULL) ? 1.0 : myEmit.bias[s][l]; + for (int i=0; i<=uidx; i++){ + x = myEmit.xgrid[i] * b; + dg = fabs(x - xobs) * post->data[i]; + if (i==0||i==uidx) dg *= 0.5; + g += dg; + } + gof += g * myEmit.dx; + gofNorm += 1.0; + } } mstd /= double(myJD.nSites[s]); //filter out data points which are not compatible with the emission model @@ -232,21 +247,6 @@ int main (int argc, const char * argv[]){ } } fprintf(total_fp,"\n"); - //goodness of fit... - if (mask==NULL || mask[s][l] == 1){ - xobs = double(myEmit.reads[t][s][l]) / double(myEmit.depths[t][s][l]); - if (opts.reflect) xobs = min(xobs,1.0-xobs); - double x=0,g=0,dg=0; - double b = (myEmit.bias == NULL) ? 1.0 : myEmit.bias[s][l]; - for (int i=0; i<=uidx; i++){ - x = myEmit.xgrid[i] * b; - dg = fabs(x - xobs) * post->data[i]; - if (i==0||i==uidx) dg *= 0.5; - g += dg; - } - gof += g * myEmit.dx; - gofNorm += 1.0; - } } gsl_matrix_free(myJD.gamma[s]); myJD.gamma[s] = NULL;