From f4777440be321808e6dd999941a9e03d804be9b6 Mon Sep 17 00:00:00 2001 From: Shane McCarthy Date: Tue, 12 May 2015 15:30:24 +0100 Subject: [PATCH] add a command -log to allow logging to a file --- pbwt.h | 1 + pbwtCore.c | 8 +++---- pbwtGeneticMap.c | 4 ++-- pbwtHtslib.c | 9 ++++--- pbwtIO.c | 38 ++++++++++++++--------------- pbwtImpute.c | 62 ++++++++++++++++++++++++------------------------ pbwtMain.c | 20 ++++++++++++---- pbwtMatch.c | 20 ++++++++-------- pbwtPaint.c | 4 ++-- utils.c | 14 +++++------ utils.h | 2 +- 11 files changed, 98 insertions(+), 84 deletions(-) diff --git a/pbwt.h b/pbwt.h index 13ec071..05f35b7 100644 --- a/pbwt.h +++ b/pbwt.h @@ -88,6 +88,7 @@ typedef struct { /* data structure for moving forwards - doesn't know PBWT */ /* pbwtMain.c */ extern char *commandLine ; /* a copy of the command line */ +extern FILE *logFilePtr ; /* log file pointer */ /* pbwtCore.c */ diff --git a/pbwtCore.c b/pbwtCore.c index 9be50f6..46d6801 100644 --- a/pbwtCore.c +++ b/pbwtCore.c @@ -98,7 +98,7 @@ PBWT *pbwtSubSites (PBWT *pOld, double fmin, double frac) } pbwtCursorToAFend (uNew, pNew) ; - fprintf (stderr, "subsites with fmin %f, frac %f leaves %d sites\n", fmin, frac, pNew->N) ; + fprintf (logFilePtr, "subsites with fmin %f, frac %f leaves %d sites\n", fmin, frac, pNew->N) ; pNew->chrom = pOld->chrom ; pOld->chrom = 0 ; pNew->samples = pOld->samples ; pOld->samples = 0 ; @@ -176,7 +176,7 @@ void pbwtBuildReverse (PBWT *p) /* save uR->a, which is the lexicographic order of the sequences */ if (!p->aRend) p->aRend = myalloc (M, int) ; memcpy (p->aRend, uR->a, M * sizeof(int)) ; - fprintf (stderr, "built reverse PBWT - size %ld\n", arrayMax(p->zz)) ; + fprintf (logFilePtr, "built reverse PBWT - size %ld\n", arrayMax(p->zz)) ; if (isCheck) /* print out the reversed haplotypes */ { FILE *fp = fopen ("rev.haps","w") ; @@ -622,7 +622,7 @@ PBWT *pbwtSelectSites (PBWT *pOld, Array sites, BOOL isKeepOld) } pbwtCursorToAFend (uNew, pNew) ; - fprintf (stderr, "%d sites selected from %d, pbwt size for %d haplotypes is %ld\n", + fprintf (logFilePtr, "%d sites selected from %d, pbwt size for %d haplotypes is %ld\n", pNew->N, pOld->N, pNew->M, arrayMax(pNew->yz)) ; if (isKeepOld) @@ -682,7 +682,7 @@ PBWT *pbwtRemoveSites (PBWT *pOld, Array sites, BOOL isKeepOld) } pbwtCursorToAFend (uNew, pNew) ; - fprintf (stderr, "%d sites selected from %d, pbwt size for %d haplotypes is %ld\n", + fprintf (logFilePtr, "%d sites selected from %d, pbwt size for %d haplotypes is %ld\n", pNew->N, pOld->N, pNew->M, arrayMax(pNew->yz)) ; if (isKeepOld) diff --git a/pbwtGeneticMap.c b/pbwtGeneticMap.c index 232ca77..7bed33c 100644 --- a/pbwtGeneticMap.c +++ b/pbwtGeneticMap.c @@ -86,7 +86,7 @@ void readGeneticMap (FILE *fp) buildMap () ; - fprintf (stderr, "read %d genetic map entries from %d, %f to %d, %f\n", + fprintf (logFilePtr, "read %d genetic map entries from %d, %f to %d, %f\n", n, arr(map.x, 0, int), arr(map.g, 0, double), arr(map.x, n-1, int), arr(map.g, n-1, double)) ; } @@ -153,7 +153,7 @@ void pbwt4hapsStats (PBWT *p) { if (!p || !p->sites) die ("hap4stats called without a PBWT with sites") ; if (!map.x) - { fprintf (stderr, "hap4stats called without a map - using a linear 1cM/Mb map\n") ; + { fprintf (logFilePtr, "hap4stats called without a map - using a linear 1cM/Mb map\n") ; map.x = arrayCreate (2, int) ; array(map.x,0,int) = arrp(p->sites,0,Site)->x ; array(map.x,1,int) = arrp(p->sites,arrayMax(p->sites)-1,Site)->x ; diff --git a/pbwtHtslib.c b/pbwtHtslib.c index 0f8f6c2..d68f8cc 100644 --- a/pbwtHtslib.c +++ b/pbwtHtslib.c @@ -162,8 +162,9 @@ PBWT *pbwtReadVcfGT (char *filename) /* read GTs from vcf/bcf using htslib */ free (x) ; pbwtCursorDestroy (u) ; free (xMissing) ; - fprintf (stderr, "read genotypes from %s\n", filename) ; - if (p->missingOffset) fprintf (stderr, "%ld missing values at %d sites\n", + fprintf (logFilePtr, "read genotypes from %s with %ld sample names and %ld sites on chromosome %s: M, N are %d, %d\n", + filename, arrayMax(p->samples)/2, arrayMax(p->sites), p->chrom, p->M, p->N) ; + if (p->missingOffset) fprintf (logFilePtr, "%ld missing values at %d sites\n", nMissing, nMissingSites) ; return p ; @@ -241,7 +242,7 @@ void pbwtWriteVcf (PBWT *p, char *filename, char *referenceFasta, char *mode) if (!fp) die ("could not open file for writing: %s", filename) ; if (!p) die ("pbwtWriteVcf called without a valid pbwt") ; if (!p->sites) die ("pbwtWriteVcf called without sites") ; - if (!p->samples) fprintf (stderr, "Warning: pbwtWriteVcf called without samples... using fake sample names PBWT0, PBWT1 etc...\n") ; + if (!p->samples) fprintf (logFilePtr, "Warning: pbwtWriteVcf called without samples... using fake sample names PBWT0, PBWT1 etc...\n") ; BOOL isDosage = p->dosageOffset ? TRUE : FALSE ; // write header @@ -389,6 +390,8 @@ void pbwtWriteVcf (PBWT *p, char *filename, char *referenceFasta, char *mode) bcf_hdr_destroy(bcfHeader) ; bcf_destroy1(bcfRecord); hts_close(fp) ; + + fprintf (logFilePtr, "written vcf file: %d records and %d samples\n", p->N, p->M/2) ; } /******* end of file ********/ diff --git a/pbwtIO.c b/pbwtIO.c index 5477b7f..15cd934 100644 --- a/pbwtIO.c +++ b/pbwtIO.c @@ -53,7 +53,7 @@ void pbwtWrite (PBWT *p, FILE *fp) /* just writes compressed pbwt in yz */ if (fwrite (arrp(p->yz, 0, uchar), sizeof(uchar), arrayMax(p->yz), fp) != arrayMax(p->yz)) die ("error writing data in pbwtWrite") ; - fprintf (stderr, "written %ld chars pbwt: M, N are %d, %d\n", arrayMax(p->yz), p->M, p->N) ; + fprintf (logFilePtr, "written %ld chars pbwt: M, N are %d, %d\n", arrayMax(p->yz), p->M, p->N) ; } void pbwtWriteSites (PBWT *p, FILE *fp) @@ -72,7 +72,7 @@ void pbwtWriteSites (PBWT *p, FILE *fp) } if (ferror (fp)) die ("error writing sites file") ; - fprintf (stderr, "written %d sites from %d to %d\n", p->N, + fprintf (logFilePtr, "written %d sites from %d to %d\n", p->N, arrp(p->sites, 0, Site)->x, arrp(p->sites, p->N-1, Site)->x) ; } @@ -91,7 +91,7 @@ void pbwtWriteSamples (PBWT *p, FILE *fp) } if (ferror (fp)) die ("error writing samples file") ; - fprintf (stderr, "written %d samples\n", p->M/2) ; + fprintf (logFilePtr, "written %d samples\n", p->M/2) ; } void writeDataOffset (FILE *fp, char *name, Array offset, Array data, int N) @@ -108,7 +108,7 @@ void writeDataOffset (FILE *fp, char *name, Array offset, Array data, int N) if (fwrite (arrp(offset, 0, long), sizeof(long), N, fp) != N) die ("error writing offsets in write %s", name) ; - fprintf (stderr, "written %ld chars compressed %s data\n", n, name) ; + fprintf (logFilePtr, "written %ld chars compressed %s data\n", n, name) ; } void pbwtWriteMissing (PBWT *p, FILE *fp) @@ -125,7 +125,7 @@ void pbwtWriteReverse (PBWT *p, FILE *fp) int* tstart = p->aFstart ; p->aFstart = p->aRstart ; int* tend = p->aFend ; p->aFend = p->aRend ; - fprintf (stderr, "reverse: ") ; pbwtWrite (p, fp) ; + fprintf (logFilePtr, "reverse: ") ; pbwtWrite (p, fp) ; p->yz = tz ; p->aFstart = tstart ; p->aFend = tend ; } @@ -200,7 +200,7 @@ PBWT *pbwtRead (FILE *fp) if (fread (arrp(p->yz, 0, uchar), sizeof(uchar), nz, fp) != nz) die ("error reading data in pbwt file") ; - fprintf (stderr, "read pbwt %s file with %ld bytes: M, N are %d, %d\n", tag, nz, p->M, p->N) ; + fprintf (logFilePtr, "read pbwt %s file with %ld bytes: M, N are %d, %d\n", tag, nz, p->M, p->N) ; return p ; } @@ -248,7 +248,7 @@ Array pbwtReadSitesFile (FILE *fp, char **chrom) if (ferror (fp)) die ("error reading sites file") ; - fprintf (stderr, "read %ld sites on chromosome %s from file\n", arrayMax(sites), *chrom) ; + fprintf (logFilePtr, "read %ld sites on chromosome %s from file\n", arrayMax(sites), *chrom) ; arrayDestroy (varTextArray) ; return sites ; @@ -324,7 +324,7 @@ Array pbwtReadSamplesFile (FILE *fp) /* for now assume all samples diploid */ } arrayDestroy (nameArray) ; - fprintf (stderr, "read %ld sample names\n", arrayMax(samples)) ; + fprintf (logFilePtr, "read %ld sample names\n", arrayMax(samples)) ; return samples ; } @@ -357,7 +357,7 @@ static void readDataOffset (FILE *fp, char *name, Array *offset, Array *data, in if (fread (arrp(*data, 0, uchar), sizeof(uchar), n, fp) != n) die ("error reading zMissing in pbwtReadMissing") ; arrayMax(*data) = n ; - fprintf (stderr, "read %ld chars compressed missing data\n", n) ; + fprintf (logFilePtr, "read %ld chars compressed missing data\n", n) ; *offset = arrayReCreate (*offset, N, long) ; if (dummy != -1) /* old version with ints not longs */ @@ -469,9 +469,9 @@ PBWT *pbwtReadMacs (FILE *fp) } pbwtCursorToAFend (u, p) ; - fprintf (stderr, "read MaCS file: M, N are\t%d\t%d\n", M, p->N) ; + fprintf (logFilePtr, "read MaCS file: M, N are\t%d\t%d\n", M, p->N) ; if (isStats) - fprintf (stderr, " xtot, ytot are\t%d\t%d\n", nxTot, nyTot) ; + fprintf (logFilePtr, " xtot, ytot are\t%d\t%d\n", nxTot, nyTot) ; free(x) ; pbwtCursorDestroy (u) ; @@ -563,9 +563,9 @@ static PBWT *pbwtReadLineFile (FILE *fp, char* type, ParseLineFunc parseLine) } pbwtCursorToAFend (u, p) ; - fprintf (stderr, "read %s file", type) ; - if (p->chrom) fprintf (stderr, " for chromosome %s", p->chrom) ; - fprintf (stderr, ": M, N are\t%d\t%d; yz length is %ld\n", p->M, p->N, arrayMax(p->yz)) ; + fprintf (logFilePtr, "read %s file", type) ; + if (p->chrom) fprintf (logFilePtr, " for chromosome %s", p->chrom) ; + fprintf (logFilePtr, ": M, N are\t%d\t%d; yz length is %ld\n", p->M, p->N, arrayMax(p->yz)) ; arrayDestroy(xArray) ; pbwtCursorDestroy (u) ; @@ -665,7 +665,7 @@ PBWT *pbwtReadGen (FILE *fp, char *chrom) nGenMissing = 0 ; PBWT *p = pbwtReadLineFile (fp, "gen", parseGenLine) ; p->chrom = strdup (chrom) ; - if (nGenMissing) fprintf (stderr, "%ld missing genotypes set to 00\n", nGenMissing) ; + if (nGenMissing) fprintf (logFilePtr, "%ld missing genotypes set to 00\n", nGenMissing) ; return p ; } @@ -722,9 +722,9 @@ PBWT *pbwtReadPhase (FILE *fp) /* Li and Stephens PHASE format */ } pbwtCursorToAFend (u, p) ; - fprintf (stderr, "read phase file") ; - if (p->chrom) fprintf (stderr, " for chromosome %s", p->chrom) ; - fprintf (stderr, ": M, N are\t%d\t%d; yz length is %ld\n", p->M, p->N, arrayMax(p->yz)) ; + fprintf (logFilePtr, "read phase file") ; + if (p->chrom) fprintf (logFilePtr, " for chromosome %s", p->chrom) ; + fprintf (logFilePtr, ": M, N are\t%d\t%d; yz length is %ld\n", p->M, p->N, arrayMax(p->yz)) ; for (i = 0 ; i < p->N ; ++i) free(data[i]) ; free (data) ; pbwtCursorDestroy (u) ; @@ -750,7 +750,7 @@ void pbwtWriteHaplotypes (FILE *fp, PBWT *p) } free (hap) ; pbwtCursorDestroy (u) ; - fprintf (stderr, "written haplotype file: %d rows of %d\n", p->N, M) ; + fprintf (logFilePtr, "written haplotype file: %d rows of %d\n", p->N, M) ; } /*************** write IMPUTE files ********************/ diff --git a/pbwtImpute.c b/pbwtImpute.c index 114bd38..3755bba 100644 --- a/pbwtImpute.c +++ b/pbwtImpute.c @@ -65,7 +65,7 @@ void imputeExplore (PBWT *p, int test) x[u->a[i]] = u->y[i] ; for (i = 0 ; i < M ; ++i) if (x[uz->a[i]] != uz->y[i]) - fprintf (stderr, "forward-backward mismatch at k %d i %d\n", k, i) ; + fprintf (logFilePtr, "forward-backward mismatch at k %d i %d\n", k, i) ; } if (k > 0.2*N && k < 0.8*N) /* ignore ends */ { f = (M - u->c) / (double)M ; for (ff = 0 ; f*100 > fBound[ff] ; ++ff) ; @@ -200,15 +200,15 @@ static void phaseCompare (PBWT *p, PBWT *q) } } if (isCheck && (xp[i]+xp[i+1] != xq[i]+xq[i+1])) - { fprintf (stderr, "phaseCompare mismatch k %d sequence %d\np", k, i) ; + { fprintf (logFilePtr, "phaseCompare mismatch k %d sequence %d\np", k, i) ; uchar **pHaps = pbwtHaplotypes(p), **qHaps = pbwtHaplotypes(q) ; int kk ; for (kk = 0 ; kk < 40 ; ++kk) - fprintf (stderr, " %d", pHaps[i][kk] + pHaps[i+1][kk]) ; - fprintf (stderr, "\nq") ; + fprintf (logFilePtr, " %d", pHaps[i][kk] + pHaps[i+1][kk]) ; + fprintf (logFilePtr, "\nq") ; for (kk = 0 ; kk < 40 ; ++kk) - fprintf (stderr, " %d", qHaps[i][kk] + qHaps[i+1][kk]) ; - fprintf (stderr, "\n") ; + fprintf (logFilePtr, " %d", qHaps[i][kk] + qHaps[i+1][kk]) ; + fprintf (logFilePtr, "\n") ; die ("phaseCompare mismatch: k %d, i %d, xp %d|%d, xq %d|%d", k, i, xp[i], xp[i+1], xq[i], xq[i+1]) ; } @@ -217,7 +217,7 @@ static void phaseCompare (PBWT *p, PBWT *q) pbwtCursorForwardsRead (uq) ; } - fprintf (stderr, "%.1f switches per sample, %.3f per het, %.1f nSwitch1, %.1f nSwitch5\n", + fprintf (logFilePtr, "%.1f switches per sample, %.3f per het, %.1f nSwitch1, %.1f nSwitch5\n", mFac*nSwitch, nSwitch/(double)nHet, mFac*nSwitch1, mFac*nSwitch5) ; if (isStats) @@ -383,13 +383,13 @@ PBWT *phase (PBWT *p, int nSparse) /* return rephased p */ if (isCheck) /* flip p->zz round into p->yz and compare to r */ { Array yzStore = p->yz ; p->yz = p->zz ; int *aFstartStore = p->aFstart ; p->aFstart = p->aRstart ; - fprintf (stderr, "After reverse pass: ") ; phaseCompare (p, r) ; + fprintf (logFilePtr, "After reverse pass: ") ; phaseCompare (p, r) ; p->yz = yzStore ; p->aFstart = aFstartStore ; } PBWT *q = phaseSweep (p, 0, TRUE, r, nSparse) ; /* compare new phasing to original and report switch rates */ - fprintf (stderr, "After forward pass: ") ; phaseCompare (p, q) ; + fprintf (logFilePtr, "After forward pass: ") ; phaseCompare (p, q) ; pbwtDestroy (p) ; return q ; @@ -408,7 +408,7 @@ PBWT *referencePhase0 (PBWT *p, PBWT *pRef) { if (!p->zz) pbwtBuildReverse (p) ; Array yzStore = p->yz ; p->yz = p->zz ; int *aFstartStore = p->aFstart ; p->aFstart = p->aRstart ; - fprintf (stderr, "After reverse pass: ") ; phaseCompare (p, r) ; + fprintf (logFilePtr, "After reverse pass: ") ; phaseCompare (p, r) ; p->yz = yzStore ; p->aFstart = aFstartStore ; } PBWT *q = phaseSweep (p, pRef, TRUE, r, nSparse) ; @@ -904,7 +904,7 @@ static inline int phaseExtend (int x0, int x1, PbwtCursor *uRef, int j0, static PBWT *referencePhase4 (PBWT *pOld, PBWT *pRef) { - fprintf (stderr, "Reference phase with extension method %s\n", extendMethodText) ; + fprintf (logFilePtr, "Reference phase with extension method %s\n", extendMethodText) ; int i, j, jq, k ; PbwtCursor *uOld = pbwtCursorCreate (pOld, TRUE, TRUE) ; uchar *xOld = myalloc (pOld->M, uchar) ; @@ -1002,7 +1002,7 @@ static PBWT *referencePhase4 (PBWT *pOld, PBWT *pRef) pbwtCursorForwardsReadAD (uRef, k) ; } - fprintf (stderr, "traceBackHeap final %ld, max %ld\n", + fprintf (logFilePtr, "traceBackHeap final %ld, max %ld\n", arrayMax(traceBackHeap)-arrayMax(traceBackFreeStack), arrayMax(traceBackHeap)) ; /* now do the traceback - we write first into the reverse pbwt for pNew */ @@ -1057,7 +1057,7 @@ static PBWT *referencePhase4 (PBWT *pOld, PBWT *pRef) /* reporting */ if (isCheck) for (jq = 0 ; jq < pOld->M ; jq +=2 ) - fprintf (stderr, "jq %d, nHets %d, liveAv %.2f, likeAv %.2f\n", jq, + fprintf (logFilePtr, "jq %d, nHets %d, liveAv %.2f, likeAv %.2f\n", jq, checkHets[jq], checkLiveSum[jq]/(double)pRef->N, exp(checkLikeSum[jq]/pRef->N)) ; /* cleanup */ @@ -1076,7 +1076,7 @@ static PBWT *referencePhase4 (PBWT *pOld, PBWT *pRef) PBWT *referencePhase (PBWT *pOld, char *fileNameRoot) { - fprintf (stderr, "phase against reference %s\n", fileNameRoot) ; + fprintf (logFilePtr, "phase against reference %s\n", fileNameRoot) ; if (pOld->M % 2) die ("phase requires that M = %d is even", pOld->M) ; if (!pOld || !pOld->yz || !pOld->sites) die ("referencePhase called without existing pbwt with sites") ; @@ -1091,10 +1091,10 @@ PBWT *referencePhase (PBWT *pOld, char *fileNameRoot) pRef = pbwtSelectSites (pRef, pOld->sites, FALSE) ; if (!pOld->N) die ("no overlapping sites in referencePhase") ; - fprintf (stderr, "Phase preliminaries: ") ; timeUpdate() ; + fprintf (logFilePtr, "Phase preliminaries: ") ; timeUpdate(logFilePtr) ; PBWT *pNew = referencePhase4 (pOld, pRef) ; - fprintf (stderr, "Phasing complete: ") ; timeUpdate() ; - fprintf (stderr, "After phasing: ") ; phaseCompare (pNew, pOld) ; + fprintf (logFilePtr, "Phasing complete: ") ; timeUpdate(logFilePtr) ; + fprintf (logFilePtr, "After phasing: ") ; phaseCompare (pNew, pOld) ; pNew->chrom = pOld->chrom ; pOld->chrom = 0 ; pNew->sites = pOld->sites ; pOld->sites = 0 ; @@ -1132,8 +1132,8 @@ static PBWT *referenceImpute3 (PBWT *pOld, PBWT *pRef, PBWT *pFrame, { int i, j, k ; - fprintf (stderr, "Reference impute using maximal matches: ") ; - if (nSparse > 1) fprintf (stderr, "(nSparse = %d, fSparse = %.2f) ", nSparse, fSparse) ; + fprintf (logFilePtr, "Reference impute using maximal matches: ") ; + if (nSparse > 1) fprintf (logFilePtr, "(nSparse = %d, fSparse = %.2f) ", nSparse, fSparse) ; /* build the array of maximal matches into pFrame for each sequence in pOld */ maxMatch = myalloc (pOld->M, Array) ; @@ -1149,7 +1149,7 @@ static PBWT *referenceImpute3 (PBWT *pOld, PBWT *pRef, PBWT *pFrame, sizeof(MatchSegment), matchSegmentCompare)) die ("error %d in mergesort", errno) ; /* mergesort() because they are close to being already sorted */ - if (isCheck) fprintf (stderr, "%ld matches found to query %d\n", + if (isCheck) fprintf (logFilePtr, "%ld matches found to query %d\n", arrayMax(maxMatch[j]), j) ; /* add an end marker */ MatchSegment *ms = arrayp(maxMatch[j],arrayMax(maxMatch[j]),MatchSegment) ; @@ -1243,7 +1243,7 @@ static PBWT *referenceImpute3 (PBWT *pOld, PBWT *pRef, PBWT *pFrame, } pbwtCursorToAFend (uNew, pNew) ; - if (nConflicts) fprintf (stderr, "%d times where no overlapping matches because query does not match any reference - set imputed value to 0\n", nConflicts) ; + if (nConflicts) fprintf (logFilePtr, "%d times where no overlapping matches because query does not match any reference - set imputed value to 0\n", nConflicts) ; pbwtCursorDestroy (uOld) ; pbwtCursorDestroy (uRef) ; pbwtCursorDestroy (uNew) ; free (aRefInv) ; free (firstSeg) ; @@ -1257,7 +1257,7 @@ static PBWT *referenceImpute3 (PBWT *pOld, PBWT *pRef, PBWT *pFrame, PBWT *referenceImpute (PBWT *pOld, char *fileNameRoot, int nSparse, double fSparse) { /* Preliminaries */ - fprintf (stderr, "impute against reference %s\n", fileNameRoot) ; + fprintf (logFilePtr, "impute against reference %s\n", fileNameRoot) ; if (!pOld || !pOld->yz || !pOld->sites) die ("referenceImpute called without existing pbwt with sites") ; PBWT *pRef = pbwtReadAll (fileNameRoot) ; @@ -1268,7 +1268,7 @@ static PBWT *referenceImpute3 (PBWT *pOld, PBWT *pRef, PBWT *pFrame, /* identify the intersecting sites */ PBWT *pFrame = pbwtSelectSites (pRef, pOld->sites, TRUE) ; /* keep the full ref to impute to */ if (pFrame->N == pRef->N) - { fprintf (stderr, "No additional sites to impute in referenceImpute\n") ; + { fprintf (logFilePtr, "No additional sites to impute in referenceImpute\n") ; pbwtDestroy (pFrame) ; pbwtDestroy (pRef) ; return pOld ; } @@ -1277,7 +1277,7 @@ static PBWT *referenceImpute3 (PBWT *pOld, PBWT *pRef, PBWT *pFrame, if (!pOld->N) die ("no overlapping sites in referenceImpute") ; if (!pOld->aFend) die ("pOld has no aFend in referenceImpute - your pbwt was made by a previous version of the code; buildReverse and resave the forwards pbwt") ; - fprintf (stderr, "Imputation preliminaries: ") ; timeUpdate() ; + fprintf (logFilePtr, "Imputation preliminaries: ") ; timeUpdate(logFilePtr) ; if (isStats) { pImp = myalloc (pRef->N, double*) ; @@ -1298,11 +1298,11 @@ static PBWT *referenceImpute3 (PBWT *pOld, PBWT *pRef, PBWT *pFrame, for (j = 0 ; j < pNew->M ; ++j) ++his[ff][(int)(pImp[k][j]*10)] ; } for (ff = 0 ; ff < 17 ; ++ff) - { fprintf (stderr, "%5.1f", fBound[ff]) ; + { fprintf (logFilePtr, "%5.1f", fBound[ff]) ; double tot = 0.0 ; for (j = 10 ; j-- ;) tot += his[ff][j] ; for (j = 0 ; j < 10 ; ++j) - fprintf (stderr, " %8.5f", his[ff][j]/tot) ; - fprintf (stderr, "\n") ; + fprintf (logFilePtr, " %8.5f", his[ff][j]/tot) ; + fprintf (logFilePtr, "\n") ; } } @@ -1366,7 +1366,7 @@ PBWT *imputeMissing (PBWT *pOld) void genotypeCompare (PBWT *p, char *fileNameRoot) { - fprintf (stderr, "compare genotypes to reference %s\n", fileNameRoot) ; + fprintf (logFilePtr, "compare genotypes to reference %s\n", fileNameRoot) ; if (!p || !p->yz || !p->sites) die ("genotypeCompare called without existing pbwt with sites") ; PBWT *pRef = pbwtReadAll (fileNameRoot) ; @@ -1516,7 +1516,7 @@ PBWT *pbwtCorruptSites (PBWT *pOld, double pSite, double pChange) } pbwtCursorToAFend (uNew, pNew) ; - fprintf (stderr, "corruptSites with pSite %f, pChange %f changes %.4f of values\n", + fprintf (logFilePtr, "corruptSites with pSite %f, pChange %f changes %.4f of values\n", pSite, pChange, nChange/(N*(double)M)) ; pNew->sites = pOld->sites ; pOld->sites = 0 ; @@ -1564,7 +1564,7 @@ PBWT *pbwtCorruptSamples (PBWT *pOld, double pSample, double pChange) } pbwtCursorToAFend (uNew, pNew) ; - fprintf (stderr, "corruptSamples with pSample %f, pChange %f changes %.4f of values\n", + fprintf (logFilePtr, "corruptSamples with pSample %f, pChange %f changes %.4f of values\n", pSample, pChange, nChange/(N*(double)M)) ; pNew->sites = pOld->sites ; pOld->sites = 0 ; @@ -1599,7 +1599,7 @@ PBWT *pbwtCopySamples (PBWT *pOld, int Mnew, double meanLength) } pbwtCursorToAFend (uNew, pNew) ; - fprintf (stderr, "copySamples made %d samples with mean switch length %.1f\n", + fprintf (logFilePtr, "copySamples made %d samples with mean switch length %.1f\n", Mnew, meanLength) ; pNew->sites = pOld->sites ; pOld->sites = 0 ; diff --git a/pbwtMain.c b/pbwtMain.c index 061c256..339301e 100644 --- a/pbwtMain.c +++ b/pbwtMain.c @@ -96,7 +96,7 @@ static void exportSiteInfo (PBWT *p, FILE *fp, int f1, int f2) pbwtCursorForwardsReadAD (u, i) ; } pbwtCursorDestroy (u) ; - fprintf (stderr, "%d rows exported with allele count f, %d <= f < %d\n", n, f1, f2) ; + fprintf (logFilePtr, "%d rows exported with allele count f, %d <= f < %d\n", n, f1, f2) ; } /************ AF distribution **************/ @@ -114,7 +114,7 @@ static void siteFrequencySpectrum (PBWT *p) 100000, 200000, 300000, 400000, 500000, 600000, 700000, 800000, 900000, 1000000 } ; - timeUpdate() ; + timeUpdate(logFilePtr) ; FILE *fp ; if (p->sites) { fp = fopen ("sites.freq", "w") ; if (!fp) die ("can't open sites.freq") ; } @@ -164,14 +164,18 @@ static void recordCommandLine (int argc, char *argv[]) /* a couple of utilities for opening/closing files */ -#define FOPEN(name,mode) if (!strcmp (argv[1], "-")) fp = !strcmp(mode,"r") ? stdin : stdout ; else if (!(fp = fopen (argv[1],mode))) die ("failed to open %s file", name, argv[1]) +#define FOPEN(name,mode) if (!strcmp (argv[1], "-")) fp = !strcmp(mode,"r") ? stdin : stdout ; else if (!(fp = fopen (argv[1],mode))) die ("failed to open %s file %s", name, argv[1]) #define FCLOSE if (strcmp(argv[1], "-")) fclose(fp) +#define LOGOPEN(name) if (!strcmp (argv[1], "-")) logFilePtr = stderr ; else if (!(logFilePtr = fopen (argv[1],"w"))) die ("failed to open %s file %s", name, argv[1]) +#define LOGCLOSE if (logFilePtr && !(logFilePtr==stderr)) fclose(logFilePtr) const char *pbwtCommitHash(void) { return PBWT_COMMIT_HASH ; } +FILE *logFilePtr ; /* log file pointer */ + int main (int argc, char *argv[]) { FILE *fp ; @@ -179,6 +183,8 @@ int main (int argc, char *argv[]) Array test ; char *referenceFasta = NULL; + logFilePtr = stderr ; + pbwtInit () ; --argc ; ++argv ; @@ -191,6 +197,7 @@ int main (int argc, char *argv[]) fprintf (stderr, "Contact: Richard Durbin [rd@sanger.ac.uk]\n") ; fprintf (stderr, "Usage: pbwt [ - [options]* ]+\n") ; fprintf (stderr, "Commands:\n") ; + fprintf (stderr, " -log log file; '-' for stderr\n") ; fprintf (stderr, " -check do various checks\n") ; fprintf (stderr, " -stats print stats depending on commands; writes to stdout\n") ; fprintf (stderr, " -read read pbwt file; '-' for stdin\n") ; @@ -257,7 +264,7 @@ int main (int argc, char *argv[]) fprintf (stderr, " -4hapsStats mu:rho 4 hap test stats\n") ; } - timeUpdate() ; + timeUpdate(logFilePtr) ; while (argc) { if (!(**argv == '-')) die ("not well formed command %s\nType pbwt without arguments for help", *argv) ; @@ -278,6 +285,8 @@ int main (int argc, char *argv[]) free(files); argc -= nfiles+1 ; argv += nfiles+1 ; } + else if (!strcmp (argv[0], "-log") && argc > 1) + { LOGCLOSE ; LOGOPEN("log") ; argc -= 2 ; argv += 2 ; } else if (!strcmp (argv[0], "-haps") && argc > 1) { FOPEN("haps","w") ; pbwtWriteHaplotypes (fp, p) ; FCLOSE ; argc -= 2 ; argv += 2 ; } else if (!strcmp (argv[0], "-read") && argc > 1) @@ -437,12 +446,13 @@ int main (int argc, char *argv[]) { p = playGround (p) ; argc -= 1 ; argv += 1 ; } else die ("unrecognised command %s\nType pbwt without arguments for help", *argv) ; - timeUpdate() ; + timeUpdate(logFilePtr) ; } if (p) pbwtDestroy(p) ; if (variationDict) dictDestroy(variationDict); sampleDestroy(); fgetword (NULL) ; // to keep valgrind happy, free malloced memory + LOGCLOSE ; return 0 ; } diff --git a/pbwtMatch.c b/pbwtMatch.c index cc79bab..1e1e11d 100644 --- a/pbwtMatch.c +++ b/pbwtMatch.c @@ -172,8 +172,8 @@ void pbwtLongMatches (PBWT *p, int L) /* reporting threshold L - if 0 then maxim hTot += arr(matchLengthHist, i, int) * i ; printf ("%d\t%d\n", i, arr(matchLengthHist, i, int)) ; } - fprintf (stderr, "Average %.1f matches per sample\n", nTot/(double)p->M) ; - fprintf (stderr, "Average length %.1f\n", hTot/(double)nTot) ; + fprintf (logFilePtr, "Average %.1f matches per sample\n", nTot/(double)p->M) ; + fprintf (logFilePtr, "Average length %.1f\n", hTot/(double)nTot) ; } pbwtCursorDestroy (u) ; @@ -203,7 +203,7 @@ void matchSequencesNaive (PBWT *p, FILE *fp) /* fp is a pbwt file of sequences t if (q->N != p->N) die ("query length in matchSequences %d != PBWT length %d", q->N, p->N) ; - fprintf (stderr, "Made haplotypes: ") ; timeUpdate () ; + fprintf (logFilePtr, "Made haplotypes: ") ; timeUpdate (logFilePtr) ; if (isCheck) { checkHapsA = query ; checkHapsB = reference ; Ncheck = p->N ; } @@ -237,7 +237,7 @@ void matchSequencesNaive (PBWT *p, FILE *fp) /* fp is a pbwt file of sequences t } } - fprintf (stderr, "Average number of best matches %.1f, Average length %.1f\n", + fprintf (logFilePtr, "Average number of best matches %.1f, Average length %.1f\n", nTot/(double)q->M, totLen/(double)nTot) ; pbwtDestroy (q) ; @@ -284,7 +284,7 @@ void matchSequencesIndexed (PBWT *p, FILE *fp) memcpy (d[k], up->d, (M+1)*sizeof(int)) ; pbwtCursorDestroy (up) ; - fprintf (stderr, "Made haplotypes and indices: ") ; timeUpdate () ; + fprintf (logFilePtr, "Made haplotypes and indices: ") ; timeUpdate (logFilePtr) ; if (isCheck) { checkHapsA = query ; checkHapsB = reference ; Ncheck = p->N ; } @@ -325,7 +325,7 @@ void matchSequencesIndexed (PBWT *p, FILE *fp) ++nTot ; totLen += k-e ; } - fprintf (stderr, "Average number of best matches %.1f, Average length %.1f\n", + fprintf (logFilePtr, "Average number of best matches %.1f, Average length %.1f\n", nTot/(double)q->M, totLen/(double)nTot) ; /* cleanup */ @@ -402,7 +402,7 @@ void matchSequencesSweep (PBWT *p, PBWT *q, void (*report)(int ai, int bi, int s else ++iPlus ; dPlus = (iPlus == p->M) ? k : up->d[iPlus] ; if (!iMinus && iPlus == p->M) - { fprintf (stderr, "no match to query %d value %d at site %d\n", + { fprintf (logFilePtr, "no match to query %d value %d at site %d\n", jj, x, k) ; d[jj] = k+1 ; goto DONE ; @@ -434,7 +434,7 @@ void matchSequencesSweep (PBWT *p, PBWT *q, void (*report)(int ai, int bi, int s nTot += (i - f[jj]) ; totLen += (p->N - d[jj])*(i - f[jj]) ; } - fprintf (stderr, "Average number of best matches including alternates %.1f, Average length %.1f, Av number per position %.1f\n", + fprintf (logFilePtr, "Average number of best matches including alternates %.1f, Average length %.1f, Av number per position %.1f\n", nTot/(double)q->M, totLen/(double)nTot, totLen/(double)(q->M*q->N)) ; pbwtCursorDestroy (up) ; pbwtCursorDestroy (uq) ; @@ -490,7 +490,7 @@ static void reportAndUpdate (int j, int k, uchar x, PbwtCursor *up, int *f, int else ++iPlus ; dPlus = (iPlus < up->M) ? up->d[iPlus] : (isSparse ? k/nSparseStore : k) ; if (!iMinus && iPlus == up->M) - { fprintf (stderr, "no match to query %d value %d at site %d\n", j, x, k) ; + { fprintf (logFilePtr, "no match to query %d value %d at site %d\n", j, x, k) ; d[j] = 1 + (isSparse ? k/nSparseStore : k) ; return ; } @@ -588,7 +588,7 @@ void matchSequencesSweepSparse (PBWT *p, PBWT *q, int nSparse, nTot += (i - ff[kk][jj]) ; totLen += (p->N - dd[kk][jj])*(i - ff[kk][jj]) ; } - fprintf (stderr, "Average number of best matches including alternates %.1f, Average length %.1f, Av number per position %.1f\n", + fprintf (logFilePtr, "Average number of best matches including alternates %.1f, Average length %.1f, Av number per position %.1f\n", nTot/(double)q->M, totLen/(double)nTot, totLen/(double)(q->M*q->N)) ; pbwtCursorDestroy (up) ; pbwtCursorDestroy (uq) ; diff --git a/pbwtPaint.c b/pbwtPaint.c index d7cdde1..1763056 100644 --- a/pbwtPaint.c +++ b/pbwtPaint.c @@ -159,11 +159,11 @@ void paintAncestryMatrix (PBWT *p, char* fileRoot,int chunksperregion) fputc ('\n', fc2) ; fputc ('\n', fc3) ; if (isCheck && (i%2) && p->samples) - fprintf (stderr, "%s %8.4g %8.4g\n", + fprintf (logFilePtr, "%s %8.4g %8.4g\n", sampleName (sample(p,i-1)), totCounts[i-1], totCounts[i]) ; } fclose (fc) ; fclose (fl) ; fclose (fc2) ;fclose (fc3) ; - timeUpdate(); + timeUpdate(logFilePtr); /* clean up */ for (i = 0 ; i < Ninds ; ++i) { free (counts[i]) ; free (counts2[i]) ; } free (counts) ; free (counts2) ; free (counts3) ; free (totCounts) ; free (nregions); free(totlengths); diff --git a/utils.c b/utils.c index c29456b..04509b6 100644 --- a/utils.c +++ b/utils.c @@ -38,7 +38,7 @@ void die (char *format, ...) fprintf (stderr, "\n") ; va_end (args) ; - timeUpdate () ; + timeUpdate (stderr) ; exit (-1) ; } @@ -170,7 +170,7 @@ struct timeval { #endif /* RUSAGE STRUCTURE_DEFINITIONS */ -void timeUpdate (void) +void timeUpdate (FILE *f) { static BOOL isFirst = TRUE ; static struct rusage rOld ; @@ -182,14 +182,14 @@ void timeUpdate (void) { secs = rNew.ru_utime.tv_sec - rOld.ru_utime.tv_sec ; usecs = rNew.ru_utime.tv_usec - rOld.ru_utime.tv_usec ; if (usecs < 0) { usecs += 1000000 ; secs -= 1 ; } - fprintf (stderr, "user\t%d.%06d", secs, usecs) ; + fprintf (f, "user\t%d.%06d", secs, usecs) ; secs = rNew.ru_stime.tv_sec - rOld.ru_stime.tv_sec ; usecs = rNew.ru_stime.tv_usec - rOld.ru_stime.tv_usec ; if (usecs < 0) { usecs += 1000000 ; secs -= 1 ; } - fprintf (stderr, "\tsystem\t%d.%06d", secs, usecs) ; - fprintf (stderr, "\tmax_RSS\t%ld", rNew.ru_maxrss - rOld.ru_maxrss) ; - fprintf (stderr, "\tMemory\t%li", totalAllocated) ; - fputc ('\n', stderr) ; + fprintf (f, "\tsystem\t%d.%06d", secs, usecs) ; + fprintf (f, "\tmax_RSS\t%ld", rNew.ru_maxrss - rOld.ru_maxrss) ; + fprintf (f, "\tMemory\t%li", totalAllocated) ; + fputc ('\n', f) ; } else isFirst = FALSE ; diff --git a/utils.h b/utils.h index b175a0d..820ba37 100644 --- a/utils.h +++ b/utils.h @@ -53,6 +53,6 @@ void *_mycalloc (long number, int size) ; FILE *fopenTag (char* root, char* tag, char* mode) ; gzFile gzopenTag (char* root, char* tag, char* mode) ; char *fgetword (FILE *f) ; /* not threadsafe */ -void timeUpdate (void) ; /* report to stderr resources used since last called */ +void timeUpdate (FILE *f) ; /* report to stderr resources used since last called */ /************************/