From 3b1844c3fbfb292c7bbf83d3f4aa53565dbbaae7 Mon Sep 17 00:00:00 2001 From: Benjamin Chu Date: Tue, 20 Feb 2024 11:42:24 -0800 Subject: [PATCH] add zenodo download link in docs, minor doc edits --- docs/src/man/documentation.ipynb | 14 +++++++------- docs/src/man/documentation.md | 14 +++++++------- docs/src/man/download.ipynb | 4 ++-- docs/src/man/download.md | 4 ++-- docs/src/man/examples.ipynb | 26 +++++++++++++------------- docs/src/man/examples.md | 26 +++++++++++++------------- docs/src/man/getting_started.ipynb | 2 +- docs/src/man/getting_started.md | 2 +- docs/src/man/julia.ipynb | 2 +- docs/src/man/julia.md | 2 +- 10 files changed, 48 insertions(+), 48 deletions(-) diff --git a/docs/src/man/documentation.ipynb b/docs/src/man/documentation.ipynb index 7840e579..df48aefa 100644 --- a/docs/src/man/documentation.ipynb +++ b/docs/src/man/documentation.ipynb @@ -26,13 +26,13 @@ "source": [ "## Required inputs\n", "\n", - "| Option name | Argument | Default | Description |\n", - "| :--- | :----: | :---: | :--- |\n", - "| `--zfile` | String | NA | Input file containing Z-scores |\n", - "| `--LD-files` | String | NA | Input directory to the pre-processed LD files |\n", - "| `--N` | Int | NA | Sample size for target (original) study |\n", - "| `--genome-build` | Int | NA | The human genome build used for SNP positions in `zfile` (this value must be 19 or 38) |\n", - "| `--out` | String | NA | Output file name (without extensions) |" + "| Option name | Argument | Description |\n", + "| :--- | :----: | :--- |\n", + "| `--zfile` | String | Input file containing Z-scores as well as CHR/POS/REF/ALT. See [Acceptable Z-score files](https://biona001.github.io/GhostKnockoffGWAS/dev/man/zfile) for detailed requirements on this file. |\n", + "| `--LD-files` | String | Input directory to the pre-processed LD files.
Most users download this from the [Downloads Page](https://biona001.github.io/GhostKnockoffGWAS/dev/man/download) |\n", + "| `--N` | Int | Sample size for target (original) study |\n", + "| `--genome-build` | Int | The human genome build used for SNP positions in `zfile` (this value must be 19 or 38) |\n", + "| `--out` | String | Output file name (without extensions) |" ] }, { diff --git a/docs/src/man/documentation.md b/docs/src/man/documentation.md index 5c969249..fd8781ce 100644 --- a/docs/src/man/documentation.md +++ b/docs/src/man/documentation.md @@ -11,13 +11,13 @@ GhostKnockoffGWAS --zfile example_zfile.txt --LD-files EUR --N 506200 --genome-b ## Required inputs -| Option name | Argument | Default | Description | -| :--- | :----: | :---: | :--- | -| `--zfile` | String | NA | Input file containing Z-scores | -| `--LD-files` | String | NA | Input directory to the pre-processed LD files | -| `--N` | Int | NA | Sample size for target (original) study | -| `--genome-build` | Int | NA | The human genome build used for SNP positions in `zfile` (this value must be 19 or 38) | -| `--out` | String | NA | Output file name (without extensions) | +| Option name | Argument | Description | +| :--- | :----: | :--- | +| `--zfile` | String | Input file containing Z-scores as well as CHR/POS/REF/ALT. See [Acceptable Z-score files](https://biona001.github.io/GhostKnockoffGWAS/dev/man/zfile) for detailed requirements on this file. | +| `--LD-files` | String | Input directory to the pre-processed LD files.
Most users download this from the [Downloads Page](https://biona001.github.io/GhostKnockoffGWAS/dev/man/download) | +| `--N` | Int | Sample size for target (original) study | +| `--genome-build` | Int | The human genome build used for SNP positions in `zfile` (this value must be 19 or 38) | +| `--out` | String | Output file name (without extensions) | ## Optional inputs diff --git a/docs/src/man/download.ipynb b/docs/src/man/download.ipynb index 69c5664c..513c7564 100644 --- a/docs/src/man/download.ipynb +++ b/docs/src/man/download.ipynb @@ -12,7 +12,7 @@ "\n", "| Operating System | v0.0.1 (24 Jan, 2024) |\n", "| :--- | :----: |\n", - "| Linux 64-bit | [Download]() (XXXGB) |\n", + "| Linux 64-bit | [Download](https://github.com/biona001/GhostKnockoffGWAS/releases) |\n", "\n", "After unzipping, the executable will be located inside `bin/GhostKnockoffGWAS`. We recommend adding the folder containing the `GhostKnockoffGWAS` executable to `PATH` for easier access." ] @@ -25,7 +25,7 @@ "\n", "| Population | Link | Number of SNPs | Description |\n", "| :--- | :----: | :---: | :---: |\n", - "| EUR (Europeans) | [download](https://drive.google.com/file/d/1_ajlxFWE2MCSgBXDgDbeZh9Lq721WANA/view) (8.2GB) |650826 | See **Note 1** |\n", + "| EUR (Europeans) | [download](https://zenodo.org/records/10433663) (7.5GB) |650826 | See **Note 1** |\n", "| ASN (East Asians) | TBD | |\n", "| AFR (Africans) | TBD | |\n", "| AMR (Admixed Americans) | TBD | | |\n", diff --git a/docs/src/man/download.md b/docs/src/man/download.md index f04c3daf..3762b046 100644 --- a/docs/src/man/download.md +++ b/docs/src/man/download.md @@ -7,7 +7,7 @@ Here is the main downloads page. New software and pre-processed knockoff data wi | Operating System | v0.0.1 (24 Jan, 2024) | | :--- | :----: | -| Linux 64-bit | [Download]() (XXXGB) | +| Linux 64-bit | [Download](https://github.com/biona001/GhostKnockoffGWAS/releases) | After unzipping, the executable will be located inside `bin/GhostKnockoffGWAS`.
We recommend adding the folder containing the `GhostKnockoffGWAS` executable to `PATH` for easier access. @@ -15,7 +15,7 @@ After unzipping, the executable will be located inside `bin/GhostKnockoffGWAS`. | Population | Link | Number of SNPs | Description | | :--- | :----: | :---: | :---: | -| EUR (Europeans) | [download](https://drive.google.com/file/d/1_ajlxFWE2MCSgBXDgDbeZh9Lq721WANA/view) (8.2GB) |650826 | See **Note 1** | +| EUR (Europeans) | [download](https://zenodo.org/records/10433663) (7.5GB) |650826 | See **Note 1** | | ASN (East Asians) | TBD | | | AFR (Africans) | TBD | | | AMR (Admixed Americans) | TBD | | | diff --git a/docs/src/man/examples.ipynb b/docs/src/man/examples.ipynb index 9b79c101..55e37153 100644 --- a/docs/src/man/examples.ipynb +++ b/docs/src/man/examples.ipynb @@ -200,12 +200,12 @@ "+ `GhostKnockoffGWAS` first prints the user-specified parameters in the analysis. Verify that they are correct.\n", "+ Next we print the output of `count_matchable_snps`. It is essentially matching user supplied Z scores to the pre-computed knockoff data and counting how many SNPs can be matched. This information will be used to quantify the level shrinkage in Lasso regression. \n", "+ Then for each region, it will try to analyze the genome in quasi-independent regions, e.g. 
\n", - " ```\n", - " region 1 / 99 (f = LD_start100196651_end101199252.h5): chr 7, nz beta = 9, nsnps = 306, shrinkage = 0.1909\n", - " region 2 / 99 (f = LD_start101199253_end103197509.h5): chr 7, nz beta = 11, nsnps = 332, shrinkage = 0.0346\n", - " region 3 / 99 (f = LD_start103197510_end104159524.h5): chr 7, nz beta = 12, nsnps = 215, shrinkage = 0.0458\n", - " ...\n", - " ```\n", + "```\n", + "region 1 / 99 (f = LD_start100196651_end101199252.h5): chr 7, nz beta = 9, nsnps = 306, shrinkage = 0.1909\n", + "region 2 / 99 (f = LD_start101199253_end103197509.h5): chr 7, nz beta = 11, nsnps = 332, shrinkage = 0.0346\n", + "region 3 / 99 (f = LD_start103197510_end104159524.h5): chr 7, nz beta = 12, nsnps = 215, shrinkage = 0.0458\n", + "...\n", + "```\n", " Here there are 99 regions in chromosome 7. For each region it prints the number of non-zero beta estimated in that region, the number of Z-scores that are present in that region, and finally the level of shrinkage. The shrinkage level is a number between 0 and 1. It quantifies how well the correlation matrices used in the analysis approximates the LD structure for the original GWAS study under the null ($z = 0$), see [SuSiE paper](https://journals.plos.org/plosgenetics/article?id=10.1371/journal.pgen.1010299) equation 24 for details.\n", "+ Finally, the program concludes by printing the number of Z scores successfully matched, the output path, as well as a rough estimate of runtime. In this simple example, the analysis finished in roughly half a minute. " ] @@ -257,11 +257,11 @@ "This is a comma-separated file that contains the full knockoff analysis output. 
The first 5 rows are shown:\n", "```\n", "$ head -5 example_output.txt\n", - "rsid,AF,chr,ref,alt,pos_hg19,pos_hg38,group,zscores,lasso_beta,W,kappa,tau,qvals,pvals,selected_fdr0.01,selected_fdr0.05,selected_fdr0.1,selected_fdr0.2\n", - "rs4535687,0.15927,7,G,C,41892,41892,chr7_start16161_end972751_group1_0,-1.17940334810126,0.0,0.0,0.0,0.0,1.0,0.23823760256835697,0,0,0,0\n", - "rs62429406,0.031058,7,T,G,43748,43748,chr7_start16161_end972751_group2_0,0.636126444862832,0.0,0.0,0.0,0.0,1.0,0.5246940103826294,0,0,0,0\n", - "rs117163387,0.034958,7,C,T,43961,43961,chr7_start16161_end972751_group3_0,-0.548757491205702,0.0,0.0,0.0,0.0,1.0,0.5831718861307663,0,0,0,0\n", - "rs4247525,0.040199,7,T,C,44167,44167,chr7_start16161_end972751_group4_0,0.463442453535633,0.0,0.0,0.0,0.0,1.0,0.6430472544316368,0,0,0,0\n", + "rsid,AF,chr,ref,alt,pos_hg19,pos_hg38,group,zscores,lasso_beta,variant_kappa,variant_tau,variant_W,variant_q,pvals,group_W,group_kappa,group_tau,group_qvals,selected_fdr0.01,selected_fdr0.05,selected_fdr0.1,selected_fdr0.2\n", + "rs4535687,0.15927,7,G,C,41892,41892,chr7_start16161_end972751_group1_0,-1.17940334810126,0.0,0,0.0,0.0,1.0,0.23823760256835697,0.0,0.0,0.0,1.0,0,0,0,0\n", + "rs62429406,0.031058,7,T,G,43748,43748,chr7_start16161_end972751_group2_0,0.636126444862832,0.0,0,0.0,0.0,1.0,0.5246940103826294,0.0,0.0,0.0,1.0,0,0,0,0\n", + "rs117163387,0.034958,7,C,T,43961,43961,chr7_start16161_end972751_group3_0,-0.548757491205702,0.0,0,0.0,0.0,1.0,0.5831718861307663,0.0,0.0,0.0,1.0,0,0,0,0\n", + "rs4247525,0.040199,7,T,C,44167,44167,chr7_start16161_end972751_group4_0,0.463442453535633,0.0,0,0.0,0.0,1.0,0.6430472544316368,0.0,0.0,0.0,1.0,0,0,0,0\n", "```\n", "\n", "The first row is a header row. Each proceeding row corresponds to a SNP that was used in the analysis. \n", @@ -270,8 +270,8 @@ "+ `group` column: defines group membership. 
Note that in GhostKnockoffGWAS, false discovery rate (FDR) is guaranteed at the group level, that is, the expected number of falsely discovered groups is less than the target FDR level.\n", "+ `zscores`: This is the user-provided Z-scores.\n", "+ `lasso_beta`: This is the Lasso's estimated effect size for each SNP conditional on the knockoffs. \n", - "+ `W,kappa,tau`: these are knockoff statistics computed from the analysis, please refer to our paper for more detail. \n", - "+ `qvals`: This is the knockoff q-values, which is the minimum target FDR for a given variable to be selected. For details, see eq 19 of [this paper](https://www.nature.com/articles/s41467-022-34932-z)\n", + "+ `variant_kappa,variant_tau,variant_W,variant_q,pvals,group_W,group_kappa,group_tau`: these are knockoff statistics computed from the analysis, please refer to our paper for more detail. \n", + "+ `variant_q,group_qvals`: These are the knockoff q-values, which is the minimum target FDR for a given variable to be selected, i.e. for a target FDR level $\\alpha$, all variants with `group_qvals` $\\le \\alpha$ are selected. `GhostKnockoffGWAS` performs selection on the group-level while variant-level qvalue is used for labeling significant SNPs in downstream manhattan plots. For details, see eq 19 of [this paper](https://www.nature.com/articles/s41467-022-34932-z)\n", "+ `pvals`: This is the p-value obtained by back-transforming the input Z-scores\n", "+ `selected_fdr*` columns: these inform whether the variable is selected. Its values are 0 (indicating the SNP does not belong to a group that has been selected) or 1 (this SNP has been selected, along with those in the same group )." ] diff --git a/docs/src/man/examples.md b/docs/src/man/examples.md index 340186c0..19293c1c 100644 --- a/docs/src/man/examples.md +++ b/docs/src/man/examples.md @@ -180,12 +180,12 @@ Overall runtime = 34.12649257 seconds, with + `GhostKnockoffGWAS` first prints the user-specified parameters in the analysis.
Verify that they are correct. + Next we print the output of `count_matchable_snps`. It is essentially matching user supplied Z scores to the pre-computed knockoff data and counting how many SNPs can be matched. This information will be used to quantify the level shrinkage in Lasso regression. + Then for each region, it will try to analyze the genome in quasi-independent regions, e.g. - ``` - region 1 / 99 (f = LD_start100196651_end101199252.h5): chr 7, nz beta = 9, nsnps = 306, shrinkage = 0.1909 - region 2 / 99 (f = LD_start101199253_end103197509.h5): chr 7, nz beta = 11, nsnps = 332, shrinkage = 0.0346 - region 3 / 99 (f = LD_start103197510_end104159524.h5): chr 7, nz beta = 12, nsnps = 215, shrinkage = 0.0458 - ... - ``` +``` +region 1 / 99 (f = LD_start100196651_end101199252.h5): chr 7, nz beta = 9, nsnps = 306, shrinkage = 0.1909 +region 2 / 99 (f = LD_start101199253_end103197509.h5): chr 7, nz beta = 11, nsnps = 332, shrinkage = 0.0346 +region 3 / 99 (f = LD_start103197510_end104159524.h5): chr 7, nz beta = 12, nsnps = 215, shrinkage = 0.0458 +... +``` Here there are 99 regions in chromosome 7. For each region it prints the number of non-zero beta estimated in that region, the number of Z-scores that are present in that region, and finally the level of shrinkage. The shrinkage level is a number between 0 and 1. It quantifies how well the correlation matrices used in the analysis approximates the LD structure for the original GWAS study under the null ($z = 0$), see [SuSiE paper](https://journals.plos.org/plosgenetics/article?id=10.1371/journal.pgen.1010299) equation 24 for details. + Finally, the program concludes by printing the number of Z scores successfully matched, the output path, as well as a rough estimate of runtime. In this simple example, the analysis finished in roughly half a minute. @@ -232,11 +232,11 @@ sample_knockoff_time_t24,2.3132567919999993 This is a comma-separated file that contains the full knockoff analysis output. 
The first 5 rows are shown: ``` $ head -5 example_output.txt -rsid,AF,chr,ref,alt,pos_hg19,pos_hg38,group,zscores,lasso_beta,W,kappa,tau,qvals,pvals,selected_fdr0.01,selected_fdr0.05,selected_fdr0.1,selected_fdr0.2 -rs4535687,0.15927,7,G,C,41892,41892,chr7_start16161_end972751_group1_0,-1.17940334810126,0.0,0.0,0.0,0.0,1.0,0.23823760256835697,0,0,0,0 -rs62429406,0.031058,7,T,G,43748,43748,chr7_start16161_end972751_group2_0,0.636126444862832,0.0,0.0,0.0,0.0,1.0,0.5246940103826294,0,0,0,0 -rs117163387,0.034958,7,C,T,43961,43961,chr7_start16161_end972751_group3_0,-0.548757491205702,0.0,0.0,0.0,0.0,1.0,0.5831718861307663,0,0,0,0 -rs4247525,0.040199,7,T,C,44167,44167,chr7_start16161_end972751_group4_0,0.463442453535633,0.0,0.0,0.0,0.0,1.0,0.6430472544316368,0,0,0,0 +rsid,AF,chr,ref,alt,pos_hg19,pos_hg38,group,zscores,lasso_beta,variant_kappa,variant_tau,variant_W,variant_q,pvals,group_W,group_kappa,group_tau,group_qvals,selected_fdr0.01,selected_fdr0.05,selected_fdr0.1,selected_fdr0.2 +rs4535687,0.15927,7,G,C,41892,41892,chr7_start16161_end972751_group1_0,-1.17940334810126,0.0,0,0.0,0.0,1.0,0.23823760256835697,0.0,0.0,0.0,1.0,0,0,0,0 +rs62429406,0.031058,7,T,G,43748,43748,chr7_start16161_end972751_group2_0,0.636126444862832,0.0,0,0.0,0.0,1.0,0.5246940103826294,0.0,0.0,0.0,1.0,0,0,0,0 +rs117163387,0.034958,7,C,T,43961,43961,chr7_start16161_end972751_group3_0,-0.548757491205702,0.0,0,0.0,0.0,1.0,0.5831718861307663,0.0,0.0,0.0,1.0,0,0,0,0 +rs4247525,0.040199,7,T,C,44167,44167,chr7_start16161_end972751_group4_0,0.463442453535633,0.0,0,0.0,0.0,1.0,0.6430472544316368,0.0,0.0,0.0,1.0,0,0,0,0 ``` The first row is a header row. Each proceeding row corresponds to a SNP that was used in the analysis. @@ -245,8 +245,8 @@ The first row is a header row. Each proceeding row corresponds to a SNP that was + `group` column: defines group membership. 
Note that in GhostKnockoffGWAS, false discovery rate (FDR) is guaranteed at the group level, that is, the expected number of falsely discovered groups is less than the target FDR level. + `zscores`: This is the user-provided Z-scores. + `lasso_beta`: This is the Lasso's estimated effect size for each SNP conditional on the knockoffs. -+ `W,kappa,tau`: these are knockoff statistics computed from the analysis, please refer to our paper for more detail. -+ `qvals`: This is the knockoff q-values, which is the minimum target FDR for a given variable to be selected. For details, see eq 19 of [this paper](https://www.nature.com/articles/s41467-022-34932-z) ++ `variant_kappa,variant_tau,variant_W,variant_q,pvals,group_W,group_kappa,group_tau`: these are knockoff statistics computed from the analysis, please refer to our paper for more detail. ++ `variant_q,group_qvals`: These are the knockoff q-values, which is the minimum target FDR for a given variable to be selected, i.e. for a target FDR level $\alpha$, all variants with `group_qvals` $\le \alpha$ are selected. `GhostKnockoffGWAS` performs selection on the group-level while variant-level qvalue is used for labeling significant SNPs in downstream manhattan plots. For details, see eq 19 of [this paper](https://www.nature.com/articles/s41467-022-34932-z) + `pvals`: This is the p-value obtained by back-transforming the input Z-scores + `selected_fdr*` columns: these inform whether the variable is selected. Its values are 0 (indicating the SNP does not belong to a group that has been selected) or 1 (this SNP has been selected, along with those in the same group ).
diff --git a/docs/src/man/getting_started.ipynb b/docs/src/man/getting_started.ipynb index 088a4239..0b13fc29 100644 --- a/docs/src/man/getting_started.ipynb +++ b/docs/src/man/getting_started.ipynb @@ -6,7 +6,7 @@ "source": [ "# Getting started with Ghost Knockoff GWAS analysis\n", "\n", - "This package conducts knockoff-based inference to perform genome-wide conditional independent tests based on GWAS summary statistics (e.g. p-values). The methodology is described in the following papers\n", + "This package conducts knockoff-based inference to perform genome-wide conditional independent tests based on GWAS summary statistics (e.g. p-values). The methodology is described in the following paper\n", "\n", "> He Z, Chu BB, Yang J, Gu J, Chen Z, Liu L, Morrison T, Bellow M, Qi X, Hejazi N, Mathur M, Le Guen Y, Tang H, Hastie T, Ionita-laza, I, Sabatti C, Candes C. \"In silico identification of putative causal genetic variants\", bioRxiv 2024. \n", "\n", diff --git a/docs/src/man/getting_started.md b/docs/src/man/getting_started.md index 6d8540b1..60be8fad 100644 --- a/docs/src/man/getting_started.md +++ b/docs/src/man/getting_started.md @@ -1,7 +1,7 @@ # Getting started with Ghost Knockoff GWAS analysis -This package conducts knockoff-based inference to perform genome-wide conditional independent tests based on GWAS summary statistics (e.g. p-values). The methodology is described in the following papers +This package conducts knockoff-based inference to perform genome-wide conditional independent tests based on GWAS summary statistics (e.g. p-values). The methodology is described in the following paper > He Z, Chu BB, Yang J, Gu J, Chen Z, Liu L, Morrison T, Bellow M, Qi X, Hejazi N, Mathur M, Le Guen Y, Tang H, Hastie T, Ionita-laza, I, Sabatti C, Candes C. "In silico identification of putative causal genetic variants", bioRxiv 2024. 
diff --git a/docs/src/man/julia.ipynb b/docs/src/man/julia.ipynb index 4f47bdc5..bea421ef 100644 --- a/docs/src/man/julia.ipynb +++ b/docs/src/man/julia.ipynb @@ -73,7 +73,7 @@ "source": [ "## Compiling GhostKnockoffGWAS\n", "\n", - "1. `]add libcxxwrap_julia_jll` (note: as of Feb 2024, libcxxwrap_julia_jll must be v0.11.x)\n", + "1. `]add libcxxwrap_julia_jll` (note: as of Feb 2024, `libcxxwrap_julia_jll` must be v0.11.x)\n", "2. Make sure `GhostKnockoffGWAS` is installed within Julia. \n", "3. `dev` the package via\n", " ```julia\n", diff --git a/docs/src/man/julia.md b/docs/src/man/julia.md index 2a34d1ca..efb64e40 100644 --- a/docs/src/man/julia.md +++ b/docs/src/man/julia.md @@ -47,7 +47,7 @@ read_zscores ## Compiling GhostKnockoffGWAS -1. `]add libcxxwrap_julia_jll` (note: as of Feb 2024, libcxxwrap_julia_jll must be v0.11.x) +1. `]add libcxxwrap_julia_jll` (note: as of Feb 2024, `libcxxwrap_julia_jll` must be v0.11.x) 2. Make sure `GhostKnockoffGWAS` is installed within Julia. 3. `dev` the package via ```julia