From a3640ff42ae73fd79d736a4fe7535569b0873e1d Mon Sep 17 00:00:00 2001 From: Benjamin Chu Date: Fri, 28 Jun 2024 11:14:13 -0700 Subject: [PATCH] update docs for v0.2.0 release (#18) --- .gitignore | 2 ++ Project.toml | 2 +- docs/src/man/download.ipynb | 4 +-- docs/src/man/download.md | 4 +-- docs/src/man/examples.ipynb | 72 ++++++++++++++++++------------------- docs/src/man/examples.md | 72 ++++++++++++++++++------------------- docs/src/man/intro.ipynb | 2 +- docs/src/man/intro.md | 2 +- 8 files changed, 81 insertions(+), 79 deletions(-) diff --git a/.gitignore b/.gitignore index a59876e5..8f8a6617 100644 --- a/.gitignore +++ b/.gitignore @@ -11,3 +11,5 @@ data/test* data/AD* data/seed* data/*output* + +app_linux_x86.tar.gz diff --git a/Project.toml b/Project.toml index 00641b8d..09827c95 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "GhostKnockoffGWAS" uuid = "28dc8d00-4921-4061-9921-3f423e4be5cc" authors = ["Benjamin Chu "] -version = "0.1.3" +version = "0.2.0" [deps] ArgParse = "c7e460c6-2fb9-53a9-8c5b-16f535851c63" diff --git a/docs/src/man/download.ipynb b/docs/src/man/download.ipynb index c53b6310..c7b41261 100644 --- a/docs/src/man/download.ipynb +++ b/docs/src/man/download.ipynb @@ -10,9 +10,9 @@ "\n", "## Software\n", "\n", - "| Operating System | v0.1.2 (March 28th, 2024) |\n", + "| Operating System | v0.2.0 (June 27th, 2024) |\n", "| :--- | :----: |\n", - "| Linux 64-bit | [Download](https://github.com/biona001/GhostKnockoffGWAS/releases/tag/v0.1.2) |\n", + "| Linux 64-bit | [Download](https://github.com/biona001/GhostKnockoffGWAS/releases/tag/v0.2.0) |\n", "\n", "After unzipping, the executable will be located inside `bin/GhostKnockoffGWAS`. We recommend adding the folder containing the `GhostKnockoffGWAS` executable to `PATH` for easier access." 
] diff --git a/docs/src/man/download.md b/docs/src/man/download.md index 37d726fc..af289df8 100644 --- a/docs/src/man/download.md +++ b/docs/src/man/download.md @@ -5,9 +5,9 @@ Here is the main downloads page. New software and pre-processed knockoff data wi ## Software -| Operating System | v0.1.2 (March 28th, 2024) | +| Operating System | v0.2.0 (June 27th, 2024) | | :--- | :----: | -| Linux 64-bit | [Download](https://github.com/biona001/GhostKnockoffGWAS/releases/tag/v0.1.2) | +| Linux 64-bit | [Download](https://github.com/biona001/GhostKnockoffGWAS/releases/tag/v0.2.0) | After unzipping, the executable will be located inside `bin/GhostKnockoffGWAS`. We recommend adding the folder containing the `GhostKnockoffGWAS` executable to `PATH` for easier access. diff --git a/docs/src/man/examples.ipynb b/docs/src/man/examples.ipynb index 82c65038..9570494f 100644 --- a/docs/src/man/examples.ipynb +++ b/docs/src/man/examples.ipynb @@ -14,7 +14,7 @@ "\n", "1. Step 1: Download pre-processed LD files and binary executable and extract their content\n", "\n", - " wget https://github.com/biona001/GhostKnockoffGWAS/releases/download/v0.1.2/app_linux_x86.tar.gz\n", + " wget https://github.com/biona001/GhostKnockoffGWAS/releases/download/v0.2.0/app_linux_x86.tar.gz\n", " wget https://zenodo.org/records/10433663/files/EUR.zip\n", " tar -xvzf app_linux_x86.tar.gz\n", " unzip EUR.zip # decompresses to ~8.7GB\n", @@ -36,7 +36,7 @@ "\n", "Proceed to the [Downloads page](https://biona001.github.io/GhostKnockoffGWAS/dev/man/download) and download (1) the software as well as (2) a pre-processed knockoff dataset suitable for your analysis, e.g.\n", "```shell\n", - "wget https://github.com/biona001/GhostKnockoffGWAS/releases/download/v0.1.2/app_linux_x86.tar.gz\n", + "wget https://github.com/biona001/GhostKnockoffGWAS/releases/download/v0.2.0/app_linux_x86.tar.gz\n", "wget https://zenodo.org/records/10433663/files/EUR.zip\n", "```\n", "Next, unzip the files in linux command line via:\n", 
@@ -99,31 +99,31 @@ "```\n", "Welcome to GhostKnockoffGWAS analysis!\n", "You have specified the following options:\n", - "zfile = /scratch/users/bbchu/GhostKnockoffGWAS/data/example_zfile.txt\n", - "LD_files = /scratch/users/bbchu/GhostKnockoffGWAS/data/EUR\n", + "zfile = /oak/stanford/groups/zihuai/pan_ukb_group_knockoffs/example_zfile.txt\n", + "LD_files = /oak/stanford/groups/zihuai/pan_ukb_group_knockoffs/EUR\n", "N (sample size) = 506200\n", "hg_build = 38\n", - "outdir = /scratch/users/bbchu/GhostKnockoffGWAS/data/\n", - "outfile = /scratch/users/bbchu/GhostKnockoffGWAS/data/example_output\n", + "outdir = /oak/stanford/groups/zihuai/pan_ukb_group_knockoffs/\n", + "outfile = /oak/stanford/groups/zihuai/pan_ukb_group_knockoffs/example_output\n", "seed = 2023\n", "verbose = true\n", - "random_shuffle = true\n", + "random_shuffle = false\n", "skip_shrinkage_check = false\n", "\n", "count_matchable_snps processed chr 7, cumulative SNPs = 35855\n", - "region 1 / 99 (f = LD_start100196651_end101199252.h5): chr 7, nz beta = 9, nsnps = 306, shrinkage = 0.1909\n", - "region 2 / 99 (f = LD_start101199253_end103197509.h5): chr 7, nz beta = 11, nsnps = 332, shrinkage = 0.0346\n", + "region 1 / 99 (f = LD_start100196651_end101199252.h5): chr 7, nz beta = 10, nsnps = 306, shrinkage = 0.1909\n", + "region 2 / 99 (f = LD_start101199253_end103197509.h5): chr 7, nz beta = 13, nsnps = 332, shrinkage = 0.0346\n", "region 3 / 99 (f = LD_start103197510_end104159524.h5): chr 7, nz beta = 12, nsnps = 215, shrinkage = 0.0458\n", "region 4 / 99 (f = LD_start104159525_end105682904.h5): chr 7, nz beta = 10, nsnps = 358, shrinkage = 0.0012\n", - "region 5 / 99 (f = LD_start105682905_end107780177.h5): chr 7, nz beta = 18, nsnps = 532, shrinkage = 0.0034\n", + "region 5 / 99 (f = LD_start105682905_end107780177.h5): chr 7, nz beta = 19, nsnps = 532, shrinkage = 0.0034\n", "...\n", "\n", "Matched 35855 SNPs with Z-scores to the reference panel\n", "Mean LD shrinkage = 
0.020501422972314207.\n", - "Done! Result saved to /scratch/users/bbchu/GhostKnockoffGWAS/data/example_output. \n", - "Overall runtime = 34.12649257 seconds, with \n", - " 1.456621308 seconds spent on reading the Z score file\n", - " 32.669871262 seconds spent on doing the analysis\n", + "Done! Result saved to /oak/stanford/groups/zihuai/pan_ukb_group_knockoffs/example_output. \n", + "Overall runtime = 59.10963104499999 seconds, with \n", + " 2.67976535 seconds spent on reading the Z score file\n", + " 56.429865695 seconds spent on doing the analysis\n", "```\n", "\n", "**Explanation for intermediate outputs**:\n", @@ -132,13 +132,13 @@ "+ Next we print the output of `count_matchable_snps`. It is essentially matching user supplied Z scores to the pre-computed knockoff data and counting how many SNPs can be matched. This information will be used to quantify the level shrinkage in Lasso regression. \n", "+ Then for each region, it will try to analyze the genome in quasi-independent regions, e.g. \n", "```\n", - "region 1 / 99 (f = LD_start100196651_end101199252.h5): chr 7, nz beta = 9, nsnps = 306, shrinkage = 0.1909\n", - "region 2 / 99 (f = LD_start101199253_end103197509.h5): chr 7, nz beta = 11, nsnps = 332, shrinkage = 0.0346\n", + "region 1 / 99 (f = LD_start100196651_end101199252.h5): chr 7, nz beta = 10, nsnps = 306, shrinkage = 0.1909\n", + "region 2 / 99 (f = LD_start101199253_end103197509.h5): chr 7, nz beta = 13, nsnps = 332, shrinkage = 0.0346\n", "region 3 / 99 (f = LD_start103197510_end104159524.h5): chr 7, nz beta = 12, nsnps = 215, shrinkage = 0.0458\n", "...\n", "```\n", "+ Here there are 99 regions in chromosome 7. For each region it prints the number of non-zero beta estimated in that region, the number of Z-scores that are present in that region, and finally the level of shrinkage. The shrinkage level is a number between 0 and 1. 
It quantifies how well the correlation matrices used in the analysis approximates the LD structure for the original GWAS study under the null ($z = 0$), see [SuSiE paper](https://journals.plos.org/plosgenetics/article?id=10.1371/journal.pgen.1010299) equation 24 for details.\n", - "+ Finally, the program concludes by printing the number of Z scores successfully matched, the output path, as well as a rough estimate of runtime. In this simple example, the analysis finished in roughly half a minute. " + "+ Finally, the program concludes by printing the number of Z scores successfully matched, the output path, as well as a rough estimate of runtime. In this simple example, the analysis finished in roughly a minute. " ] }, { @@ -157,23 +157,23 @@ "\n", "```\n", "target_fdr_0.01_num_selected,0\n", - "target_fdr_0.05_num_selected,10\n", + "target_fdr_0.05_num_selected,8\n", "target_fdr_0.1_num_selected,15\n", - "target_fdr_0.2_num_selected,25\n", + "target_fdr_0.2_num_selected,24\n", "m,5\n", "nregions,99\n", "nsnps,35855\n", "lasso_lambda,0.003807185801078654\n", - "mean_LD_shrinkage,0.020501422972314207\n", - "import_time,11.890378966000004\n", - "sample_knockoff_time,7.483832024999999\n", - "ghostbasil_time,0.7074100039999999\n", - "knockoff_filter_time,6.180893948\n", - "total_time,26.47797393798828\n", - "sample_knockoff_time_t21,2.7361857229999993\n", - "sample_knockoff_time_t22,1.532605566\n", - "sample_knockoff_time_t23,0.81481418\n", - "sample_knockoff_time_t24,2.3322181589999995\n", + "mean_LD_shrinkage,0.02050142301854868\n", + "import_time,20.175387030000003\n", + "sample_knockoff_time,11.177338719\n", + "ghostbasil_time,0.8567632319999999\n", + "knockoff_filter_time,8.442171458\n", + "total_time,41.34364295005798\n", + "sample_knockoff_time_t21,3.9536202269999987\n", + "sample_knockoff_time_t22,2.155882097\n", + "sample_knockoff_time_t23,1.0207068969999997\n", + "sample_knockoff_time_t24,3.2455576749999993\n", "```\n", "\n", "+ The first 4 rows indicate the 
number of discovered SNPs according to `GhostKnockoffGWAS`, for different target FDR levels. For example, when target $\\text{FDR} = 0.1$, there are 15 significant SNPs whose knockoff q-value is below 0.1. If these SNPs reside in different groups, then according to the knockoff procedure, these discoveries are conditionally independent. Later in step 5, we will apply a post-processing step to further count the number of independent discoveries as determined by the physical distance between these SNPs. \n", @@ -188,17 +188,17 @@ "This is a comma-separated file that contains the full knockoff analysis output. The first 5 rows are shown:\n", "```\n", "$ head -5 example_output.txt\n", - "rsid,AF,chr,ref,alt,pos_hg19,pos_hg38,group,zscores,lasso_beta,kappa,tau,W,qvals,pvals,selected_fdr0.01,selected_fdr0.05,selected_fdr0.1,selected_fdr0.2\n", - "rs4535687,0.15927,7,G,C,41892,41892,chr7_start16161_end972751_group1_0,-1.17940334810126,0.0,0,0.0,0.0,1.0,0.23823760256835697,0,0,0,0\n", - "rs62429406,0.031058,7,T,G,43748,43748,chr7_start16161_end972751_group2_0,0.636126444862832,0.0,0,0.0,0.0,1.0,0.5246940103826294,0,0,0,0\n", - "rs117163387,0.034958,7,C,T,43961,43961,chr7_start16161_end972751_group3_0,-0.548757491205702,0.0,0,0.0,0.0,1.0,0.5831718861307663,0,0,0,0\n", - "rs4247525,0.040199,7,T,C,44167,44167,chr7_start16161_end972751_group4_0,0.463442453535633,0.0,0,0.0,0.0,1.0,0.6430472544316368,0,0,0,0\n", + "rsid,AF,chr,ref,alt,pos_hg38,group,zscores,lasso_beta,kappa,tau,W,qvals,pvals,selected_fdr0.01,selected_fdr0.05,selected_fdr0.1,selected_fdr0.2\n", + "rs4535687,0.15927,7,G,C,41892,chr7_start16161_end972751_group1_0,-1.17940334810126,0.0,0,0.0,0.0,1.0,0.23823760256835697,0,0,0,0\n", + "rs62429406,0.031058,7,T,G,43748,chr7_start16161_end972751_group2_0,0.636126444862832,0.0,0,0.0,0.0,1.0,0.5246940103826294,0,0,0,0\n", + "rs117163387,0.034958,7,C,T,43961,chr7_start16161_end972751_group3_0,-0.548757491205702,0.0,0,0.0,0.0,1.0,0.5831718861307663,0,0,0,0\n", + 
"rs4247525,0.040199,7,T,C,44167,chr7_start16161_end972751_group4_0,0.463442453535633,0.0,0,0.0,0.0,1.0,0.6430472544316368,0,0,0,0\n", "```\n", "\n", "The first row is a header row. Each proceeding row corresponds to a SNP that was used in the analysis. \n", "\n", - "+ `rsid,AF,chr,ref,alt,pos_hg19,pos_hg38` is the SNP ID, alternate allele frequency, reference allele, alternate allele, basepair position in HG19 coordinates, and basepair position in HG38 coordinates.\n", - "+ `group` column: defines group membership. Note that in GhostKnockoffGWAS, false discovery rate (FDR) is guaranteed at the group level, that is, the expected number of falsely discovered groups is less than the target FDR level.\n", + "+ `rsid,AF,chr,ref,alt,pos_hg38` are the SNP ID, alternate allele frequency, chromosome, reference allele, alternate allele, and basepair position in HG38 coordinates.\n", + "+ `group` column: defines group membership. \n", "+ `zscores`: This is the user-provided Z-scores.\n", "+ `lasso_beta`: This is the Lasso's estimated effect size for each SNP conditional on the knockoffs. \n", "+ `kappa,tau,W`: these are knockoff statistics computed from the analysis, please refer to our paper for more detail. \n", diff --git a/docs/src/man/examples.md b/docs/src/man/examples.md index f832950b..219c1422 100644 --- a/docs/src/man/examples.md +++ b/docs/src/man/examples.md @@ -9,7 +9,7 @@ Here is a short summary of this tutorial: 1. 
Step 1: Download pre-processed LD files and binary executable and extract their content - wget https://github.com/biona001/GhostKnockoffGWAS/releases/download/v0.1.2/app_linux_x86.tar.gz + wget https://github.com/biona001/GhostKnockoffGWAS/releases/download/v0.2.0/app_linux_x86.tar.gz wget https://zenodo.org/records/10433663/files/EUR.zip tar -xvzf app_linux_x86.tar.gz unzip EUR.zip # decompresses to ~8.7GB @@ -26,7 +26,7 @@ Here is a short summary of this tutorial: Proceed to the [Downloads page](https://biona001.github.io/GhostKnockoffGWAS/dev/man/download) and download (1) the software as well as (2) a pre-processed knockoff dataset suitable for your analysis, e.g. ```shell -wget https://github.com/biona001/GhostKnockoffGWAS/releases/download/v0.1.2/app_linux_x86.tar.gz +wget https://github.com/biona001/GhostKnockoffGWAS/releases/download/v0.2.0/app_linux_x86.tar.gz wget https://zenodo.org/records/10433663/files/EUR.zip ``` Next, unzip the files in linux command line via: @@ -79,31 +79,31 @@ Here is the expected output: ``` Welcome to GhostKnockoffGWAS analysis! 
You have specified the following options: -zfile = /scratch/users/bbchu/GhostKnockoffGWAS/data/example_zfile.txt -LD_files = /scratch/users/bbchu/GhostKnockoffGWAS/data/EUR +zfile = /oak/stanford/groups/zihuai/pan_ukb_group_knockoffs/example_zfile.txt +LD_files = /oak/stanford/groups/zihuai/pan_ukb_group_knockoffs/EUR N (sample size) = 506200 hg_build = 38 -outdir = /scratch/users/bbchu/GhostKnockoffGWAS/data/ -outfile = /scratch/users/bbchu/GhostKnockoffGWAS/data/example_output +outdir = /oak/stanford/groups/zihuai/pan_ukb_group_knockoffs/ +outfile = /oak/stanford/groups/zihuai/pan_ukb_group_knockoffs/example_output seed = 2023 verbose = true -random_shuffle = true +random_shuffle = false skip_shrinkage_check = false count_matchable_snps processed chr 7, cumulative SNPs = 35855 -region 1 / 99 (f = LD_start100196651_end101199252.h5): chr 7, nz beta = 9, nsnps = 306, shrinkage = 0.1909 -region 2 / 99 (f = LD_start101199253_end103197509.h5): chr 7, nz beta = 11, nsnps = 332, shrinkage = 0.0346 +region 1 / 99 (f = LD_start100196651_end101199252.h5): chr 7, nz beta = 10, nsnps = 306, shrinkage = 0.1909 +region 2 / 99 (f = LD_start101199253_end103197509.h5): chr 7, nz beta = 13, nsnps = 332, shrinkage = 0.0346 region 3 / 99 (f = LD_start103197510_end104159524.h5): chr 7, nz beta = 12, nsnps = 215, shrinkage = 0.0458 region 4 / 99 (f = LD_start104159525_end105682904.h5): chr 7, nz beta = 10, nsnps = 358, shrinkage = 0.0012 -region 5 / 99 (f = LD_start105682905_end107780177.h5): chr 7, nz beta = 18, nsnps = 532, shrinkage = 0.0034 +region 5 / 99 (f = LD_start105682905_end107780177.h5): chr 7, nz beta = 19, nsnps = 532, shrinkage = 0.0034 ... Matched 35855 SNPs with Z-scores to the reference panel Mean LD shrinkage = 0.020501422972314207. -Done! Result saved to /scratch/users/bbchu/GhostKnockoffGWAS/data/example_output. 
-Overall runtime = 34.12649257 seconds, with - 1.456621308 seconds spent on reading the Z score file - 32.669871262 seconds spent on doing the analysis +Done! Result saved to /oak/stanford/groups/zihuai/pan_ukb_group_knockoffs/example_output. +Overall runtime = 59.10963104499999 seconds, with + 2.67976535 seconds spent on reading the Z score file + 56.429865695 seconds spent on doing the analysis ``` **Explanation for intermediate outputs**: @@ -112,13 +112,13 @@ Overall runtime = 34.12649257 seconds, with + Next we print the output of `count_matchable_snps`. It is essentially matching user supplied Z scores to the pre-computed knockoff data and counting how many SNPs can be matched. This information will be used to quantify the level shrinkage in Lasso regression. + Then for each region, it will try to analyze the genome in quasi-independent regions, e.g. ``` -region 1 / 99 (f = LD_start100196651_end101199252.h5): chr 7, nz beta = 9, nsnps = 306, shrinkage = 0.1909 -region 2 / 99 (f = LD_start101199253_end103197509.h5): chr 7, nz beta = 11, nsnps = 332, shrinkage = 0.0346 +region 1 / 99 (f = LD_start100196651_end101199252.h5): chr 7, nz beta = 10, nsnps = 306, shrinkage = 0.1909 +region 2 / 99 (f = LD_start101199253_end103197509.h5): chr 7, nz beta = 13, nsnps = 332, shrinkage = 0.0346 region 3 / 99 (f = LD_start103197510_end104159524.h5): chr 7, nz beta = 12, nsnps = 215, shrinkage = 0.0458 ... ``` + Here there are 99 regions in chromosome 7. For each region it prints the number of non-zero beta estimated in that region, the number of Z-scores that are present in that region, and finally the level of shrinkage. The shrinkage level is a number between 0 and 1. It quantifies how well the correlation matrices used in the analysis approximates the LD structure for the original GWAS study under the null ($z = 0$), see [SuSiE paper](https://journals.plos.org/plosgenetics/article?id=10.1371/journal.pgen.1010299) equation 24 for details. 
-+ Finally, the program concludes by printing the number of Z scores successfully matched, the output path, as well as a rough estimate of runtime. In this simple example, the analysis finished in roughly half a minute. ++ Finally, the program concludes by printing the number of Z scores successfully matched, the output path, as well as a rough estimate of runtime. In this simple example, the analysis finished in roughly a minute. ## Step 4: Interpreting the result @@ -132,23 +132,23 @@ This file contains broad summary of the analysis, as shown below ``` target_fdr_0.01_num_selected,0 -target_fdr_0.05_num_selected,10 +target_fdr_0.05_num_selected,8 target_fdr_0.1_num_selected,15 -target_fdr_0.2_num_selected,25 +target_fdr_0.2_num_selected,24 m,5 nregions,99 nsnps,35855 lasso_lambda,0.003807185801078654 -mean_LD_shrinkage,0.020501422972314207 -import_time,11.890378966000004 -sample_knockoff_time,7.483832024999999 -ghostbasil_time,0.7074100039999999 -knockoff_filter_time,6.180893948 -total_time,26.47797393798828 -sample_knockoff_time_t21,2.7361857229999993 -sample_knockoff_time_t22,1.532605566 -sample_knockoff_time_t23,0.81481418 -sample_knockoff_time_t24,2.3322181589999995 +mean_LD_shrinkage,0.02050142301854868 +import_time,20.175387030000003 +sample_knockoff_time,11.177338719 +ghostbasil_time,0.8567632319999999 +knockoff_filter_time,8.442171458 +total_time,41.34364295005798 +sample_knockoff_time_t21,3.9536202269999987 +sample_knockoff_time_t22,2.155882097 +sample_knockoff_time_t23,1.0207068969999997 +sample_knockoff_time_t24,3.2455576749999993 ``` + The first 4 rows indicate the number of discovered SNPs according to `GhostKnockoffGWAS`, for different target FDR levels. For example, when target $\text{FDR} = 0.1$, there are 15 significant SNPs whose knockoff q-value is below 0.1. If these SNPs reside in different groups, then according to the knockoff procedure, these discoveries are conditionally independent. 
Later in step 5, we will apply a post-processing step to further count the number of independent discoveries as determined by the physical distance between these SNPs. @@ -163,17 +163,17 @@ sample_knockoff_time_t24,2.3322181589999995 This is a comma-separated file that contains the full knockoff analysis output. The first 5 rows are shown: ``` $ head -5 example_output.txt -rsid,AF,chr,ref,alt,pos_hg19,pos_hg38,group,zscores,lasso_beta,kappa,tau,W,qvals,pvals,selected_fdr0.01,selected_fdr0.05,selected_fdr0.1,selected_fdr0.2 -rs4535687,0.15927,7,G,C,41892,41892,chr7_start16161_end972751_group1_0,-1.17940334810126,0.0,0,0.0,0.0,1.0,0.23823760256835697,0,0,0,0 -rs62429406,0.031058,7,T,G,43748,43748,chr7_start16161_end972751_group2_0,0.636126444862832,0.0,0,0.0,0.0,1.0,0.5246940103826294,0,0,0,0 -rs117163387,0.034958,7,C,T,43961,43961,chr7_start16161_end972751_group3_0,-0.548757491205702,0.0,0,0.0,0.0,1.0,0.5831718861307663,0,0,0,0 -rs4247525,0.040199,7,T,C,44167,44167,chr7_start16161_end972751_group4_0,0.463442453535633,0.0,0,0.0,0.0,1.0,0.6430472544316368,0,0,0,0 +rsid,AF,chr,ref,alt,pos_hg38,group,zscores,lasso_beta,kappa,tau,W,qvals,pvals,selected_fdr0.01,selected_fdr0.05,selected_fdr0.1,selected_fdr0.2 +rs4535687,0.15927,7,G,C,41892,chr7_start16161_end972751_group1_0,-1.17940334810126,0.0,0,0.0,0.0,1.0,0.23823760256835697,0,0,0,0 +rs62429406,0.031058,7,T,G,43748,chr7_start16161_end972751_group2_0,0.636126444862832,0.0,0,0.0,0.0,1.0,0.5246940103826294,0,0,0,0 +rs117163387,0.034958,7,C,T,43961,chr7_start16161_end972751_group3_0,-0.548757491205702,0.0,0,0.0,0.0,1.0,0.5831718861307663,0,0,0,0 +rs4247525,0.040199,7,T,C,44167,chr7_start16161_end972751_group4_0,0.463442453535633,0.0,0,0.0,0.0,1.0,0.6430472544316368,0,0,0,0 ``` The first row is a header row. Each proceeding row corresponds to a SNP that was used in the analysis. 
-+ `rsid,AF,chr,ref,alt,pos_hg19,pos_hg38` is the SNP ID, alternate allele frequency, reference allele, alternate allele, basepair position in HG19 coordinates, and basepair position in HG38 coordinates. -+ `group` column: defines group membership. Note that in GhostKnockoffGWAS, false discovery rate (FDR) is guaranteed at the group level, that is, the expected number of falsely discovered groups is less than the target FDR level. ++ `rsid,AF,chr,ref,alt,pos_hg38` are the SNP ID, alternate allele frequency, chromosome, reference allele, alternate allele, and basepair position in HG38 coordinates. ++ `group` column: defines group membership. + `zscores`: This is the user-provided Z-scores. + `lasso_beta`: This is the Lasso's estimated effect size for each SNP conditional on the knockoffs. + `kappa,tau,W`: these are knockoff statistics computed from the analysis, please refer to our paper for more detail. diff --git a/docs/src/man/intro.ipynb b/docs/src/man/intro.ipynb index e8c8d6d9..6b36a6bd 100644 --- a/docs/src/man/intro.ipynb +++ b/docs/src/man/intro.ipynb @@ -40,7 +40,7 @@ "\n", "1. Go to [Download Page](https://biona001.github.io/GhostKnockoffGWAS/dev/man/download) and download (1) the software and (2) the pre-processed LD files. For example,\n", "\n", - " wget https://github.com/biona001/GhostKnockoffGWAS/releases/download/v0.1.2/app_linux_x86.tar.gz\n", + " wget https://github.com/biona001/GhostKnockoffGWAS/releases/download/v0.2.0/app_linux_x86.tar.gz\n", " wget https://zenodo.org/records/10433663/files/EUR.zip\n", "2. Unzip them both:\n", "\n", diff --git a/docs/src/man/intro.md b/docs/src/man/intro.md index b3744cf4..7994a506 100644 --- a/docs/src/man/intro.md +++ b/docs/src/man/intro.md @@ -25,7 +25,7 @@ Most users are expected to follow this workflow. Detailed explanations for each 1. Go to [Download Page](https://biona001.github.io/GhostKnockoffGWAS/dev/man/download) and download (1) the software and (2) the pre-processed LD files. 
For example, - wget https://github.com/biona001/GhostKnockoffGWAS/releases/download/v0.1.2/app_linux_x86.tar.gz + wget https://github.com/biona001/GhostKnockoffGWAS/releases/download/v0.2.0/app_linux_x86.tar.gz wget https://zenodo.org/records/10433663/files/EUR.zip 2. Unzip them both: