From e60008cd73d55634a2b1c2c3821df41916e92d39 Mon Sep 17 00:00:00 2001 From: Benjamin Chu Date: Fri, 12 Jan 2024 15:37:32 -0800 Subject: [PATCH] drop dependency on Knockoffs.jl to improve binary size --- Manifest.toml | 471 +------------------------------------ Project.toml | 3 +- src/GhostKnockoffGWAS.jl | 2 +- src/app.jl | 2 +- src/ghostbasil_parallel.jl | 6 +- src/utilities.jl | 165 +++++++++++++ 6 files changed, 175 insertions(+), 474 deletions(-) diff --git a/Manifest.toml b/Manifest.toml index 59f1112f..7ac3548b 100644 --- a/Manifest.toml +++ b/Manifest.toml @@ -2,17 +2,7 @@ julia_version = "1.9.1" manifest_format = "2.0" -project_hash = "538e26e3f49acdf41cb2e0c8c984b0b2b47f69b0" - -[[deps.AbstractFFTs]] -deps = ["LinearAlgebra"] -git-tree-sha1 = "16b6dbc4cf7caee4e1e75c49485ec67b667098a0" -uuid = "621f4979-c628-5d54-868e-fcf4e3e8185c" -version = "1.3.1" -weakdeps = ["ChainRulesCore"] - - [deps.AbstractFFTs.extensions] - AbstractFFTsChainRulesCoreExt = "ChainRulesCore" +project_hash = "affe812e710c3adc1b5d70b32aa31834d4060cf7" [[deps.Adapt]] deps = ["LinearAlgebra", "Requires"] @@ -56,76 +46,12 @@ version = "7.4.5" StaticArraysCore = "1e83bf80-4336-4d27-bf5d-d5a4f845583c" Tracker = "9f7883ad-71c0-57eb-9f7f-b5c9e6d3789c" -[[deps.ArrayInterfaceCore]] -deps = ["LinearAlgebra", "SnoopPrecompile", "SparseArrays", "SuiteSparse"] -git-tree-sha1 = "e5f08b5689b1aad068e01751889f2f615c7db36d" -uuid = "30b0a656-2188-435a-8636-2ec0e6a096e2" -version = "0.1.29" - [[deps.Artifacts]] uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33" -[[deps.Automa]] -deps = ["Printf", "ScanByte", "TranscodingStreams"] -git-tree-sha1 = "d50976f217489ce799e366d9561d56a98a30d7fe" -uuid = "67c07d97-cdcb-5c2c-af73-a7f9c32a568b" -version = "0.8.2" - -[[deps.BGZFStreams]] -deps = ["CodecZlib"] -git-tree-sha1 = "a9c80401403c068c02784cd53d417d3a82e2d2bd" -uuid = "28d598bf-9b8f-59f1-b38c-5a06b4a0f5e6" -version = "0.3.1" - [[deps.Base64]] uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f" -[[deps.BenchmarkTools]] -deps = ["JSON", "Logging", "Printf", "Profile", "Statistics", "UUIDs"] -git-tree-sha1 = "d9a9701b899b30332bbcb3e1679c41cce81fb0e8" -uuid = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf" -version = "1.3.2" - -[[deps.BioCore]] -deps = ["Automa", "BufferedStreams", "YAML"] -git-tree-sha1 = "476edbf4ef94594fff430a84ca96f86cb2327a71" -uuid = "37cfa864-2cd6-5c12-ad9e-b6597d696c81" -version = "2.0.5" - -[[deps.BitTwiddlingConvenienceFunctions]] -deps = ["Static"] -git-tree-sha1 = "0c5f81f47bbbcf4aea7b2959135713459170798b" -uuid = "62783981-4cbd-42fc-bca8-16325de8dc4b" -version = "0.1.5" - -[[deps.BlockDiagonals]] -deps = ["ChainRulesCore", "FillArrays", "FiniteDifferences", "LinearAlgebra"] -git-tree-sha1 = "ffd635c19b56f50d1d4278d876219644299b5711" -uuid = "0a1fb500-61f7-11e9-3c65-f5ef3456f9f0" -version = "0.1.41" - -[[deps.BufferedStreams]] -git-tree-sha1 = "bb065b14d7f941b8617bc323063dbe79f55d16ea" -uuid = "e1450e63-4bb3-523b-b2a4-4ffa8c0fd77d" -version = "1.1.0" - -[[deps.Bzip2_jll]] -deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] -git-tree-sha1 = "19a35467a82e236ff51bc17a3a44b69ef35185a2" -uuid = "6e34b625-4abd-537c-b88f-471c36dfa7a0" -version = "1.0.8+0" - -[[deps.CEnum]] -git-tree-sha1 = "eb4cb44a499229b3b8426dcfb5dd85333951ff90" -uuid = "fa961155-64e5-5f13-b03f-caf6b980ea82" -version = "0.4.2" - -[[deps.CPUSummary]] -deps = ["CpuId", "IfElse", "Static"] -git-tree-sha1 = "2c144ddb46b552f72d7eafe7cc2f50746e41ea21" -uuid = "2a0fbf3d-bb9c-48f3-b0a9-814d99fd7ab9" -version = "0.2.2" - [[deps.CSV]] deps = ["CodecZlib", "Dates", "FilePathsBase", "InlineStrings", "Mmap", "Parsers", "PooledArrays", "PrecompileTools", "SentinelArrays", "Tables", "Unicode", "WeakRefStrings", "WorkerUtilities"] git-tree-sha1 = "ed28c86cbde3dc3f53cf76643c2e9bc11d56acc7" @@ -138,58 +64,12 @@ git-tree-sha1 = "f641eb0a4f00c343bbc32346e1217b86f3ce9dad" uuid = "49dc2e85-a5d0-5ad3-a950-438e2897f1b9" version = "0.5.1" -[[deps.ChainRulesCore]] -deps = ["Compat", "LinearAlgebra", "SparseArrays"] -git-tree-sha1 = "e30f2f4e20f7f186dc36529910beaedc60cfa644" -uuid = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" -version = "1.16.0" - -[[deps.CloseOpenIntervals]] -deps = ["Static", "StaticArrayInterface"] -git-tree-sha1 = "70232f82ffaab9dc52585e0dd043b5e0c6b714f1" -uuid = "fb6a15b2-703c-40df-9091-08a04967cfa9" -version = "0.1.12" - -[[deps.Clustering]] -deps = ["Distances", "LinearAlgebra", "NearestNeighbors", "Printf", "Random", "SparseArrays", "Statistics", "StatsBase"] -git-tree-sha1 = "a6e6ce44a1e0a781772fc795fb7343b1925e9898" -uuid = "aaaa29a8-35af-508c-8bc3-b662a17a0fe5" -version = "0.15.2" - -[[deps.CodecBzip2]] -deps = ["Bzip2_jll", "Libdl", "TranscodingStreams"] -git-tree-sha1 = "2e62a725210ce3c3c2e1a3080190e7ca491f18d7" -uuid = "523fee87-0ab8-5b00-afb7-3ecf72e48cfd" -version = "0.7.2" - -[[deps.CodecXz]] -deps = ["Libdl", "TranscodingStreams", "XZ_jll"] -git-tree-sha1 = "82c4c000edf64b6bda6766377e69a1028f3549ee" -uuid = "ba30903b-d9e8-5048-a5ec-d1f5b0d4b47b" -version = "0.7.0" - [[deps.CodecZlib]] deps = ["TranscodingStreams", "Zlib_jll"] git-tree-sha1 = "9c209fb7536406834aa938fb149964b985de6c83" uuid = "944b1d66-785c-5afd-91f1-9de20f533193" version = "0.7.1" -[[deps.CodecZstd]] -deps = ["CEnum", "TranscodingStreams", "Zstd_jll"] -git-tree-sha1 = "849470b337d0fa8449c21061de922386f32949d9" -uuid = "6b39b394-51ab-5f42-8807-6242bab2b4c2" -version = "0.7.2" - -[[deps.Combinatorics]] -git-tree-sha1 = "08c8b6831dc00bfea825826be0bc8336fc369860" -uuid = "861a8166-3701-5b0c-9a16-15d98fcdc6aa" -version = "1.0.2" - -[[deps.CommonSolve]] -git-tree-sha1 = "9441451ee712d1aec22edad62db1a9af3dc8d852" -uuid = "38540f10-b2f7-11e9-35d8-d573e4eb0ff2" -version = "0.2.3" - [[deps.CommonSubexpressions]] deps = ["MacroTools", "Test"] git-tree-sha1 = "7b8a93dba8af7e3b42fecabf646260105ac373f7" @@ -225,18 +105,6 @@ version = "1.5.2" IntervalSets = "8197267c-284f-5f27-9208-e0e47529a953" StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" -[[deps.CovarianceEstimation]] -deps = ["LinearAlgebra", "Statistics", "StatsBase"] -git-tree-sha1 = "3c8de95b4e932d76ec8960e12d681eba580e9674" -uuid = "587fd27a-f159-11e8-2dae-1979310e6154" -version = "0.2.8" - -[[deps.CpuId]] -deps = ["Markdown"] -git-tree-sha1 = "fcbb72b032692610bfbdb15018ac16a36cf2e406" -uuid = "adafc99b-e345-5852-983c-f28acb93d879" -version = "0.3.1" - [[deps.Crayons]] git-tree-sha1 = "249fe38abf76d48563e2f4556bebd215aa317e15" uuid = "a8cc5b0e-0ffa-5ad4-8c14-923d3ee1735f" @@ -292,12 +160,6 @@ git-tree-sha1 = "a4ad7ef19d2cdc2eff57abbbe68032b1cd0bd8f8" uuid = "b552c78f-8df3-52c6-915a-8e097449b14b" version = "1.13.0" -[[deps.Distances]] -deps = ["LinearAlgebra", "SparseArrays", "Statistics", "StatsAPI"] -git-tree-sha1 = "49eba9ad9f7ead780bfb7ee319f962c811c6d3b2" -uuid = "b4f34e82-e78d-54a5-968a-f98e89d6e8f7" -version = "0.10.8" - [[deps.Distributed]] deps = ["Random", "Serialization", "Sockets"] uuid = "8ba89e20-285c-5b6f-9357-94700520ee1b" @@ -333,24 +195,6 @@ git-tree-sha1 = "5837a837389fccf076445fce071c8ddaea35a566" uuid = "fa6b7ba4-c1ee-5f82-b5fc-ecf0adba8f74" version = "0.6.8" -[[deps.ElasticArrays]] -deps = ["Adapt"] -git-tree-sha1 = "e1c40d78de68e9a2be565f0202693a158ec9ad85" -uuid = "fdbdab4c-e67f-52f5-8c3f-e7b388dad3d4" -version = "1.2.11" - -[[deps.FFTW]] -deps = ["AbstractFFTs", "FFTW_jll", "LinearAlgebra", "MKL_jll", "Preferences", "Reexport"] -git-tree-sha1 = "f9818144ce7c8c41edf5c4c179c684d92aa4d9fe" -uuid = "7a1cc6ca-52ef-59f5-83cd-3a7055c09341" -version = "1.6.0" - -[[deps.FFTW_jll]] -deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] -git-tree-sha1 = "c6033cc3892d0ef5bb9cd29b7f2f0331ea5184ea" -uuid = "f5851436-0d7a-5f13-b9de-f02708fd171a" -version = "3.3.10+0" - [[deps.FileIO]] deps = ["Pkg", "Requires", "UUIDs"] git-tree-sha1 = "299dc33549f68299137e51e6d49a13b5b1da9673" @@ -378,12 +222,6 @@ git-tree-sha1 = "6604e18a0220650dbbea7854938768f15955dd8e" uuid = "6a86dc24-6348-571c-b903-95158fe2bd41" version = "2.20.0" -[[deps.FiniteDifferences]] -deps = ["ChainRulesCore", "LinearAlgebra", "Printf", "Random", "Richardson", "SparseArrays", "StaticArrays"] -git-tree-sha1 = "3f605dd6db5640c5278f2551afc9427656439f42" -uuid = "26cc04aa-876d-5657-8c51-4c34ba976000" -version = "0.12.26" - [[deps.Formatting]] deps = ["Printf"] git-tree-sha1 = "8339d61043228fdd3eb658d86c926cb282ae72a8" @@ -404,37 +242,12 @@ weakdeps = ["StaticArrays"] deps = ["Random"] uuid = "9fa8497b-333b-5362-9e8d-4d0656e87820" -[[deps.GLM]] -deps = ["Distributions", "LinearAlgebra", "Printf", "Reexport", "SparseArrays", "SpecialFunctions", "Statistics", "StatsAPI", "StatsBase", "StatsFuns", "StatsModels"] -git-tree-sha1 = "97829cfda0df99ddaeaafb5b370d6cab87b7013e" -uuid = "38e38edf-8417-5370-95a0-9cbb8c7f171a" -version = "1.8.3" - -[[deps.GLMNet]] -deps = ["DataFrames", "Distributed", "Distributions", "Printf", "Random", "SparseArrays", "StatsBase", "glmnet_jll"] -git-tree-sha1 = "49ef90cd140f8a99a81338f1e08e8ebc18837a63" -uuid = "8d5ece8b-de18-5317-b113-243142960cc6" -version = "0.7.0" - -[[deps.GenericLinearAlgebra]] -deps = ["LinearAlgebra", "Printf", "Random", "libblastrampoline_jll"] -git-tree-sha1 = "02be7066f936af6b04669f7c370a31af9036c440" -uuid = "14197337-ba66-59df-a3e3-ca00e7dcff7a" -version = "0.3.11" - [[deps.Ghostbasil]] deps = ["CxxWrap", "ghostbasil_jll"] -git-tree-sha1 = "d6324a2b7e367dd1dfb92e59f8fefa448ec87110" -repo-rev = "main" -repo-url = "https://github.com/biona001/Ghostbasil.jl" +path = "/Users/biona001/.julia/dev/Ghostbasil" uuid = "42a9b20f-2d23-465a-b7ed-975dbff4a4d6" version = "0.0.1" -[[deps.Glob]] -git-tree-sha1 = "97285bbd5230dd766e9ef6749b80fc617126d496" -uuid = "c27321d9-0574-5035-807b-f59d2c89b15c" -version = "1.3.1" - [[deps.HDF5]] deps = ["Compat", "HDF5_jll", "Libdl", "Mmap", "Random", "Requires", "UUIDs"] git-tree-sha1 = "3dab31542b3da9f25a6a1d11159d4af8fdce7d67" @@ -447,41 +260,18 @@ git-tree-sha1 = "4cc2bb72df6ff40b055295fdef6d92955f9dede8" uuid = "0234f1f7-429e-5d53-9886-15a909be8d59" version = "1.12.2+2" -[[deps.HostCPUFeatures]] -deps = ["BitTwiddlingConvenienceFunctions", "IfElse", "Libdl", "Static"] -git-tree-sha1 = "734fd90dd2f920a2f1921d5388dcebe805b262dc" -uuid = "3e5b6fbb-0976-4d2c-9146-d79de83f2fb0" -version = "0.1.14" - -[[deps.Hypatia]] -deps = ["Combinatorics", "DocStringExtensions", "GenericLinearAlgebra", "IterativeSolvers", "LinearAlgebra", "LinearMaps", "MathOptInterface", "PolynomialRoots", "Printf", "Requires", "SparseArrays", "SpecialFunctions", "SuiteSparse", "Test"] -git-tree-sha1 = "18b575348f4b5c385963ce53971060b77a3ba033" -uuid = "b99e6be6-89ff-11e8-14f8-45c827f4f8f2" -version = "0.7.2" - [[deps.HypergeometricFunctions]] deps = ["DualNumbers", "LinearAlgebra", "OpenLibm_jll", "SpecialFunctions"] git-tree-sha1 = "84204eae2dd237500835990bcade263e27674a93" uuid = "34004b35-14d8-5ef3-9330-4cdb6864b03a" version = "0.3.16" -[[deps.IfElse]] -git-tree-sha1 = "debdd00ffef04665ccbb3e150747a77560e8fad1" -uuid = "615f187c-cbe4-4ef1-ba3b-2fcf58d6d173" -version = "0.1.1" - [[deps.InlineStrings]] deps = ["Parsers"] git-tree-sha1 = "9cc2baf75c6d09f9da536ddf58eb2f29dedaf461" uuid = "842dd82b-1e85-43dc-bf29-5d0ee9dffc48" version = "1.4.0" -[[deps.IntelOpenMP_jll]] -deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] -git-tree-sha1 = "0cb9352ef2e01574eeebdb102948a58740dcaf83" -uuid = "1d5cc7b8-4909-519e-a0f8-d0f5ad9712d0" -version = "2023.1.0+0" - [[deps.InteractiveUtils]] deps = ["Markdown"] uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240" @@ -496,12 +286,6 @@ git-tree-sha1 = "630b497eafcc20001bba38a4651b327dcfc491d2" uuid = "92d709cd-6900-40b7-9082-c6be49f344b6" version = "0.2.2" -[[deps.IterativeSolvers]] -deps = ["LinearAlgebra", "Printf", "Random", "RecipesBase", "SparseArrays"] -git-tree-sha1 = "1169632f425f79429f245113b775a0e3d121457c" -uuid = "42fd0dbc-a981-5370-80f2-aaf504508153" -version = "0.9.2" - [[deps.IteratorInterfaceExtensions]] git-tree-sha1 = "a3f24677c21f5bbe9d2a714f95dcd58337fb2856" uuid = "82899510-4779-5014-852e-03e436cf321d" @@ -519,24 +303,6 @@ git-tree-sha1 = "abc9885a7ca2052a736a600f7fa66209f96506e1" uuid = "692b3bcd-3c85-4b1f-b108-f13ce0eb3210" version = "1.4.1" -[[deps.JSON]] -deps = ["Dates", "Mmap", "Parsers", "Unicode"] -git-tree-sha1 = "31e996f0a15c7b280ba9f76636b3ff9e2ae58c9a" -uuid = "682c06a0-de6a-54ab-a142-c8b1cf79cde6" -version = "0.21.4" - -[[deps.JuMP]] -deps = ["LinearAlgebra", "MathOptInterface", "MutableArithmetics", "OrderedCollections", "Printf", "SnoopPrecompile", "SparseArrays"] -git-tree-sha1 = "3e4a73edf2ca1bfe97f1fc86eb4364f95ef0fccd" -uuid = "4076af6c-e467-56ae-b986-b466b2749572" -version = "1.11.1" - -[[deps.Knockoffs]] -deps = ["BlockDiagonals", "CSV", "Clustering", "CovarianceEstimation", "DataFrames", "DelimitedFiles", "Distributions", "Downloads", "ElasticArrays", "GLM", "GLMNet", "Hypatia", "JuMP", "LinearAlgebra", "LoopVectorization", "LowRankApprox", "Optim", "PositiveFactorizations", "ProgressMeter", "Random", "Reexport", "Roots", "SnpArrays", "Statistics", "StatsBase"] -git-tree-sha1 = "bedba256e51c9c0b6f330268252243545ee8c16a" -uuid = "878bf26d-0c49-448a-9df5-b057c815d613" -version = "1.0.2" - [[deps.LLVMOpenMP_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] git-tree-sha1 = "f689897ccbe049adb19a065c495e75f372ecd42b" @@ -548,16 +314,6 @@ git-tree-sha1 = "f2355693d6778a178ade15952b7ac47a4ff97996" uuid = "b964fa9f-0449-5b57-a5c2-d3ea65f4040f" version = "1.3.0" -[[deps.LayoutPointers]] -deps = ["ArrayInterface", "LinearAlgebra", "ManualMemory", "SIMDTypes", "Static", "StaticArrayInterface"] -git-tree-sha1 = "88b8f66b604da079a627b6fb2860d3704a6729a1" -uuid = "10f19ff3-798f-405d-979b-55457f8fc047" -version = "0.1.14" - -[[deps.LazyArtifacts]] -deps = ["Artifacts", "Pkg"] -uuid = "4af54fe1-eca0-43a8-85a7-787d91b784e3" - [[deps.LibCURL]] deps = ["LibCURL_jll", "MozillaCACerts_jll"] uuid = "b27032c2-a3e7-50c8-80cd-2d36dbcbfd21" @@ -580,12 +336,6 @@ version = "1.10.2+0" [[deps.Libdl]] uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb" -[[deps.Libiconv_jll]] -deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] -git-tree-sha1 = "c7cb1f5d892775ba13767a87c7ada0b980ea0a71" -uuid = "94ce4f54-9a6c-5748-9c1c-f9c7231a4531" -version = "1.16.1+2" - [[deps.LineSearches]] deps = ["LinearAlgebra", "NLSolversBase", "NaNMath", "Parameters", "Printf"] git-tree-sha1 = "7bbea35cec17305fc70a0e5b4641477dc0789d9d" @@ -596,12 +346,6 @@ version = "7.2.0" deps = ["Libdl", "OpenBLAS_jll", "libblastrampoline_jll"] uuid = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" -[[deps.LinearMaps]] -deps = ["ChainRulesCore", "LinearAlgebra", "SparseArrays", "Statistics"] -git-tree-sha1 = "4af48c3585177561e9f0d24eb9619ad3abf77cc7" -uuid = "7a12625a-238d-50fd-b39a-03d52299707e" -version = "3.10.0" - [[deps.LogExpFunctions]] deps = ["DocStringExtensions", "IrrationalConstants", "LinearAlgebra"] git-tree-sha1 = "0a1b7c2863e44523180fdb3146534e265a91870b" @@ -621,50 +365,16 @@ version = "0.3.23" [[deps.Logging]] uuid = "56ddb016-857b-54e1-b83d-db4d58db5568" -[[deps.LoopVectorization]] -deps = ["ArrayInterface", "ArrayInterfaceCore", "CPUSummary", "CloseOpenIntervals", "DocStringExtensions", "HostCPUFeatures", "IfElse", "LayoutPointers", "LinearAlgebra", "OffsetArrays", "PolyesterWeave", "PrecompileTools", "SIMDTypes", "SLEEFPirates", "Static", "StaticArrayInterface", "ThreadingUtilities", "UnPack", "VectorizationBase"] -git-tree-sha1 = "3bb62b5003bc7d2d49f26663484267dc49fa1bf5" -uuid = "bdcacae8-1622-11e9-2a5c-532679323890" -version = "0.12.159" -weakdeps = ["ChainRulesCore", "ForwardDiff", "SpecialFunctions"] - - [deps.LoopVectorization.extensions] - ForwardDiffExt = ["ChainRulesCore", "ForwardDiff"] - SpecialFunctionsExt = "SpecialFunctions" - -[[deps.LowRankApprox]] -deps = ["FFTW", "FillArrays", "LinearAlgebra", "Nullables", "Random", "SparseArrays"] -git-tree-sha1 = "bc0221650552251852342c47c2e82246ed5c98ff" -uuid = "898213cb-b102-5a47-900c-97e73b919f73" -version = "0.5.3" - -[[deps.MKL_jll]] -deps = ["Artifacts", "IntelOpenMP_jll", "JLLWrappers", "LazyArtifacts", "Libdl", "Pkg"] -git-tree-sha1 = "2ce8695e1e699b68702c03402672a69f54b8aca9" -uuid = "856f044c-d86e-5d09-b602-aeab76dc8ba7" -version = "2022.2.0+0" - [[deps.MacroTools]] deps = ["Markdown", "Random"] git-tree-sha1 = "42324d08725e200c23d4dfb549e0d5d89dede2d2" uuid = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09" version = "0.5.10" -[[deps.ManualMemory]] -git-tree-sha1 = "bcaef4fc7a0cfe2cba636d84cda54b5e4e4ca3cd" -uuid = "d125e4d3-2237-4719-b19c-fa641b8a4667" -version = "0.1.8" - [[deps.Markdown]] deps = ["Base64"] uuid = "d6f4376e-aef5-505a-96c1-9c027394607a" -[[deps.MathOptInterface]] -deps = ["BenchmarkTools", "CodecBzip2", "CodecZlib", "DataStructures", "ForwardDiff", "JSON", "LinearAlgebra", "MutableArithmetics", "NaNMath", "OrderedCollections", "PrecompileTools", "Printf", "SparseArrays", "SpecialFunctions", "Test", "Unicode"] -git-tree-sha1 = "6ba094e471106981b278f60179170d8b10985052" -uuid = "b8f27783-ece8-5eb3-8dc8-9495eed66fee" -version = "1.16.0" - [[deps.MbedTLS_jll]] deps = ["Artifacts", "Libdl"] uuid = "c8ffd9c3-330d-5841-b78e-0817d7145fa1" @@ -683,12 +393,6 @@ uuid = "a63ad114-7e13-5084-954f-fe012c677804" uuid = "14a3606d-f60d-562e-9121-12d972cd8159" version = "2022.10.11" -[[deps.MutableArithmetics]] -deps = ["LinearAlgebra", "SparseArrays", "Test"] -git-tree-sha1 = "964cb1a7069723727025ae295408747a0b36a854" -uuid = "d8a4904e-b15c-11e9-3269-09a3773c0cb0" -version = "1.3.0" - [[deps.NLSolversBase]] deps = ["DiffResults", "Distributed", "FiniteDiff", "ForwardDiff"] git-tree-sha1 = "a0b464d183da839699f4c79e7606d9d186ec172c" @@ -701,27 +405,10 @@ git-tree-sha1 = "0877504529a3e5c3343c6f8b4c0381e57e4387e4" uuid = "77ba4419-2d1f-58cd-9bb1-8ffee604a2e3" version = "1.0.2" -[[deps.NearestNeighbors]] -deps = ["Distances", "StaticArrays"] -git-tree-sha1 = "2c3726ceb3388917602169bed973dbc97f1b51a8" -uuid = "b8a86587-4115-5ab1-83bc-aa920d37bbce" -version = "0.4.13" - [[deps.NetworkOptions]] uuid = "ca575930-c2e3-43a9-ace4-1e988b2c1908" version = "1.2.0" -[[deps.Nullables]] -git-tree-sha1 = "8f87854cc8f3685a60689d8edecaa29d2251979b" -uuid = "4d1e1d77-625e-5b40-9113-a560ec7a8ecd" -version = "1.0.0" - -[[deps.OffsetArrays]] -deps = ["Adapt"] -git-tree-sha1 = "82d7c9e310fe55aa54996e6f7f94674e2a38fcb4" -uuid = "6fe1bfb0-de20-5000-8ca7-80f57d26f881" -version = "1.12.9" - [[deps.OpenBLAS_jll]] deps = ["Artifacts", "CompilerSupportLibraries_jll", "Libdl"] uuid = "4536629a-c528-5b80-bd46-f80d51c5b363" @@ -778,17 +465,6 @@ deps = ["Artifacts", "Dates", "Downloads", "FileWatching", "LibGit2", "Libdl", " uuid = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" version = "1.9.0" -[[deps.PolyesterWeave]] -deps = ["BitTwiddlingConvenienceFunctions", "CPUSummary", "IfElse", "Static", "ThreadingUtilities"] -git-tree-sha1 = "240d7170f5ffdb285f9427b92333c3463bf65bf6" -uuid = "1d0040c9-8b98-4ee7-8388-3f51789ca0ad" -version = "0.2.1" - -[[deps.PolynomialRoots]] -git-tree-sha1 = "5f807b5345093487f733e520a1b7395ee9324825" -uuid = "3a141323-8675-5d76-9d11-e1df1406c778" -version = "1.0.0" - [[deps.PooledArrays]] deps = ["DataAPI", "Future"] git-tree-sha1 = "a6062fe4063cdafe78f4a0a81cfffb89721b30e7" @@ -823,16 +499,6 @@ version = "2.2.4" deps = ["Unicode"] uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7" -[[deps.Profile]] -deps = ["Printf"] -uuid = "9abbd945-dff8-562f-b5e8-e1ebf5ef1b79" - -[[deps.ProgressMeter]] -deps = ["Distributed", "Printf"] -git-tree-sha1 = "d7a7aef8f8f2d537104f170139553b14dfe39fe9" -uuid = "92933f4c-e287-5a05-a399-4b506db050ca" -version = "1.7.2" - [[deps.QuadGK]] deps = ["DataStructures", "LinearAlgebra"] git-tree-sha1 = "6ec7ac8412e83d57e313393220879ede1740f9ee" @@ -847,12 +513,6 @@ uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb" deps = ["SHA", "Serialization"] uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" -[[deps.RecipesBase]] -deps = ["PrecompileTools"] -git-tree-sha1 = "5c3d09cc4f31f5fc6af001c250bf1278733100ff" -uuid = "3cdcf5f2-1ef4-517c-9805-6587b60abb01" -version = "1.3.4" - [[deps.Reexport]] git-tree-sha1 = "45e428421666073eab6f2da5c9d310d99bb12f9b" uuid = "189a3867-3050-52da-a836-e630ba90ab69" @@ -864,12 +524,6 @@ git-tree-sha1 = "838a3a4188e2ded87a4f9f184b4b0d78a1e91cb7" uuid = "ae029012-a4dd-5104-9daa-d747884805df" version = "1.3.0" -[[deps.Richardson]] -deps = ["LinearAlgebra"] -git-tree-sha1 = "e03ca566bec93f8a3aeb059c8ef102f268a38949" -uuid = "708f8203-808e-40c0-ba2d-98a6953ed40d" -version = "1.4.0" - [[deps.Rmath]] deps = ["Random", "Rmath_jll"] git-tree-sha1 = "f65dcb5fa46aee0cf9ed6274ccbd597adc49aa7b" @@ -882,47 +536,10 @@ git-tree-sha1 = "6ed52fdd3382cf21947b15e8870ac0ddbff736da" uuid = "f50d1b31-88e8-58de-be2c-1cc44531875f" version = "0.4.0+0" -[[deps.Roots]] -deps = ["ChainRulesCore", "CommonSolve", "Printf", "Setfield"] -git-tree-sha1 = "a404e6b2c0c5364ab45deaf5af8baa95bdc7e0b8" -uuid = "f2b01f46-fcfa-551c-844a-d8ac1e96c665" -version = "2.0.16" - - [deps.Roots.extensions] - RootsForwardDiffExt = "ForwardDiff" - RootsIntervalRootFindingExt = "IntervalRootFinding" - - [deps.Roots.weakdeps] - ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" - IntervalRootFinding = "d2bf35a9-74e0-55ec-b149-d360ff49b807" - [[deps.SHA]] uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce" version = "0.7.0" -[[deps.SIMD]] -deps = ["PrecompileTools"] -git-tree-sha1 = "0e270732477b9e551d884e6b07e23bb2ec947790" -uuid = "fdea26ae-647d-5447-a871-4b548cad5224" -version = "3.4.5" - -[[deps.SIMDTypes]] -git-tree-sha1 = "330289636fb8107c5f32088d2741e9fd7a061a5c" -uuid = "94e857df-77ce-4151-89e5-788b33177be4" -version = "0.1.0" - -[[deps.SLEEFPirates]] -deps = ["IfElse", "Static", "VectorizationBase"] -git-tree-sha1 = "4b8586aece42bee682399c4c4aee95446aa5cd19" -uuid = "476501e8-09a2-5ece-8869-fb82de89a1fa" -version = "0.6.39" - -[[deps.ScanByte]] -deps = ["Libdl", "SIMD"] -git-tree-sha1 = "2436b15f376005e8790e318329560dcc67188e84" -uuid = "7b38b023-a4d7-4c5e-8d43-3f3097f304eb" -version = "0.3.3" - [[deps.SentinelArrays]] deps = ["Dates", "Random"] git-tree-sha1 = "77d3c4726515dca71f6d80fbb5e251088defe305" @@ -938,23 +555,12 @@ git-tree-sha1 = "e2cc6d8c88613c05e1defb55170bf5ff211fbeac" uuid = "efcf1570-3423-57d1-acb7-fd33fddbac46" version = "1.1.1" -[[deps.ShiftedArrays]] -git-tree-sha1 = "503688b59397b3307443af35cd953a13e8005c16" -uuid = "1277b4bf-5013-50f5-be3d-901d8477a67a" -version = "2.0.0" - [[deps.SnoopPrecompile]] deps = ["Preferences"] git-tree-sha1 = "e760a70afdcd461cf01a575947738d359234665c" uuid = "66db9d55-30c0-4569-8b51-7e840670fc0c" version = "1.0.3" -[[deps.SnpArrays]] -deps = ["Adapt", "CSV", "CodecBzip2", "CodecXz", "CodecZlib", "CodecZstd", "DataFrames", "DelimitedFiles", "Glob", "LinearAlgebra", "LoopVectorization", "Missings", "Mmap", "Printf", "Random", "Requires", "SparseArrays", "SpecialFunctions", "Statistics", "StatsBase", "Tables", "TranscodingStreams", "VariantCallFormat", "VectorizationBase"] -git-tree-sha1 = "ea67d0d40d3813b7846d189771a67874c7b8b7cc" -uuid = "4e780e97-f5bf-4111-9dc4-b70aaf691b06" -version = "0.3.19" - [[deps.Sockets]] uuid = "6462fe0b-24de-5631-8697-dd941f90decc" @@ -973,27 +579,12 @@ deps = ["IrrationalConstants", "LogExpFunctions", "OpenLibm_jll", "OpenSpecFun_j git-tree-sha1 = "ef28127915f4229c971eb43f3fc075dd3fe91880" uuid = "276daf66-3868-5448-9aa4-cd146d93841b" version = "2.2.0" -weakdeps = ["ChainRulesCore"] [deps.SpecialFunctions.extensions] SpecialFunctionsChainRulesCoreExt = "ChainRulesCore" -[[deps.Static]] -deps = ["IfElse"] -git-tree-sha1 = "dbde6766fc677423598138a5951269432b0fcc90" -uuid = "aedffcd0-7271-4cad-89d0-dc628f76c6d3" -version = "0.8.7" - -[[deps.StaticArrayInterface]] -deps = ["ArrayInterface", "Compat", "IfElse", "LinearAlgebra", "Requires", "SnoopPrecompile", "SparseArrays", "Static", "SuiteSparse"] -git-tree-sha1 = "33040351d2403b84afce74dae2e22d3f5b18edcb" -uuid = "0d7ed370-da01-4f52-bd93-41d350b8b718" -version = "1.4.0" -weakdeps = ["OffsetArrays", "StaticArrays"] - - [deps.StaticArrayInterface.extensions] - StaticArrayInterfaceOffsetArraysExt = "OffsetArrays" - StaticArrayInterfaceStaticArraysExt = "StaticArrays" + [deps.SpecialFunctions.weakdeps] + ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" [[deps.StaticArrays]] deps = ["LinearAlgebra", "Random", "StaticArraysCore", "Statistics"] @@ -1037,18 +628,6 @@ version = "1.3.0" ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" InverseFunctions = "3587e190-3f89-42d0-90ee-14403ec27112" -[[deps.StatsModels]] -deps = ["DataAPI", "DataStructures", "LinearAlgebra", "Printf", "REPL", "ShiftedArrays", "SparseArrays", "StatsBase", "StatsFuns", "Tables"] -git-tree-sha1 = "8cc7a5385ecaa420f0b3426f9b0135d0df0638ed" -uuid = "3eaba693-59b7-5ba5-a881-562e759f1c8d" -version = "0.7.2" - -[[deps.StringEncodings]] -deps = ["Libiconv_jll"] -git-tree-sha1 = "33c0da881af3248dafefb939a21694b97cfece76" -uuid = "69024149-9ee7-55f6-a4c4-859efe599b68" -version = "0.3.6" - [[deps.StringManipulation]] git-tree-sha1 = "46da2434b41f41ac3594ee9816ce5541c6096123" uuid = "892a3eda-7b42-436c-8928-eab12a02cf0e" @@ -1094,12 +673,6 @@ git-tree-sha1 = "9250ef9b01b66667380cf3275b3f7488d0e25faf" uuid = "b718987f-49a8-5099-9789-dcd902bef87d" version = "1.0.1" -[[deps.ThreadingUtilities]] -deps = ["ManualMemory"] -git-tree-sha1 = "c97f60dd4f2331e1a495527f80d242501d2f9865" -uuid = "8290d209-cae3-49c0-8002-c8c24d57dab5" -version = "0.5.1" - [[deps.TranscodingStreams]] deps = ["Random", "Test"] git-tree-sha1 = "9a6ae7ed916312b41236fcef7e0af564ef934769" @@ -1118,18 +691,6 @@ version = "1.0.2" [[deps.Unicode]] uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5" -[[deps.VariantCallFormat]] -deps = ["Automa", "BGZFStreams", "BioCore", "BufferedStreams"] -git-tree-sha1 = "3f459ba763786964d472558c4b223bd332e89f24" -uuid = "28eba6e3-a997-4ad9-87c6-d933b8bca6c1" -version = "0.5.3" - -[[deps.VectorizationBase]] -deps = ["ArrayInterface", "CPUSummary", "HostCPUFeatures", "IfElse", "LayoutPointers", "Libdl", "LinearAlgebra", "SIMDTypes", "Static", "StaticArrayInterface"] -git-tree-sha1 = "b182207d4af54ac64cbc71797765068fdeff475d" -uuid = "3d5dd08c-fd9d-11e8-17fa-ed2836048c2f" -version = "0.21.64" - [[deps.WeakRefStrings]] deps = ["DataAPI", "InlineStrings", "Parsers"] git-tree-sha1 = "b1be2855ed9ed8eac54e5caff2afcdb442d52c23" @@ -1141,29 +702,11 @@ git-tree-sha1 = "cd1659ba0d57b71a464a29e64dbc67cfe83d54e7" uuid = "76eceee3-57b5-4d4a-8e66-0e911cebbf60" version = "1.6.1" -[[deps.XZ_jll]] -deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] -git-tree-sha1 = "7928d348322698fb93d5c14b184fdc176c8afc82" -uuid = "ffd25f8a-64ca-5728-b0f7-c24cf3aae800" -version = "5.2.9+0" - -[[deps.YAML]] -deps = ["Base64", "Dates", "Printf", "StringEncodings"] -git-tree-sha1 = "dbc7f1c0012a69486af79c8bcdb31be820670ba2" -uuid = "ddb6d928-2868-570f-bddf-ab3f9cf99eb6" -version = "0.4.8" - [[deps.Zlib_jll]] deps = ["Libdl"] uuid = "83775a58-1f1d-513f-b197-d71354ab007a" version = "1.2.13+0" -[[deps.Zstd_jll]] -deps = ["Artifacts", "JLLWrappers", "Libdl"] -git-tree-sha1 = "49ce682769cd5de6c72dcf1b94ed7790cd08974c" -uuid = "3161d3a3-bdf6-5164-811a-617609db77b4" -version = "1.5.5+0" - [[deps.ghostbasil_jll]] deps = ["Artifacts", "JLLWrappers", "LLVMOpenMP_jll", "Libdl", "Pkg", "libcxxwrap_julia_jll"] git-tree-sha1 = "ededaa9278aa74d362ad3f1cc3f91dd3cabcd68f" @@ -1172,12 +715,6 @@ repo-url = "https://github.com/biona001/ghostbasil_jll.jl" uuid = "caeec347-1c5e-53ac-8e86-7f5e1a690164" version = "0.0.7+0" -[[deps.glmnet_jll]] -deps = ["Libdl", "Pkg"] -git-tree-sha1 = "a88d1783391cea1503e092e8a346751ec5e3b5d1" -uuid = "78c6b45d-5eaf-5d68-bcfb-a5a2cb06c27f" -version = "5.0.0+0" - [[deps.libblastrampoline_jll]] deps = ["Artifacts", "Libdl"] uuid = "8e850b90-86db-534c-a0d3-1478176c7d93" diff --git a/Project.toml b/Project.toml index 77424a6e..8be665a3 100644 --- a/Project.toml +++ b/Project.toml @@ -12,10 +12,10 @@ Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" Ghostbasil = "42a9b20f-2d23-465a-b7ed-975dbff4a4d6" HDF5 = "f67ccb44-e63f-5c2f-98bd-6dc0ccc4ba2f" JLD2 = "033835bb-8acc-5ee8-8aae-3f567f8a3819" -Knockoffs = "878bf26d-0c49-448a-9df5-b057c815d613" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" Optim = "429524aa-4258-5aef-a3af-852621145aeb" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" +Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" ghostbasil_jll = "caeec347-1c5e-53ac-8e86-7f5e1a690164" [compat] @@ -27,7 +27,6 @@ Distributions = "0.25" Ghostbasil = "0.0.1" HDF5 = "0.16" JLD2 = "0.4" -Knockoffs = "1.1" LinearAlgebra = "1" Optim = "1.7" Random = "1" diff --git a/src/GhostKnockoffGWAS.jl b/src/GhostKnockoffGWAS.jl index b60d72c5..bb08dc88 100644 --- a/src/GhostKnockoffGWAS.jl +++ b/src/GhostKnockoffGWAS.jl @@ -1,6 +1,5 @@ module GhostKnockoffGWAS -using Knockoffs using Ghostbasil using JLD2 using HDF5 @@ -12,6 +11,7 @@ using Distributions using DelimitedFiles using Optim using ArgParse +using Statistics export ghostbasil, ghostbasil_parallel, diff --git a/src/app.jl b/src/app.jl index 6074948b..2de92606 100644 --- a/src/app.jl +++ b/src/app.jl @@ -41,7 +41,7 @@ end function parse_commandline() s = ArgParseSettings() - @add_arg_table s begin + @add_arg_table! s begin "--zfile" help = "Tab or comma separated summary Z-score file, which can be " * ".gz compressed. The first row must be a header line that " * diff --git a/src/ghostbasil_parallel.jl b/src/ghostbasil_parallel.jl index 1cc0638e..c7bb85d3 100644 --- a/src/ghostbasil_parallel.jl +++ b/src/ghostbasil_parallel.jl @@ -114,7 +114,7 @@ function ghostbasil_parallel( end # sample ghost knockoffs knockoffs Random.seed!(seed) - t22 += @elapsed Zko_train = Knockoffs.sample_mvn_efficient(Σi, Si, m + 1) + t22 += @elapsed Zko_train = sample_mvn_efficient(Σi, Si, m + 1) t23 += @elapsed Σi_inv = inv(Symmetric(Σi)) t24 += @elapsed Zko = ghost_knockoffs(zscore_tmp, Si, Σi_inv, m=m) end @@ -196,7 +196,7 @@ function ghostbasil_parallel( push!(T_groupk[k], sum(abs, @view(Tk[k][idx]))) end end - kappa, tau, W = Knockoffs.MK_statistics(T_group0, T_groupk) + kappa, tau, W = MK_statistics(T_group0, T_groupk) # save analysis result df[!, :group] = groups[original_idx] @@ -214,7 +214,7 @@ function ghostbasil_parallel( df[!, :pvals] = zscore2pval(df[!, :zscores]) qs, num_selected = Float64[], Int[] for fdr in target_fdrs - q = mk_threshold(tau, kappa, m, fdr) #todo: this step is slow? + q = mk_threshold(tau, kappa, m, fdr) selected = zeros(Int, size(df, 1)) selected[findall(x -> x ≥ q, W)] .= 1 df[!, "selected_fdr$fdr"] = selected diff --git a/src/utilities.jl b/src/utilities.jl index 7cb3d091..780ce553 100644 --- a/src/utilities.jl +++ b/src/utilities.jl @@ -205,3 +205,168 @@ end function check_all_snps_are_unique(chr, pos, effect_allele, non_effect_allele) return nothing # todo end + +""" + sample_mvn_efficient(C::AbstractMatrix{T}, D::AbstractMatrix{T}, m::Int) + +Efficiently samples from `N(0, A)` where +```math +\\begin{aligned} +A &= \\begin{pmatrix} + C & C-D & \\cdots & C-D\\\\ + C-D & C & \\cdots & C-D\\\\ + \\vdots & & \\ddots & \\vdots\\\\ + C-D & C-D & & C +\\end{pmatrix} +\\end{aligned} +``` +Note there are `m` blocks per row/col + +# Source +https://github.com/biona001/Knockoffs.jl/blob/master/src/ghost.jl#L60 +""" +function sample_mvn_efficient(C::AbstractMatrix{T}, D::AbstractMatrix{T}, m::Int) where T + p = size(C, 1) + L = cholesky(Symmetric(C - (m-1)/m * D)) + e1 = randn(p) + e2 = Vector{T}[] + d = MvNormal(Symmetric(D)) + for i in 1:m + push!(e2, rand(d)) + end + e2_avg = 1/m * sum(e2) + Zko = T[] + for i in 1:m + append!(Zko, L.L*e1 + e2[i] - e2_avg) + end + return Zko +end + +""" + ghost_knockoffs(Zscores, D, Σinv; [m=1]) + +Generate Ghost knockoffs given a list of z-scores (GWAS summary statistic). + +# Inputs ++ `Zscores`: List of z-score statistics ++ `D`: Matrix obtained from solving the knockoff problem satisfying + `(m+1)/m*Σ - D ⪰ 0` ++ `Σinv`: Inverse of the covariance matrix + +# optional inputs ++ `m`: Number of knockoffs + +# Reference +He, Z., Liu, L., Belloy, M. E., Le Guen, Y., Sossin, A., Liu, X., ... & Ionita-Laza, I. (2021). +Summary statistics knockoff inference empowers identification of putative causal variants in +genome-wide association studies. + +# Source +https://github.com/biona001/Knockoffs.jl/blob/master/src/ghost.jl#L32 +""" +function ghost_knockoffs(Zscores::AbstractVector{T}, D::AbstractMatrix{T}, + Σinv::AbstractMatrix{T}; m::Int = 1) where T + p = size(D, 1) + length(Zscores) == size(Σinv, 1) == size(Σinv, 2) == p || + error("Dimension mismatch") + DΣinv = D * Σinv + C = 2D - DΣinv * D + v = sample_mvn_efficient(C, D, m) # Jiaqi's trick + P = repeat(I - DΣinv, m) + return P*Zscores + v +end + +""" + MK_statistics(T0::Vector, Tk::Vector{Vector}; filter_method) + +Computes the multiple knockoff statistics kappa, tau, and W. + +# Inputs ++ `T0`: p-vector of importance score for original variables ++ `Tk`: Vector storing T1, ..., Tm, where Ti is importance scores for + the `i`th knockoff copy ++ `filter_method`: Either `Statistics.median` (default) or max (original + function used in 2019 Gimenez and Zou) + +# output ++ `κ`: Index of the most significant feature (`κ[i] = 0` if original feature most + important, otherwise `κ[i] = k` if the `k`th knockoff is most important) ++ `τ`: `τ[i]` stores the most significant statistic among original and knockoff + variables minus `filter_method()` applied to the remaining statistics. ++ `W`: coefficient difference statistic `W[i] = abs(T0[i]) - abs(Tk[i])` + +# Source +https://github.com/biona001/Knockoffs.jl/blob/master/src/threshold.jl#L96 +""" +function MK_statistics( + T0::Vector{T}, + Tk::Vector{Vector{T}}; + filter_method::Function = Statistics.median + ) where T + p, m = length(T0), length(Tk) + all(p .== length.(Tk)) || error("Length of T0 should equal all vectors in Tk") + κ = zeros(Int, p) # index of largest importance score + τ = zeros(p) # difference between largest importance score and median of remaining + W = zeros(p) # importance score of each feature + storage = zeros(m + 1) + for i in 1:p + storage[1] = abs(T0[i]) + for k in 1:m + if abs(Tk[k][i]) > abs(T0[i]) + κ[i] = k + end + storage[k+1] = abs(Tk[k][i]) + end + W[i] = (storage[1] - filter_method(@view(storage[2:end]))) * (κ[i] == 0) + sort!(storage, rev=true) + τ[i] = storage[1] - filter_method(@view(storage[2:end])) + end + return κ, τ, W +end + +""" + mk_threshold(τ::Vector{T}, κ::Vector{Int}, m::Int, q::Number) + +Chooses the multiple knockoff threshold `τ̂ > 0` by setting +τ̂ = min{ t > 0 : (1/m + 1/m * {#j: κ[j] ≥ 1 and W[j] ≥ t}) / {#j: κ[j] == 0 and W[j] ≥ τ̂} ≤ q }. + +# Inputs ++ `τ`: τ[i] stores the feature importance score for the ith feature, i.e. the value + T0 - median(T1,...,Tm). Note in Gimenez and Zou, the max function is used + instead of median ++ `κ`: κ[i] stores which of m knockoffs has largest importance score. When original + variable has largest score, κ[i] == 0. ++ `m`: Number of knockoffs per variable generated ++ `q`: target FDR (between 0 and 1) ++ `rej_bounds`: Number of values of top τ to consider (default = 10000) + +# Reference: ++ Equations 8 and 9 in supplement of "Identification of putative causal loci in + wholegenome sequencing data via knockoff statistics" by He et al. ++ Algorithm 1 of "Improving the Stability of the Knockoff Procedure: Multiple + Simultaneous Knockoffs and Entropy Maximization" by Gimenez and Zou. + +# Source +https://github.com/biona001/Knockoffs.jl/blob/master/src/threshold.jl#L55 +""" +function mk_threshold(τ::Vector{T}, κ::Vector{Int}, m::Int, q::Number, + method=:knockoff_plus, rej_bounds::Int=10000 + ) where T <: AbstractFloat + 0 ≤ q ≤ 1 || error("Target FDR should be between 0 and 1 but got $q") + method == :knockoff_plus || error("Multiple knockoffs needs to use :knockoff_plus filtering method") + length(τ) == length(κ) || error("Length of τ and κ should be the same") + p = length(τ) # number of features + τ̂ = typemax(T) + offset = 1 / m + for (i, t) in enumerate(sort(τ, rev=true)) + numer_counter, demon_counter = 0, 0 + for i in 1:p + κ[i] ≥ 1 && τ[i] ≥ t && (numer_counter += 1) + κ[i] == 0 && τ[i] ≥ t && (demon_counter += 1) + end + ratio = (offset + offset * numer_counter) / max(1, demon_counter) + ratio ≤ q && 0 < t < τ̂ && (τ̂ = t) + i > rej_bounds && break + end + return τ̂ +end