Add ability to pass in theta and theta_ns for SimulateDM and SimulateDFE

xin-huang · Jan 15, 2025 · a92d027 · a92d027
1 parent 84c2e4b
commit a92d027
Show file tree

Hide file tree

Showing 7 changed files with 50 additions and 24 deletions.
diff --git a/dadi_cli/InferDFE.py b/dadi_cli/InferDFE.py
@@ -82,7 +82,7 @@ def infer_dfe(
 
     """
     if not cache1d and not cache2d:
-        raise ValueError("At least one of cache1d or cache2d must be provided.")
+        raise ValueError("\nAt least one of cache1d or cache2d must be provided.")
 
     # Randomize starting parameter values
     if seed is not None:
@@ -116,7 +116,7 @@ def infer_dfe(
         try:
             from dadi.LowPass.LowPass import make_low_pass_func_GATK_multisample as func_cov
         except ModuleNotFoundError:
-            raise ImportError("ERROR:\nCurrent dadi version does not support coverage model\n")
+            raise ImportError("\nCurrent dadi version does not support coverage model\n")
         nseq = [int(ele) for ele in cov_args[1:]]
         if cov_inbreeding == []:
             Fx = None

diff --git a/dadi_cli/SimulateFs.py b/dadi_cli/SimulateFs.py
@@ -13,6 +13,7 @@ def simulate_demography(
     ns: int, 
     pts_l: tuple[int],
     misid: bool, 
+    theta: float, 
     output: str, 
     inference_file: bool
 ) -> None:
@@ -33,6 +34,8 @@ def simulate_demography(
         Grid sizes for modeling. If None, a default is calculated based on ns.
     misid : bool
         If True, adds a parameter for modeling ancestral state misidentification.
+    theta : float
+        Set the theta value for the simulated demographic SFS. Default 1.
     output : str
         Name of the output file.
     inference_file : bool
@@ -49,7 +52,7 @@ def simulate_demography(
         func = dadi.Numerics.make_anc_state_misid_func(func)
         func.__param_names__ = param_names + ["misid"]
     func_ex = dadi.Numerics.make_extrap_func(func)
-    fs = func_ex(p0, ns, pts_l)
+    fs = func_ex(p0, ns, pts_l) * theta
     fs.to_file(output)
 
     if inference_file:
@@ -58,7 +61,7 @@ def simulate_demography(
                 "# Ran SimulateDM\n# This is a fake inference output results to generate caches\n# Log(likelihood)   "
                 + "\t".join(func.__param_names__)
                 + "\ttheta0\n"
-                + "-0\t" + "\t".join([str(ele) for ele in p0]) + "\t1"
+                + "-0\t" + "\t".join([str(ele) for ele in p0]) + "\t" + str(theta)
             )
 
 
@@ -103,7 +106,7 @@ def simulate_dfe(
     sele_dist: str, 
     sele_dist2: str, 
     pdf_file: str,
-    ratio: float, 
+    theta_ns: float, 
     misid: bool, 
     output: str
 ) -> None:
@@ -122,8 +125,8 @@ def simulate_dfe(
         Name of the 1D Probability Density Function (PDF) for modeling DFEs.
     sele_dist2 : str
         Name of the 2D PDF for modeling DFEs.
-    ratio : float
-        Ratio of synonymous to non-synonymous mutations.
+    theta_ns : float
+        Theta of non-synonymous mutations.
     misid : bool
         If True, adds a parameter for modeling ancestral state misidentification when data are polarized.
     output : str
@@ -152,16 +155,16 @@ def simulate_dfe(
 
     if (cache1d is not None) and (cache2d is not None):
         func = dadi.DFE.Cache2D_mod.mixture
-        func_args = [cache1d, cache2d, sele_dist, sele_dist2, ratio]
+        func_args = [cache1d, cache2d, sele_dist, sele_dist2, theta_ns]
     else:
-        func_args = [sele_dist, ratio]
+        func_args = [sele_dist, theta_ns]
 
     if misid:
         func = dadi.Numerics.make_anc_state_misid_func(func)
-    print(p0, None, sele_dist, ratio, None)
+    print(p0, None, sele_dist, theta_ns, None)
     # Get expected SFS for MLE
     if (cache1d is not None) and (cache2d is not None):
-        fs = func(p0, None, cache1d, cache2d, sele_dist, sele_dist2, ratio, None)
+        fs = func(p0, None, cache1d, cache2d, sele_dist, sele_dist2, theta_ns, None)
     else:
-        fs = func(p0, None, sele_dist, ratio, None)
+        fs = func(p0, None, sele_dist, theta_ns, None)
     fs.to_file(output)
diff --git a/dadi_cli/parsers/infer_dfe_parsers.py b/dadi_cli/parsers/infer_dfe_parsers.py
@@ -84,7 +84,7 @@ def _run_infer_dfe(args: argparse.Namespace) -> None:
     if args.pdf1d == None and args.pdf2d == None:
         raise ValueError("Require --pdf1d and/or --pdf2d depending on DFE model")
     if args.cache1d == None and args.cache2d == None:
-        raise ValueError("cache1d --pdf1d and/or --cache2d depending on DFE model")
+        raise ValueError("Require --cache1d and/or --cache2d depending on DFE model")
     if "://" in args.fs:
         import urllib.request
         sfs_fi = open("sfs.fs","w")

diff --git a/dadi_cli/parsers/simulate_dfe_parsers.py b/dadi_cli/parsers/simulate_dfe_parsers.py
@@ -31,13 +31,18 @@ def _run_simulate_dfe(args: argparse.Namespace) -> None:
             The 2D probability distribution function name for the DFE.
         - pdf_file : str, optional
             Name of file with custom probability density function model(s) in it.
-        - ratio : float
-            Ratio for adjusting the selection parameters, typically used to scale between different
-            types of mutations or fitness effects.
+        - theta_ns : float
+            Nonsynonymous theta for the SFS with selection.
         - nomisid : bool
             Flag to not consider misidentification, which is converted internally to `misid`.
 
     """
+    # Make sure flags are used:
+    if args.cache1d == None and args.cache2d == None:
+        raise ValueError("\nRequire --cache1d and/or --cache2d depending on DFE model")
+    if args.pdf1d == None and args.pdf2d == None:
+        raise ValueError("\nRequire --pdf1d and/or --pdf2d depending on DFE model")
+
     if args.cache1d != None:
         if "://" in args.cache1d:
             from urllib.request import urlopen
@@ -66,7 +71,7 @@ def _run_simulate_dfe(args: argparse.Namespace) -> None:
         sele_dist=args.pdf1d,
         sele_dist2=args.pdf2d,
         pdf_file=args.pdf_file,
-        ratio=args.ratio,
+        theta_ns=args.theta_ns,
         misid=args.misid,
         output=args.output,
     )
@@ -94,11 +99,13 @@ def add_simulate_dfe_parsers(subparsers: argparse.ArgumentParser) -> None:
     )
     add_dfe_argument(parser)
     parser.add_argument(
-        "--ratio",
+        "--theta-ns",
         type=positive_num,
-        dest="ratio",
-        required=True,
-        help="Ratio for the nonsynonymous mutations to the synonymous mutations.",
+        dest="theta_ns",
+        default=1,
+        help="Set the theta for the nonsynonymous mutations.\n" + \
+        "In dadi this is usually determined by the [ demography model theta ] multiplied by the ratio of [ amount of potential neutral SNPs ] to [ potential selective SNPs ];\n" + \
+        "Default: 1.",
     )
     add_p0_argument(parser)
     add_misid_argument(parser)

diff --git a/dadi_cli/parsers/simulate_dm_parsers.py b/dadi_cli/parsers/simulate_dm_parsers.py
@@ -56,6 +56,7 @@ def _run_simulate_dm(args: argparse.Namespace) -> None:
         ns=args.sample_sizes,
         pts_l=args.grids,
         misid=args.misid,
+        theta=args.theta,
         output=args.output,
         inference_file=args.inference_file,
     )
@@ -90,5 +91,12 @@ def add_simulate_dm_parsers(subparsers: argparse.ArgumentParser) -> None:
         action="store_true",
         help='Make an output file like you would get for running InferDM to pass into GenerateCache to make caches with your simulated demographic model. Will be the same name and path as output + ".SimulateFs.pseudofit"; Default: False.',
     )
+    parser.add_argument(
+        "--theta",
+        dest="theta",
+        default=1,
+        type=positive_num,
+        help='Set the theta of the demographic model; Default: 1.',
+    )
     add_output_argument(parser)
     parser.set_defaults(runner=_run_simulate_dm)
diff --git a/tests/test_SimulateFs.py b/tests/test_SimulateFs.py
@@ -24,6 +24,7 @@ def test_simulate_demography_code():
     ns = [10]
     pts_l = [20, 30, 40]
     misid = False
+    theta = 1
     output = "tests/test_results/simulate_two_epoch.fs"
     inference_file = False
     simulate_demography(
@@ -33,6 +34,7 @@ def test_simulate_demography_code():
         ns=ns, 
         pts_l=pts_l, 
         misid=misid, 
+        theta=theta, 
         output=output, 
         inference_file=inference_file
     )
@@ -49,6 +51,7 @@ def test_simulate_custom_demography_code():
     ns = [10]
     pts_l = [20, 30, 40]
     misid = False
+    theta = 1
     output = "tests/test_results/simulate_three_epoch_bottleneck.fs"
     inference_file = False
     simulate_demography(
@@ -58,6 +61,7 @@ def test_simulate_custom_demography_code():
         ns=ns, 
         pts_l=pts_l, 
         misid=misid, 
+        theta=theta, 
         output=output, 
         inference_file=inference_file
     )
@@ -75,6 +79,7 @@ def test_simulate_demography_misid_code():
     ns = [10]
     pts_l = [20, 30, 40]
     misid = True
+    theta = 1
     output = "tests/test_results/simulate_two_epoch_with_misid.fs"
     inference_file = True
     simulate_demography(
@@ -84,6 +89,7 @@ def test_simulate_demography_misid_code():
         ns=ns, 
         pts_l=pts_l, 
         misid=misid, 
+        theta=theta, 
         output=output, 
         inference_file=inference_file
     )
@@ -105,7 +111,7 @@ def test_simulate_dfe_code():
     sele_dist = "lognormal"
     sele_dist2 = "biv_lognormal"
     pdf_file = None
-    theta_ns = 2.31
+    theta_ns = 2.31*1000
     misid = False
     output_1d = "tests/test_results/simulate_dfe_split_mig_one_s_lognormal.fs"
     output_2d = "tests/test_results/simulate_dfe_split_mig_two_s_lognormal.fs"

diff --git a/tests/test_main.py b/tests/test_main.py
@@ -75,6 +75,7 @@ def simulate_args():
     simulate_args.sample_sizes = [10]
     simulate_args.grids = [20, 30, 40]
     simulate_args.nomisid = True
+    simulate_args.theta = 1000
     simulate_args.output = "tests/test_results/main_simulate_two_epoch.fs"
     simulate_args.inference_file = False
 
@@ -91,6 +92,7 @@ def simulate_args():
     simulate_args.sample_sizes = [10]
     simulate_args.grids = [20, 30, 40]
     simulate_args.nomisid = False
+    simulate_args.theta = 1000
     simulate_args.output = "tests/test_results/main_simulate_three_epoch_bottleneck_html.fs"
     simulate_args.inference_file = False
 
@@ -142,7 +144,7 @@ def simulate_args():
     simulate_args.pdf1d = "lognormal"
     simulate_args.pdf2d = "biv_lognormal"
     simulate_args.pdf_file = None
-    simulate_args.ratio = 2.31
+    simulate_args.theta_ns = 2.31*1000
     simulate_args.nomisid = False
     simulate_args.output = "tests/test_results/main_simulate_mix_dfe.fs"
 
@@ -159,7 +161,7 @@ def simulate_args():
     simulate_args.pdf1d = "lognormal"
     simulate_args.pdf2d = "biv_lognormal"
     simulate_args.pdf_file = None
-    simulate_args.ratio = 2.31
+    simulate_args.theta_ns = 2.31*1000
     simulate_args.nomisid = False
     simulate_args.output = "tests/test_results/main_simulate_mix_dfe_html.fs"