Update docstrings and add type hint #104

Merged · 1 commit · Apr 16, 2024
2 changes: 1 addition & 1 deletion .github/workflows/build.yml
@@ -7,7 +7,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: ["3.8", "3.9"]
python-version: ["3.9", "3.10"]
max-parallel: 5

steps:
2 changes: 1 addition & 1 deletion build-env.yml
@@ -14,7 +14,7 @@ dependencies:
- numpy
- pip
- pytest-cov
- python==3.8
- python==3.9
- scipy
- snakemake==7.1.1
- pip:
163 changes: 109 additions & 54 deletions dadi_cli/BestFit.py
@@ -1,23 +1,51 @@
import glob, sys
import numpy as np
from typing import Optional
from dadi_cli.Models import get_model
from dadi_cli.Pdfs import get_dadi_pdf_params


def get_bestfit_params(path, lbounds, ubounds, output, delta, Nclose=3, Nbest=100):
def get_bestfit_params(
path: str,
lbounds: list[float],
ubounds: list[float],
output: str,
delta: float,
Nclose: int = 3,
Nbest: int = 100
) -> Optional[np.array]:
"""
Description:
Obtains bestfit parameters.

Arguments:
path str: Path to results of inference.
lbounds list: Lower bounds of the optimized parameters.
ubounds list: Upper bounds of the optimized parameters.
output str: Name of the output file.
delta float: Max percentage difference for log-likliehoods compared to the best optimization
log-likliehood to be consider convergent.
Nclose int: Number of best-fit results to be consider convergent.
Nbest int: Number of best-fit results to be displayed.
Obtains best-fit parameters from optimization results, filters them by log-likelihood
difference and parameter boundaries, ranks them, and writes the results to a specified file.

Parameters
----------
path : str
Path to results of inference.
lbounds : list[float]
Lower bounds of the optimized parameters.
ubounds : list[float]
Upper bounds of the optimized parameters.
output : str
Name of the output file where results will be written.
delta : float
Maximum relative difference between a result's log-likelihood and the best optimization
log-likelihood for the result to be considered convergent.
Nclose : int, optional
Number of best-fit results to be considered convergent (default is 3).
Nbest : int, optional
Number of best-fit results to be displayed (default is 100).

Returns
-------
np.array | None
Array of close enough results if the convergence criteria are met, None otherwise.

Raises
------
ValueError
If no files are found at the specified path or if an incorrect path naming convention is used.

"""
files = glob.glob(path)
if files == []:
@@ -26,23 +54,21 @@
)
res, comments = [], []

for f in files:
fid = open(f, "r")
for line in fid.readlines():
if line.startswith("#"):
if line.startswith("# Log(likelihood)"):
params = line.rstrip()
else:
comments.append(line.rstrip())
continue
# Parse numerical result
try:

res.append([float(_) for _ in line.rstrip().split()])
except ValueError:
# Ignore lines with a parsing error
pass
fid.close()
for file in files:
with open(file, "r") as fid:
for line in fid.readlines():
if line.startswith("#"):
if line.startswith("# Log(likelihood)"):
params = line.rstrip()
else:
comments.append(line.rstrip())
continue
# Parse numerical result
try:
res.append([float(_) for _ in line.rstrip().split()])
except ValueError:
# Ignore lines with a parsing error
pass

if len(res) == 0:
print("No optimization results found.")
@@ -58,7 +84,7 @@
try:
opt_ll = res[0][0]
except IndexError:
return None
return

Codecov / codecov/patch: added line dadi_cli/BestFit.py#L87 was not covered by tests.

# Filter out those results within delta threshold
close_enough = res[1 - (opt_ll / res[:, 0]) <= delta]
Expand Down Expand Up @@ -91,43 +117,72 @@
return close_enough


def close2boundaries(params, lbounds, ubounds):
def close2boundaries(
params: list[float],
lbounds: list[float],
ubounds: list[float],
threshold: float = 0.01
) -> bool:
"""
Description:
Helper function for detemining whether a parameter is close to the boundaries.
Determines whether any parameter is close to its boundaries within a specified threshold.

Parameters
----------
params : list[float]
Inferred parameters.
lbounds : list[float]
Lower bounds for the parameters.
ubounds : list[float]
Upper bounds for the parameters.
threshold : float, optional
Proportion of the boundary range that defines closeness (default is 0.01).

Returns
-------
bool
True if any parameter is within the threshold of its boundaries, False otherwise.

Arguments:
params list: Inferred parameters.
lbounds list: Lower bounds for the parameters.
ubounds list: Upper bounds for the parameters.

Returns:
is_close2boundaries: True, if any parameter is close to the boundaries;
False, otherwise.
"""
is_close2boundaries = False
for i in range(len(params)):
if ubounds[i] is not None and lbounds[i] is not None:
bound_range = ubounds[i] - lbounds[i]
if (params[i] - lbounds[i]) / bound_range < 0.01 or (
if (params[i] - lbounds[i]) / bound_range < threshold or (
ubounds[i] - params[i]
) / bound_range < 0.01:
) / bound_range < threshold:
is_close2boundaries = True
return is_close2boundaries


def boundary_filter(res, ubounds, lbounds):
def boundary_filter(
res: np.array,
ubounds: list[float],
lbounds: list[float]
) -> np.array:
"""
Description:
Helper function to filter out results where the params are outside the boundaries.

Arguments:
res numpy.array: Inference results stored as an array.
lbounds list: Lower bounds for the parameters.
ubounds list: Upper bounds for the parameters.
Filters inference results to exclude those where any parameter is outside specified boundaries.

Parameters
----------
res : np.array
Inference results stored as an array, where each row represents an inference result and
columns correspond to different parameters.
lbounds : list[float]
Lower bounds for each parameter.
ubounds : list[float]
Upper bounds for each parameter.

Returns
-------
np.array
Array with rows filtered based on boundaries.

Raises
------
ValueError
If the number of upper boundaries does not match the number of lower boundaries,
or if the number of boundaries does not match the number of parameters.

Returns:
res: Numpy array with values filtered based on boundaries
"""
# Filter out results where the params are outside the boundaries
# Doing this before getting opt_ll so it doesn't throw off convergence check
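For reviewers who want to see the updated signatures in context, below is a minimal usage sketch of the BestFit helpers touched by this PR. The glob pattern, bounds, and output name are hypothetical placeholders; only the function names, parameters, and the new `threshold` argument come from the diff above.

```python
# Hypothetical usage sketch; paths, bounds, and values are placeholders.
from dadi_cli.BestFit import get_bestfit_params, close2boundaries

lbounds = [1e-3, 1e-3, 0, 0]
ubounds = [10, 10, 1, 1]

# Collect optimization results matching a glob pattern, keep those whose
# log-likelihood is within `delta` of the best fit, and write the ranked
# results to the output file.
close_enough = get_bestfit_params(
    path="results/demo/demo.InferDM.opts.*",      # placeholder glob
    lbounds=lbounds,
    ubounds=ubounds,
    output="results/demo/demo.InferDM.bestfits",  # placeholder output name
    delta=0.0005,
    Nclose=3,
    Nbest=100,
)

if close_enough is not None:
    top = close_enough[0]
    # Assumed row layout: log-likelihood first (it is used for sorting above),
    # followed by the optimized parameters; a theta column may trail them.
    top_params = top[1:1 + len(lbounds)]
    if close2boundaries(top_params, lbounds, ubounds, threshold=0.01):
        print("Warning: best-fit parameters are close to the boundaries.")
```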
73 changes: 45 additions & 28 deletions dadi_cli/GenerateCache.py
@@ -7,35 +7,54 @@


def generate_cache(
func,
grids,
popt,
gamma_bounds,
gamma_pts,
additional_gammas,
output,
sample_sizes,
cpus,
gpus,
dimensionality,
):
func: callable,
grids: list[int],
popt: str,
gamma_bounds: list[str],
gamma_pts: int,
additional_gammas: list[float],
output: str,
sample_sizes: list[int],
cpus: int,
gpus: int,
dimensionality: int,
) -> None:
"""
Description:
Generates caches of frequency spectra for DFE inference.
Generates caches of frequency spectra for DFE inference using demographic models.

Parameters
----------
func : callable
A callable demographic model function from DFE.DemogSelModels.
grids : list[int]
Grid sizes for the frequency spectrum calculation.
popt : str
File name containing demographic parameters for inference.
gamma_bounds : list[str]
Range of population-scaled selection coefficients, specified as strings.
gamma_pts : int
Number of grid points for gamma integration.
additional_gammas : list[float]
List of additional gamma values to cache.
output : str
Output file name where the cache will be saved.
sample_sizes : list[int]
List of population sample sizes.
cpus : int
Number of CPUs to utilize.
gpus : int
Number of GPUs to utilize.
dimensionality : int
Dimensionality of the frequency spectrum (must be 1 or 2).

Raises
------
ValueError
If the dimensionality is not 1 or 2.

Arguments:
func function: dadi demographic models.
grids list: Grid sizes.
popt str: Name of the file containing demographic parameters for the inference.
gamma_bounds list: Range of population-scaled selection coefficients to cache.
gamma_pts int: Number of gamma grid points over which to integrate.
additional_gammas list: Additional positive population-scaled selection coefficients to cache for.
output str: Name of the output file.
sample_sizes list: Sample sizes of populations.
cpus int: Number of CPUs to use in cache generation.
gpus int: Number of GPUs to use in cache generation.
dimensionality int: Dimensionality of the frequency spectrum.
"""
if dimensionality not in [1, 2]:
raise ValueError(f"Invalid dimensionality {dimensionality}. Only 1 or 2 are accepted.")

if func is not getattr(DFE.DemogSelModels, 'equil'):
popt, theta = get_opts_and_theta(popt, gen_cache=True)
@@ -69,8 +88,6 @@ def generate_cache(
cpus=cpus,
gpus=gpus
)
else:
raise ValueError("--dimensionality only accepts 1 or 2.")

if (spectra.spectra < 0).sum() > 0:
print(
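A similarly hedged sketch of a `generate_cache` call with the updated signature; the model choice, file names, and numeric settings below are illustrative assumptions rather than values from the dadi-cli documentation.

```python
# Hypothetical call; file names and numeric settings are placeholders.
from dadi import DFE
from dadi_cli.GenerateCache import generate_cache

generate_cache(
    func=DFE.DemogSelModels.two_epoch,     # assumed selection model from DFE.DemogSelModels
    grids=[120, 140, 160],                 # grid sizes for the spectrum calculation
    popt="demo.two_epoch.bestfits",        # placeholder file of demographic parameters
    gamma_bounds=["1e-4", "2000"],         # selection-coefficient range, passed as strings
    gamma_pts=50,                          # number of gamma grid points
    additional_gammas=[],                  # extra positive gammas to cache, if any
    output="demo.two_epoch.spectra.bpkl",  # placeholder cache file name
    sample_sizes=[20],                     # one entry per population
    cpus=4,
    gpus=0,
    dimensionality=1,                      # anything other than 1 or 2 now raises ValueError up front
)
```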