Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enable setting the count prior for empty droplets #233

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions cellbender/remove_background/argparse.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,11 @@ def add_subparser_args(subparsers: argparse) -> argparse:
help="Number of cells expected in the dataset "
"(a rough estimate within a factor of 2 "
"is sufficient).")
subparser.add_argument("--expected-ambient-size", nargs=None, type=int,
default=None,
dest="expected_ambient_size",
help="Prior for the number of counts expected in empty "
"droplets.")
subparser.add_argument("--total-droplets-included",
nargs=None, type=int,
default=consts.TOTAL_DROPLET_DEFAULT,
Expand Down
6 changes: 6 additions & 0 deletions cellbender/remove_background/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,11 @@ def validate_args(self, args):
assert args.total_droplets > args.expected_cell_count, \
f"total_droplets must be an integer greater than the input " \
f"expected_cell_count, which is {args.expected_cell_count}."

# If an expected count for empty droplets is specified, it must be positive
if (args.expected_ambient_size is not None):
assert args.expected_ambient_size > 0, \
f"expected_ambient_size must be an integer greater than 0."

assert (args.fraction_empties > 0) and (args.fraction_empties < 1), \
"fraction_empties must be between 0 and 1, exclusive. This is " \
Expand Down Expand Up @@ -150,6 +155,7 @@ def run_remove_background(args):
dataset_obj = \
SingleCellRNACountsDataset(input_file=args.input_file,
expected_cell_count=args.expected_cell_count,
expected_ambient_size=args.expected_ambient_size,
total_droplet_barcodes=args.total_droplets,
fraction_empties=args.fraction_empties,
model_name=args.model,
Expand Down
25 changes: 15 additions & 10 deletions cellbender/remove_background/data/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ def __init__(self,
low_count_threshold: int,
fpr: List[float],
expected_cell_count: Optional[int] = None,
expected_ambient_size: Optional[int] = None,
total_droplet_barcodes: int = consts.TOTAL_DROPLET_DEFAULT,
fraction_empties: Optional[float] = None,
gene_blacklist: List[int] = []):
Expand All @@ -83,7 +84,8 @@ def __init__(self,
self.fraction_empties = fraction_empties
self.is_trimmed = False
self.low_count_threshold = low_count_threshold
self.priors = {'n_cells': expected_cell_count} # Expected cells could be None.
self.priors = {'n_cells': expected_cell_count, # Expected cells could be None.
'empty_counts': expected_ambient_size}
self.posterior = None
self.fpr = fpr
self.random = np.random.RandomState(seed=1234)
Expand Down Expand Up @@ -1425,17 +1427,20 @@ def get_d_priors_from_dataset(dataset: SingleCellRNACountsDataset) \

# Models that include both cells and empty droplets.
else:

if dataset.priors['empty_counts'] is None:
# Cutoff for original data. Empirical.
cut = dataset.low_count_threshold

# Cutoff for original data. Empirical.
cut = dataset.low_count_threshold

# Estimate the number of UMI counts in empty droplets.
# Estimate the number of UMI counts in empty droplets.

# Mode of (rounded) log counts (for counts > cut) is a robust
# empty estimator.
empty_log_counts = mode(np.round(np.log1p(counts[counts > cut]),
decimals=1))[0]
empty_counts = int(np.expm1(empty_log_counts).item())
# Mode of (rounded) log counts (for counts > cut) is a robust
# empty estimator.
empty_log_counts = mode(np.round(np.log1p(counts[counts > cut]),
decimals=1))[0]
empty_counts = int(np.expm1(empty_log_counts).item())
else:
empty_counts = dataset.priors['empty_counts']

# Estimate the number of UMI counts in cells.

Expand Down