
Commit

add log infos
SarahOuologuem committed Apr 9, 2024
1 parent 88ecfb7 commit e7ec9f7
Showing 6 changed files with 42 additions and 45 deletions.
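The added lines below all write to a logger named L using old-style %-formatting; a minimal sketch of the setup these scripts appear to assume (handler and format are illustrative, not taken from the repository):

import logging
import sys

# logger used as L.info(...) / L.warning(...) / L.error(...) in the diffs below
L = logging.getLogger()
L.setLevel(logging.INFO)
handler = logging.StreamHandler(sys.stdout)
handler.setFormatter(logging.Formatter("%(asctime)s: %(levelname)s - %(message)s"))
L.addHandler(handler)

# old-style %-interpolation, matching the added log calls
L.info("Reading in MuData from '%s'" % "input.h5mu")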
21 changes: 10 additions & 11 deletions panpipes/python_scripts/downsample.py
@@ -42,12 +42,12 @@
help='comma separated string of modalities we want to intersect_obs on')


L.warning("Running filter_mudata")

parser.set_defaults(verbose=True)
args, opt = parser.parse_known_args()
L.info(args)

# load data
L.info("Reading in MuData from '%s'" % args.input_mudata)
mdata = mu.read(args.input_mudata)

if isinstance(mdata, AnnData):
@@ -56,7 +56,7 @@

orig_obs = mdata.obs

L.info("Number of cells per sample \n%s" % mdata.obs.sample_id.value_counts())
L.info("Before downsampling: number of cells per sample \n%s" % mdata.obs.sample_id.value_counts())


if args.downsample_col == "None":
@@ -74,7 +74,7 @@
mdata.obs[args.downsample_col] = mdata.obs[args.downsample_col].cat.remove_unused_categories()

# do the downsample.

L.info("Downsampling modalities %s" % mods)
downsample_mudata(mdata, nn=int(args.downsample_value),
cat_col=args.downsample_col,
mods = mods,
@@ -83,15 +83,14 @@


mdata.update()

L.info("Number of cells per sample")
L.info(mdata.obs.sample_id.value_counts())
L.info("After downsampling: number of cells per sample \n%s" % mdata.obs.sample_id.value_counts())

# write out the observations
write_obs(mdata, output_prefix=re.sub("\\.(.*)", "", args.output_mudata),
output_suffix="_downsampled_cell_metadata.tsv")

prefix = re.sub("\\.(.*)", "", args.output_mudata)
L.info("Saving updated obs in a metadata tsv file to " + prefix + "_downsampled_cell_metadata.tsv")
write_obs(mdata, output_prefix=prefix, output_suffix="_downsampled_cell_metadata.tsv")

L.info("Saving updated MuData to '%s'" % args.output_mudata)
mdata.write(args.output_mudata)
# This stage (i.e. pre-normalisation) is the point where the mdata file can be written out so that the raw matrix can be extracted for cellphonedb.

L.info("Done")
2 changes: 1 addition & 1 deletion panpipes/python_scripts/run_filter.py
@@ -174,7 +174,7 @@ def test_matching_df_ignore_cat(new_df, old_df):
# write out obs
output_prefix = re.sub(".h5mu", "", args.output_mudata)

L.info("Saving updated MuData.obs in a metadata tsv file to '" + output_prefix + "_filtered_cell_metadata.tsv'")
L.info("Saving updated obs in a metadata tsv file to '" + output_prefix + "_filtered_cell_metadata.tsv'")
write_obs(mdata, output_prefix=output_prefix, output_suffix="_filtered_cell_metadata.tsv")

# write out the per sample_id cell numbers
2 changes: 1 addition & 1 deletion panpipes/python_scripts/run_filter_spatial.py
@@ -151,7 +151,7 @@ def test_matching_df_ignore_cat(new_df, old_df):
# write out obs
output_prefix = re.sub(".h5mu", "", os.path.basename(args.output_mudata))

L.info("Saving updated MuData.obs in a metadata tsv file to './tables/" + output_prefix + "_filtered_cell_metadata.tsv'")
L.info("Saving updated obs in a metadata tsv file to './tables/" + output_prefix + "_filtered_cell_metadata.tsv'")
write_obs(mdata, output_prefix=os.path.join("tables/",output_prefix), output_suffix="_filtered_cell_metadata.tsv")

# write out the per sample_id cell numbers
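The spatial variant differs from run_filter.py in that it takes the basename and writes under a tables/ subfolder, which is why the log message points at './tables/'. A short illustration under an assumed path:

import os
import re

output_mudata = "results/sample1.h5mu"  # hypothetical value of args.output_mudata
output_prefix = re.sub(".h5mu", "", os.path.basename(output_mudata))
print(output_prefix)  # sample1
print(os.path.join("tables/", output_prefix) + "_filtered_cell_metadata.tsv")
# tables/sample1_filtered_cell_metadata.tsv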
59 changes: 29 additions & 30 deletions panpipes/python_scripts/run_preprocess_rna.py
@@ -80,6 +80,7 @@
sc.settings.figdir = figdir
sc.set_figure_params(scanpy=True, fontsize=14, dpi=300, facecolor='white', figsize=(5,5))

L.info("Reading in MuData from '%s'" % args.input_mudata)
mdata = mu.read(args.input_mudata)

# if we have actually loaded an anndata object, make into a mudata
@@ -102,10 +102,12 @@
hvg_batch_key=None

# save raw counts as a layer
L.info("Checking if raw data is available")
if X_is_raw(adata):
L.info("Saving raw counts from .X to .layers['raw_counts']")
adata.layers['raw_counts'] = adata.X.copy()
elif "raw_counts" in adata.layers :
L.info("raw_counts layer already exists")
L.info(".layers['raw_counts'] already exists and copying it to .X")
adata.X = adata.layers['raw_counts'].copy()
else:
L.error("X is not raw data and raw_counts layer not found")
@@ -116,7 +119,7 @@
# except when flavor='seurat_v3' in which count data is expected.
# change the order accordingly
if X_is_raw(adata):
L.info("normalise, log and calculate highly variable genes")
L.info("Normalise, log and calculate HVGs")
if args.flavor == "seurat_v3":
if args.n_top_genes is None:
raise ValueError("if seurat_v3 is used you must give a n_top_genes value")
@@ -136,51 +139,48 @@
min_disp=float(args.min_disp),
batch_key=hvg_batch_key)
L.debug(adata.uns['log1p'])

if "highly_variable" in adata.var:
L.warning( "You have %s Highly Variable Features", np.sum(adata.var.highly_variable))
L.info( "You have %s HVGs", np.sum(adata.var.highly_variable))
sc.pl.highly_variable_genes(adata,show=False, save ="_genes_highlyvar.png")
else:
sys.exit("I cannot find Highly Variable Features in your RNA")

if isinstance(mdata, mu.MuData):
mdata.update()

# exclude hvgs if there is an exclude file
if args.exclude_file is not None:
if os.path.exists(args.exclude_file):
L.info("exclude genes from hvg")
L.info("Reading in exclude_file from '%s'" % args.exclude_file)
customgenes = pd.read_csv(args.exclude_file)
custom_cat = list(set(customgenes['group'].tolist()))
cat_dic = {}
for cc in custom_cat:
cat_dic[cc] = customgenes.loc[customgenes["group"] == cc,"feature"].tolist()
exclude_action = str(args.exclude)
excl = cat_dic[exclude_action]
L.info(len(excl))
L.info("number of hvgs prior to filtering")
L.info(adata.var.highly_variable.sum())
L.info("Number of HVGs prior to filtering: %s" % adata.var.highly_variable.sum())
adata.var['hvg_exclude'] = [True if gg in excl else False for gg in adata.var.index]
L.debug("num to exclude %s" % (adata.var['hvg_exclude'] & adata.var['highly_variable']).sum() )
L.debug(adata)
L.info("Number of genes to exclude %s" % (adata.var['hvg_exclude'] & adata.var['highly_variable']).sum() )
L.info("Excluding genes from HVG")
if any(adata.var['hvg_exclude'] & adata.var['highly_variable']):
adata.var['highly_variable'].loc[adata.var.hvg_exclude] = False
adata.var.loc[adata.var.hvg_exclude, 'highly_variable_rank'] = np.nan

L.info("number of hvgs after filtering")
L.info(adata.var.highly_variable.sum())
L.info("Number of HVGs after filtering: %s" % adata.var.highly_variable.sum())
sc.pl.highly_variable_genes(adata,show=False, save ="_exclude_genes_highlyvar.png")
else:
sys.exit("exclusion file %s not found, check the path and try again" % args.exclude_file)
L.error("Exclusion file %s not found" % args.exclude_file)
sys.exit("Exclusion file %s not found" % args.exclude_file)

if isinstance(mdata, mu.MuData):
mdata.update()

L.debug(adata.uns['log1p'])

# filter by hvgs
filter_by_hvgs = args.filter_by_hvg

if filter_by_hvgs is True:
L.info("filtering object to only include highly variable genes")
L.info("Subsetting object to only include HVGs")
mdata.mod["rna"] = adata[:, adata.var.highly_variable]
if isinstance(mdata, mu.MuData):
mdata.update()
@@ -191,39 +191,39 @@


adata.layers['logged_counts'] = adata.X.copy()
L.debug(adata.uns['log1p'])
# regress out
if args.regress_out is not None:
L.info("Regressing out %s" % args.regress_out)
regress_opts = args.regress_out.split(",")
L.info("regressing out %s" % regress_opts)
sc.pp.regress_out(adata, regress_opts)


if args.scale is True and args.scale_max_value is None:
L.info("scaling data with default parameters")
L.info("Scaling data with default parameters")
sc.pp.scale(adata)
elif args.scale is True:
L.info("scaling data to max value %i" % int(args.scale_max_value))
L.info("Scaling data to max value %i" % int(args.scale_max_value))
sc.pp.scale(adata, max_value=int(args.scale_max_value))
else:
L.info("not scaling data")
pass
L.info("Not scaling the data")
#pass


L.debug(adata.uns['log1p'])
# run pca
L.info("running pca")
L.info("Running PCA")

if adata.var.shape[0] < int(args.n_pcs):
L.info("You have less features than number of PCs you intend to calculate")
L.warning("You have less features (%s) than number of PCs (%t) you intend to calculate." % (adata.var.shape[0], args.n_pcs))
n_pcs = adata.var.shape[0] - 1
L.info("Setting n PCS to %i" % int(n_pcs))
L.info("Setting number of PCS to %i" % int(n_pcs))
else:
n_pcs = int(args.n_pcs)
sc.tl.pca(adata, n_comps=n_pcs,
svd_solver=args.pca_solver,
random_state=0)

# do some plots!
# pca plots
L.info("Plotting PCA")
sc.pl.pca_variance_ratio(adata, log=True, n_pcs=n_pcs, save=".png")

col_variables = args.color_by.split(",")
@@ -237,8 +237,7 @@

mdata.update()
# save the scaled adata object

L.info("Saving updated MuData to '%s'" % args.output_scaled_mudata)
mdata.write(args.output_scaled_mudata)
L.debug(adata.uns['log1p'])

L.info("done")
L.info("Done")
1 change: 0 additions & 1 deletion panpipes/python_scripts/run_preprocess_spatial.py
@@ -111,7 +111,6 @@
L.error("X is not raw data and 'raw_counts' layer not found")
sys.exit("X is not raw data and 'raw_counts' layer not found")

L.info("Using raw counts for HVG selection & normalization")

# Normalization + HVG selection based on flavour
if args.norm_hvg_flavour == "squidpy":
2 changes: 1 addition & 1 deletion panpipes/python_scripts/run_scanpyQC_spatial.py
@@ -158,7 +158,7 @@
single_id = os.path.basename(str(args.input_anndata))
single_id = single_id.replace("_raw.h5mu","")

L.info("Saving updated MuData.obs in a metadata tsv file to ./" + single_id + "_cell_metadata.tsv")
L.info("Saving updated obs in a metadata tsv file to ./" + single_id + "_cell_metadata.tsv")
write_obs(mdata, output_prefix=single_id, output_suffix="_cell_metadata.tsv")
L.info("Saving updated MuData to '%s'" % args.outfile)
mdata.write(args.outfile)
