diff --git a/panpipes/python_scripts/rerun_find_neighbors_for_clustering.py b/panpipes/python_scripts/rerun_find_neighbors_for_clustering.py index 9681ecf5..6cc1efbe 100644 --- a/panpipes/python_scripts/rerun_find_neighbors_for_clustering.py +++ b/panpipes/python_scripts/rerun_find_neighbors_for_clustering.py @@ -35,20 +35,19 @@ neighbor_dict = read_yaml(args.neighbor_dict) sc.settings.n_jobs = int(args.n_threads) -L.info("Running with options: %s", args) # read data -L.info("reading mudata") +L.info("Reading in MuData from '%s'" % args.infile) mdata = read(args.infile) for mod in neighbor_dict.keys(): if neighbor_dict[mod]['use_existing']: - L.info('using existing neighbors graph for %s' % mod) + L.info('Using existing neighbors graph for %s' % mod) pass else: - L.info("computing new neighbors for %s" % mod) + L.info("Computing new neighbors for %s" % mod) if type(mdata) is MuData: adata=mdata[mod] if (neighbor_dict[mod]['dim_red'] == "X_pca") and ("X_pca" not in adata.obsm.keys()): @@ -82,6 +81,6 @@ -L.info("saving data") +L.info("Saving updated MuData to '%s'" % args.outfile) mdata.write(args.outfile) -L.info("done") \ No newline at end of file +L.info("Done") \ No newline at end of file diff --git a/panpipes/python_scripts/run_clustering.py b/panpipes/python_scripts/run_clustering.py index 965a7bcb..832c058b 100644 --- a/panpipes/python_scripts/run_clustering.py +++ b/panpipes/python_scripts/run_clustering.py @@ -33,6 +33,7 @@ L.info(args) # read data +L.info("Reading in data from '%s'" % args.infile) mdata = mu.read(args.infile) if type(mdata) is AnnData: adata = mdata @@ -45,23 +46,30 @@ # check sc.pp.neihgbours has been run if uns_key not in adata.uns.keys(): # sys.exit("Error: sc.pp.neighbours has not been run on this object") + L.warning("Running neighbors with default parameters since no neighbors graph found in this data object") sc.pp.neighbors(adata) uns_key="neighbors" # run command if args.algorithm == "louvain": + L.info("Running Louvain clustering") sc.tl.louvain(adata, resolution=float(args.resolution), key_added='clusters', neighbors_key=uns_key) elif args.algorithm == "leiden": + L.info("Running Leiden clustering") sc.tl.leiden(adata, resolution=float(args.resolution), key_added='clusters', neighbors_key=uns_key) else: - sys.exit("algorithm not found: please specify 'louvain' or 'leiden'") + L.error("Could not find clustering algorithm '%s'. Please specify 'louvain' or 'leiden'" % args.algorithm) + sys.exit("Could not find clustering algorithm '%s'. Please specify 'louvain' or 'leiden'" % args.algorithm) #mdata.update() ## write out clusters as text file +L.info("Saving cluster column to csv file '%s'" % args.outfile) clusters = pd.DataFrame(adata.obs['clusters']) clusters.to_csv(args.outfile, sep='\t') +L.info("Saving cell numbers per cluster to csv file") + tmp = clusters['clusters'].value_counts().to_frame("cell_num").reset_index().rename(columns={"index":"cluster"}) tmp.to_csv(os.path.dirname(args.outfile) + "/cellnum_per_cluster.csv") diff --git a/panpipes/python_scripts/run_umap.py b/panpipes/python_scripts/run_umap.py index 6a5b957b..63587482 100644 --- a/panpipes/python_scripts/run_umap.py +++ b/panpipes/python_scripts/run_umap.py @@ -39,6 +39,7 @@ L.info(args) # read data +L.info("Reading in data from '%s'" % args.infile) mdata = mu.read(args.infile) if type(mdata) is AnnData: adata = mdata @@ -55,7 +56,7 @@ # check sc.pp.neihgbours has been run if uns_key not in adata.uns.keys(): # sys.exit("Error: sc.pp.neighbours has not been run on this object") - L.warning("running neighbors with default parameters since no neighbors graph found in this data object") + L.warning("Running neighbors with default parameters since no neighbors graph found in this data object") sc.pp.neighbors(adata) uns_key="neighbors" @@ -68,6 +69,7 @@ # what parameters? +L.info("Running UMAP on neighbors_key %s" % uns_key) if uns_key =="wnn": mu.tl.umap(adata, min_dist=float(args.min_dist), neighbors_key=uns_key) else: @@ -81,4 +83,5 @@ # save coordinates to file # (note this saves values values up to 6 significant figures, because why save 20 for a plot +L.info("Saving UMAP coordinates to csv file '%s'" % args.outfile) umap_coords.to_csv(args.outfile, sep = '\t')