Merge pull request #13 from DendrouLab/CRG_compat_with_panpipes_dev

updating with sc_pipeline_muon_dev
DendrouLab · Mar 9, 2023 · 49f7393 · 49f7393
2 parents 0397275 + b4fe99e
commit 49f7393
Show file tree

Hide file tree

Showing 24 changed files with 630 additions and 338 deletions.
diff --git a/README.md b/README.md
@@ -24,7 +24,7 @@ Available pipelines:
 See [installation instructions here](https://github.com/DendrouLab/panpipes/blob/main/docs/install.md)
 Review this issue before installation: https://github.com/DendrouLab/panpipes/issues/11 
 
-<!-- Oxford BMRC Rescomp users find additional advice in [docs/installation_rescomp](https://github.com/DendrouLab/panpipes/blob/main/docs/installation_rescomp.md) -->
+Oxford BMRC Rescomp users can find additional advice in [docs/installation_rescomp](https://github.com/DendrouLab/panpipes/blob/main/docs/installation_rescomp.md)
 
 # General principles for running pipelines
 

diff --git a/docs/install.md b/docs/install.md
@@ -33,7 +33,7 @@ conda activate pipeline_env
 we include an environment.yml for a conda environment tested on all the pipelines packaged in this version of Panpipes.
 
 ##### Step 2 Download and install this repo
-
+If you have not already set up SSH keys for GitHub, first follow these [instructions](https://github.com/DendrouLab/panpipes/blob/main/docs/set_up_ssh_keys_for_github.md): 
 
 ```
 git clone https://github.com/DendrouLab/panpipes

diff --git a/docs/set_up_ssh_keys_for_github.md b/docs/set_up_ssh_keys_for_github.md
@@ -0,0 +1,25 @@
+
+## Set up SSH key for GitHub
+For more advice: https://docs.github.com/en/authentication/connecting-to-github-with-ssh/about-ssh
+
+After checking for existing keys, if you receive an error that ~/.ssh doesn't exist, then you don't have a key. If one already exists (e.g. id_rsa.pub, id_ed25519.pub), you can either connect it to GitHub or generate a new one.
+```
+ls -al ~/.ssh #check for existing keys
+ssh-keygen -t ed25519 -C "your_email@example.com"                                       #use your GitHub email address
+#Enter a file in which to save the key (/c/Users/you/.ssh/id_algorithm):[Press enter]
+#Enter passphrase (empty for no passphrase): [Type a passphrase]
+eval "$(ssh-agent -s)"                                                                  #start ssh-agent
+ssh-add ~/.ssh/id_ed25519                                                               #add your SSH private key to ssh-agent
+clip < ~/.ssh/id_ed25519.pub                                                            #copy SSH public key 
+```
+After copying your SSH public key, go to GitHub --> Settings --> SSH and GPG keys (under Access) --> Add new public SSH key
+
+To test connection 
+```
+ssh -T git@github.com 
+```
+A successful connection should result in 
+> Hi username! You've successfully authenticated, but GitHub does not provide shell access.
+
+Activate the environment
+```
diff --git a/panpipes/funcs/scmethods.py b/panpipes/funcs/scmethods.py
@@ -4,6 +4,7 @@
 from scipy.sparse import issparse
 from scanpy.get import obs_df as get_obs_df
 from scanpy.pp import normalize_total
+import scanpy as sc
 import warnings
 import logging
 from typing import Optional, Literal
@@ -38,9 +39,53 @@ def exp_mean_dense(x):
     # convert out of compressed sparse matrix
     return np.log((np.sum(np.exp(x)-1)/x.shape[1]) + 1)
 
-
-
-def pseudo_seurat(adata, arg_minpct=0.1, arg_mindiffpct=-float("inf"), arg_logfcdiff=0.25, use_dense=False):
+def find_all_markers_pseudo_seurat(
+        adata,
+        groups,
+        groupby,
+        layer=None,
+        method=None,
+        n_genes=float("inf"),
+        corr_method="bonferroni",
+        arg_minpct=0.1,
+        arg_mindiffpct=-float("inf"),
+        arg_logfcdiff=0.25):
+    """Find one-vs-rest marker genes per group on pseudo_seurat-filtered genes.
+
+    groups is a list of values from adata.obs[groupby], or 'all' for every
+    unique value. layer, method, n_genes and corr_method are forwarded to
+    sc.tl.rank_genes_groups; the arg_* thresholds go to pseudo_seurat.
+    Side effects: sets adata.X to the layer (if given) and adata.obs['idents'].
+    Returns (markers, filter_stats), each concatenated over groups and keyed by group.
+    """
+    if layer is not None:
+        adata.X = adata.layers[layer]  # pseudo_seurat reads adata.X, so swap the layer in
+    # pseudo_seurat takes its dense code path whenever X is not a scipy sparse matrix
+    use_dense = not issparse(adata.X)
+    if groups == 'all':
+        groups = adata.obs[groupby].unique().tolist()
+    markers_dict, filter_dict = {}, {}
+    for cv in groups:
+        # set up idents: '1' for cells in the current group, '0' for everything else
+        adata.obs['idents'] = ['1' if x == cv else '0' for x in adata.obs[groupby]]
+        filter_dict[cv] = pseudo_seurat(adata, use_dense=use_dense, arg_minpct=arg_minpct,
+                                        arg_mindiffpct=arg_mindiffpct, arg_logfcdiff=arg_logfcdiff)
+        logging.info("number of genes remaining after filtering:  %i\n" % filter_dict[cv]['background'].sum())
+        adata_rg = adata[:, filter_dict[cv]['background'].tolist()].copy()
+        # pass the caller's n_genes / corr_method through instead of fixed values
+        sc.tl.rank_genes_groups(adata_rg, layer=layer, groupby="idents", groups=["1"],
+                                reference="0", method=method, n_genes=n_genes, corr_method=corr_method)
+        markers_dict[cv] = sc.get.rank_genes_groups_df(adata_rg, group="1")
+        adata_rg = None  # release the filtered copy before the next group
+    markers = pd.concat(markers_dict.values(), keys=markers_dict.keys())
+    filter_stats = pd.concat(filter_dict.values(), keys=filter_dict.keys())
+    return markers, filter_stats
+
+def pseudo_seurat(adata, 
+                  arg_minpct=0.1,
+                  arg_mindiffpct=-float("inf"), 
+                  arg_logfcdiff=0.25, 
+                  use_dense=False):
     """
     alternative method that"s more like seurat (pseudo seurat if you will)
     In that you filter genes before running rank genes
@@ -79,7 +124,6 @@ def pseudo_seurat(adata, arg_minpct=0.1, arg_mindiffpct=-float("inf"), arg_logfc
     min_pct = pcts.min(axis=1)
     diff_pct = max_pct - min_pct
     take_diff_pct = diff_pct > arg_mindiffpct
-
     # remove genes that are not expressed higher than 0.1 in one of the groups
     take_min_pct = max_pct > arg_minpct
 
@@ -88,7 +132,7 @@ def pseudo_seurat(adata, arg_minpct=0.1, arg_mindiffpct=-float("inf"), arg_logfc
     # this has the potential to be very slow. Transposing it speeds it up a bit.
     # I need to understand sparse matrices better to make it work
     if use_dense:
-        print("using dense matrix")
+        logging.info("using dense matrix")
         # extract the counts for cluster cells and calculate exp means on each row
         nct = adata.X.T[:, cluster_cells_ind]
         cluster_mean = np.apply_along_axis(exp_mean_dense, 1, nct.todense())
@@ -98,7 +142,7 @@ def pseudo_seurat(adata, arg_minpct=0.1, arg_mindiffpct=-float("inf"), arg_logfc
         other_mean = np.apply_along_axis(exp_mean_dense, 1, nct.todense())
         diff_mean = abs(cluster_mean - other_mean)
     else:
-        print("using sparse matrix")
+        logging.info("using sparse matrix")
         cluster_mean = exp_mean_sparse(adata.X.T[:, cluster_cells_ind])
         other_mean = exp_mean_sparse(adata.X.T[:, other_cells_ind])
         diff_mean = abs(cluster_mean - other_mean).A1
@@ -122,7 +166,7 @@ def run_neighbors_method_choice(adata, method, n_neighbors, n_pcs, metric, use_r
     # useful if we are dealing with a MuData object but we want to use single rep, e.g.
     # calculating neighbors on a totalVI latent rep
     if method == "scanpy":
-        print("Computing neighbors using scanpy")
+        logging.info("Computing neighbors using scanpy")
         from scanpy.pp import neighbors
         neighbors(adata,
                         n_pcs=n_pcs,
@@ -131,7 +175,7 @@ def run_neighbors_method_choice(adata, method, n_neighbors, n_pcs, metric, use_r
                         use_rep=use_rep)
     elif method == "hnsw":
         from scvelo.pp import neighbors
-        print("Computing neighbors using hnswlib (with scvelo a la pegasus!)")
+        logging.info("Computing neighbors using hnswlib (with scvelo a la pegasus!)")
         # we use the neighbors function from scvelo (thanks!)
         # with parameters from pegasus (for a more exact result).
         # code snippet from Steve Sansom, via COMBAT project