fix #76 #75 #77 #62

mortazavilab · Dec 4, 2023 · 7c781fa · 7c781fa
1 parent b941d68
commit 7c781fa
Show file tree

Hide file tree

Showing 3 changed files with 31 additions and 21 deletions.
diff --git a/PyWGCNA/utils.py b/PyWGCNA/utils.py
@@ -1,6 +1,7 @@
 import pickle
 import os
 import biomart
+import pandas as pd
 import requests
 import matplotlib.pyplot as plt
 import networkx as nx
@@ -123,10 +124,11 @@ def getGeneList(dataset='mmusculus_gene_ensembl',
     # Store the data in a dict
     for line in data.splitlines():
         line = line.split('\t')
+        tmp = pd.DataFrame(line, index=attributes).T
         dict = {}
         for i in range(len(attributes)):
             dict[attributes[i]] = line[i]
-        geneInfo = geneInfo.append(dict, ignore_index=True)
+        geneInfo = pd.concat([geneInfo, tmp], ignore_index=True)
 
     geneInfo.index = geneInfo[attributes[0]]
     geneInfo.drop(attributes[0], axis=1, inplace=True)

diff --git a/PyWGCNA/wgcna.py b/PyWGCNA/wgcna.py
@@ -1741,7 +1741,7 @@ def cutreeHybrid(dendro, distM, cutHeight=None, minClusterSize=20, deepSplit=1,
                                     useObjects = ColorsX in np.unique(labelsOnBranch)
                                     DistSClustClust = distM.iloc[InCluster, useObjects]
                                     MeanDist = DistSClustClust.mean(axis=0)
-                                    useColorsFac = pd.Categorical(ColorsX[useObjects])
+                                    useColorsFac = ColorsX[useObjects]#pd.Categorical(ColorsX[useObjects])
                                     MeanDist = pd.DataFrame({'MeanDist': MeanDist, 'useColorsFac': useColorsFac})
                                     MeanMeanDist = MeanDist.groupby(
                                         'useColorsFac').mean()  # tapply(MeanDist, useColorsFac, mean)
@@ -1760,7 +1760,7 @@ def cutreeHybrid(dendro, distM, cutHeight=None, minClusterSize=20, deepSplit=1,
                                 InCluster = np.where(SmallLabels == sclust)[0].tolist()
                                 DistSClustClust = distM.iloc[InCluster, useObjects]
                                 MeanDist = DistSClustClust.mean(axis=0)
-                                useColorsFac = pd.Categorical(ColorsX[useObjects])
+                                useColorsFac = ColorsX[useObjects]#pd.Categorical(ColorsX[useObjects])
                                 MeanDist = pd.DataFrame({'MeanDist': MeanDist, 'useColorsFac': useColorsFac})
                                 MeanMeanDist = MeanDist.groupby(
                                     'useColorsFac').mean()  # tapply(MeanDist, useColorsFac, mean)
@@ -1782,7 +1782,7 @@ def cutreeHybrid(dendro, distM, cutHeight=None, minClusterSize=20, deepSplit=1,
                             basicOnBranch = branch_basicClusters[onBr - 1]
                             labelsOnBranch = branchLabels[basicOnBranch]
                             useObjects = ColorsX in np.unique(labelsOnBranch)
-                            useColorsFac = pd.Categorical(ColorsX[useObjects])
+                            useColorsFac = ColorsX[useObjects]#pd.Categorical(ColorsX[useObjects])
                             UnassdToClustDist = distM.iloc[useObjects, obj].groupby(
                                 'useColorsFac').mean()  # tapply(distM[useObjects, obj], useColorsFac, mean)
                             nearest = UnassdToClustDist.idxmin().astype(int) - 1
@@ -1794,7 +1794,7 @@ def cutreeHybrid(dendro, distM, cutHeight=None, minClusterSize=20, deepSplit=1,
                                 nPAMed = nPAMed + 1
                     else:
                         useObjects = np.where(ColorsX != 0)[0].tolist()
-                        useColorsFac = pd.Categorical(ColorsX[useObjects])
+                        useColorsFac = ColorsX[useObjects]#pd.Categorical(ColorsX[useObjects])
                         tmp = pd.DataFrame(distM.iloc[useObjects, Unlabeled])
                         tmp['group'] = useColorsFac
                         UnassdToClustDist = tmp.groupby(
@@ -2000,9 +2000,9 @@ def moduleEigengenes(expr, colors, impute=True, nPC=1, align="along average", ex
                 u = u[:, 0:min(n, p, nPC)]
                 v = v[0:min(n, p, nPC), :]
                 tmp = datModule.copy()
-                tmp = tmp.append(pd.DataFrame(v[0:min(n, p, nVarExplained), :],
-                                              columns=tmp.columns.tolist()),
-                                 ignore_index=True)
+                tmp = pd.concat([tmp, pd.DataFrame(v[0:min(n, p, nVarExplained), :],
+                                                   columns=tmp.columns.tolist())],
+                                ignore_index=True)
                 veMat = pd.DataFrame(np.corrcoef(tmp.values)).iloc[:-1, -1].T
                 varExpl.iloc[0:min(n, p, nVarExplained), i] = (veMat ** 2).mean(axis=0)
                 pc = v[0].tolist()
@@ -2020,7 +2020,7 @@ def moduleEigengenes(expr, colors, impute=True, nPC=1, align="along average", ex
                         scaledExpr = pd.DataFrame(scale(datModule.T).T, index=datModule.index,
                                                   columns=datModule.columns)
                         covEx = np.cov(scaledExpr)
-                        covEx[not np.isfinite(covEx)] = 0
+                        covEx[~ np.isfinite(covEx)] = 0
                         modAdj = np.abs(covEx) ** softPower
                         kIM = (modAdj.mean(axis=0)) ** 3
                         if np.max(kIM) > 1:
@@ -2763,22 +2763,27 @@ def getDatTraits(self, metaData):
 
     def module_trait_relationships_heatmap(self,
                                            metaData,
+                                           figsize=None,
                                            show=True,
                                            file_name='module-traitRelationships'):
         """
         plot topic-trait relationship heatmap
 
         :param metaData: traits you would like to see the relationship with topics (must be column name of datExpr.obs)
         :type metaData: list
+        :param figsize: size of the figure as (width, height); if None, it is derived from the shape of moduleTraitPvalue
+        :type figsize: tuple of float
         :param show: indicate if you want to show the plot or not (default: True)
         :type show: bool
         :param file_name: name and path of the plot use for save (default: topic-traitRelationships)
         :type file_name: str
         """
         datTraits = self.getDatTraits(metaData)
 
-        fig, ax = plt.subplots(figsize=(max(20, int(self.moduleTraitPvalue.shape[0] * 1.5)),
-                                        self.moduleTraitPvalue.shape[1] * 1.5), facecolor='white')
+        if figsize is None:
+            figsize = (max(20, int(self.moduleTraitPvalue.shape[0] * 1.5)),
+                       self.moduleTraitPvalue.shape[1] * 1.5)
+        fig, ax = plt.subplots(figsize=figsize, facecolor='white')
         # names
         xlabels = []
         for label in self.MEs.columns:
@@ -3110,7 +3115,7 @@ def barplotModuleEigenGene(self, moduleName, metadata, combine=True, colorBar=No
                 else:
                     return axs
 
-    def functional_enrichment_analysis(self, type, moduleName, sets=None, p_value=1, file_name=None):
+    def functional_enrichment_analysis(self, type, moduleName, sets=None, p_value=1, file_name=None, **kwargs):
         """
         Doing functional enrichment analysis including GO, KEGG and REACTOME
 
@@ -3124,6 +3129,8 @@ def functional_enrichment_analysis(self, type, moduleName, sets=None, p_value=1,
         :type p_value: float
         :param file_name: name of the file you want to use to save plot (default is moduleName)
         :type file_name: str
+        :param kwargs: Other keyword arguments are passed through to the underlying gseapy.enrichr() function
+        :type kwargs: key, value pairings
         """
         if type not in ["GO", "KEGG", "REACTOME"]:
             sys.exit("Type is not valid! it should be one of them GO, KEGG, REACTOME")
@@ -3154,7 +3161,8 @@ def functional_enrichment_analysis(self, type, moduleName, sets=None, p_value=1,
                                  gene_sets=sets,
                                  organism=self.species,
                                  outdir=f"{self.outputPath}figures/{type}/{file_name}",
-                                 cutoff=p_value)
+                                 cutoff=p_value,
+                                 **kwargs)
                 dotplot(enr.res2d,
                         title=f"Gene ontology in {moduleName} module",
                         cmap='viridis_r',
@@ -3409,9 +3417,9 @@ def CoexpressionModulePlot(self, modules, numGenes=10, numConnections=100, minTO
 
         adj = self.TOM.loc[genes, genes]
         adj[adj < minTOM] = 0
-        adj = adj.where(np.triu(np.ones(adj.shape)).astype(bool))
-        adj = adj.where(adj.values != np.diag(adj), 0,
-                        adj.where(adj.values != np.flipud(adj).diagonal(0), 0, inplace=True))
+        adj.where(np.triu(np.ones(adj.shape)).astype(bool), inplace=True)
+        adj.where(adj.values != np.diag(adj), 0, inplace=True)
+        adj.where(adj.values != np.flipud(adj).diagonal(0), 0, inplace=True)
         adj = adj.stack().nlargest(numConnections)
 
         net = Network()

diff --git a/setup.py b/setup.py
@@ -3,17 +3,17 @@
 setup(
     name='PyWGCNA',  # the name of your package
     packages=['PyWGCNA'],  # same as above
-    version='1.20.2',  # version number
+    version='2.0.0',  # version number
     license='MIT',  # license type
     description='PyWGCNA is a Python package designed to do Weighted correlation network analysis (WGCNA)',
     # short description
     author='Narges Rezaie',  # your name
     author_email='[email protected]',  # your email
     url='https://github.com/mortazavilab/PyWGCNA',  # url to your git repo
-    download_url='https://github.com/mortazavilab/PyWGCNA/archive/refs/tags/v1.20.2.tar.gz',  # link to the tar.gz file associated with this release
+    download_url='https://github.com/mortazavilab/PyWGCNA/archive/refs/tags/v1.20.4.tar.gz',  # link to the tar.gz file associated with this release
     keywords=['PyWGCNA', 'WGCNA', 'bulk', 'gene clustering', 'network analysis'],  #
     install_requires=[  # these can also include >, <, == to enforce version compatibility
-        'pandas==1.4.4',  # make sure the packages you put here are those NOT included in the
+        'pandas>=2.1.0',  # make sure the packages you put here are those NOT included in the
         'numpy>=1.24.0',  # base python distribution
         'scipy>=1.9.1',
         'scikit-learn>=1.2.2',
@@ -22,7 +22,7 @@
         'seaborn>=0.11.2',
         'biomart>=0.9.2',
         'gseapy>=1.0.1',
-        'pyvis>=0.3.1',
+        'pyvis==0.3.1',
         'setuptools>=67.4.0',
         'biomart>=0.9.2',
         'reactome2py>=3.0.0',
@@ -38,6 +38,6 @@
         'Intended Audience :: Science/Research ',
         'Topic :: Scientific/Engineering :: Bio-Informatics',
         'License :: OSI Approved :: MIT License',
-        'Programming Language :: Python :: 3.9',
+        'Programming Language :: Python :: 3.10',
     ],
 )