aristoteleo · AlexanderCaichen · Nov 16, 2023 · Nov 17, 2023 · Nov 21, 2023
diff --git a/dynamo/preprocessing/gene_selection.py b/dynamo/preprocessing/gene_selection.py
@@ -233,14 +233,28 @@ def calc_dispersion_by_svr(
 
     for layer in layers:
         valid_CM, detected_bool = get_vaild_CM(adata, layer, **SVRs_kwargs)
-        if valid_CM is None:
-            continue
-
-        mean, cv = get_mean_cv(adata, valid_CM, algorithm, winsorize, winsor_perc)
-        fitted_fun, svr_gamma = get_prediction_by_svr(mean, cv, svr_gamma)
-        score = cv - fitted_fun(mean)
-        if sort_inverse:
-            score = -score
+        # valid_CM seems to be an empty sparse array (rather than None) when adata.var has too few rows.
+        # A None check alone does not catch this. (An empty valid_CM leads to a divide-by-zero error in get_prediction_by_svr().)
+        # Note that simply doing `not valid_CM.toarray()` results in a "truth value of array..." error because it is a numpy array.
+        # `not valid_CM.toarray().tolist()` doesn't work either, because a non-empty list of empty lists is truthy (so it still gives False).
+        if valid_CM is None or valid_CM.shape[1] == 0:
+            main_warning("No valid_CM for layer " + layer)
+
+            # Do not `continue` here: if all layers are skipped then there will be no "score" column,
+            # causing a KeyError during preprocessing.
+
+            # Temporary placeholder values.
+            # adata.shape[1] == valid_CM.shape[1] == adata.var.shape[0]
+            temp = np.full((adata.shape[1],), 0)
+            mean, cv = np.full((adata.shape[1],1), 0), temp
+            _, svr_gamma = None, temp
+            score = temp
+        else:
+            mean, cv = get_mean_cv(adata, valid_CM, algorithm, winsorize, winsor_perc)
+            fitted_fun, svr_gamma = get_prediction_by_svr(mean, cv, svr_gamma)
+            score = cv - fitted_fun(mean)
+            if sort_inverse:
+                score = -score
 
         # Now we can get "SVR" from get_prediction_by_svr
         key = "velocyto_SVR" if layer == "raw" or layer == "X" else layer + "_velocyto_SVR"

diff --git a/dynamo/preprocessing/pca.py b/dynamo/preprocessing/pca.py
@@ -16,7 +16,7 @@
 from sklearn.utils.sparsefuncs import mean_variance_axis
 
 from ..configuration import DKM
-from ..dynamo_logger import main_info_insert_adata_obsm, main_info_insert_adata_var
+from ..dynamo_logger import main_info_insert_adata_obsm, main_info_insert_adata_var, main_warning
 
 
 def _truncatedSVD_with_center(
@@ -46,7 +46,9 @@ def _truncatedSVD_with_center(
     random_state = check_random_state(random_state)
     np.random.set_state(random_state.get_state())
     v0 = random_state.uniform(-1, 1, np.min(X.shape))
-    n_components = min(n_components, X.shape[1] - 1)
+    # svds() requires 0 < k < min(X.shape).
+    # min(X.shape) (i.e. X.shape[0]) is <= 30 when adata.obs is pruned to <= 30 rows.
+    n_components = min(n_components, min(X.shape) - 1)
 
     mean = X.mean(0)
     X_H = X.T.conj()
@@ -241,7 +243,13 @@ def pca(
         adata.var.iloc[bad_genes, adata.var.columns.tolist().index("use_for_pca")] = False
         X_data = X_data[:, valid_ind]
 
-    if use_incremental_PCA:
+    if 0 in X_data.shape:
+        main_warning("No genes passed filter, ABORTING PCA REDUCTION.")
+        if return_all:
+            return adata, None, None
+        else:
+            return adata
+    elif use_incremental_PCA:
         from sklearn.decomposition import IncrementalPCA
 
         fit, X_pca = _pca_fit(

diff --git a/dynamo/preprocessing/utils.py b/dynamo/preprocessing/utils.py
@@ -415,6 +415,13 @@ def get_svr_filter(
     valid_idx = np.where(np.isfinite(adata.var.loc[:, score_name]))[0]
 
     valid_table = adata.var.iloc[valid_idx, :]
+    if len(valid_table) == 0:
+        main_warning("No gene with valid svr scores")
+        if return_adata:
+            return adata
+        else:
+            return np.zeros(adata.n_vars, dtype=bool)
+
     nth_score = np.sort(valid_table.loc[:, score_name])[::-1][np.min((n_top_genes - 1, valid_table.shape[0] - 1))]
 
     feature_gene_idx = np.where(valid_table.loc[:, score_name] >= nth_score)[0][:n_top_genes]