From ba69a74da7ec02398292c6ec5e445081798ffa67 Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Thu, 21 Mar 2024 13:54:03 +0100 Subject: [PATCH 1/4] (fix): optimize subsetting dask array --- anndata/_core/index.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/anndata/_core/index.py b/anndata/_core/index.py index bdf1bfe27..38c2cdede 100644 --- a/anndata/_core/index.py +++ b/anndata/_core/index.py @@ -147,15 +147,10 @@ def _subset(a: np.ndarray | pd.DataFrame, subset_idx: Index): @_subset.register(DaskArray) def _subset_dask(a: DaskArray, subset_idx: Index): - if all(isinstance(x, cabc.Iterable) for x in subset_idx): + if len(subset_idx) > 1 and all(isinstance(x, cabc.Iterable) for x in subset_idx): if isinstance(a._meta, csc_matrix): return a[:, subset_idx[1]][subset_idx[0], :] - elif isinstance(a._meta, spmatrix): - return a[subset_idx[0], :][:, subset_idx[1]] - else: - # TODO: this may have been working for some cases? - subset_idx = np.ix_(*subset_idx) - return a.vindex[subset_idx] + return a[subset_idx[0], :][:, subset_idx[1]] return a[subset_idx] From 5ff8c1268382415b7a464ab504760c7e7d32f4de Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Thu, 21 Mar 2024 16:56:42 +0100 Subject: [PATCH 2/4] (chore): release note --- docs/release-notes/0.10.7.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/release-notes/0.10.7.md b/docs/release-notes/0.10.7.md index ea1929040..2896ba5ca 100644 --- a/docs/release-notes/0.10.7.md +++ b/docs/release-notes/0.10.7.md @@ -10,3 +10,5 @@ ```{rubric} Performance ``` + +* Remove `vindex` for subsetting `dask.array.Array` because of its slowness and memory consumption {user}`ilan-gold` {pr}`1432` From f6a3af98ecf5dae9da1d585415149811f529dd7b Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Fri, 22 Mar 2024 14:30:19 +0100 Subject: [PATCH 3/4] (fix): try verbose. 
--- .azure-pipelines.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.azure-pipelines.yml b/.azure-pipelines.yml index 645ed67db..55c966cf5 100644 --- a/.azure-pipelines.yml +++ b/.azure-pipelines.yml @@ -61,7 +61,7 @@ jobs: condition: eq(variables['DEPENDENCIES_VERSION'], 'minimum') - script: | - uv pip install --system --compile --pre "anndata[dev,test] @ ." + uv pip install -v --system --compile --pre "anndata[dev,test] @ ." displayName: "Install dependencies release candidates" condition: eq(variables['DEPENDENCIES_VERSION'], 'pre-release') From fd4e7c91dfe9ed1015365cbfb0fde7709a1f9262 Mon Sep 17 00:00:00 2001 From: "Philipp A." Date: Fri, 22 Mar 2024 15:01:43 +0100 Subject: [PATCH 4/4] pin scanpy high for pre-releases --- .azure-pipelines.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.azure-pipelines.yml b/.azure-pipelines.yml index 55c966cf5..a2c99c597 100644 --- a/.azure-pipelines.yml +++ b/.azure-pipelines.yml @@ -61,7 +61,7 @@ jobs: condition: eq(variables['DEPENDENCIES_VERSION'], 'minimum') - script: | - uv pip install -v --system --compile --pre "anndata[dev,test] @ ." + uv pip install -v --system --compile --pre "anndata[dev,test] @ ." "scanpy>=1.10.0rc1" displayName: "Install dependencies release candidates" condition: eq(variables['DEPENDENCIES_VERSION'], 'pre-release')