11
11
12
12
import anndata as ad
13
13
from anndata ._core .anndata import AnnData
14
- from anndata .compat import CupyArray , DaskArray
14
+ from anndata .compat import CupyArray , DaskArray , DaskDataFrame
15
15
from anndata .experimental .merge import as_group
16
16
from anndata .tests .helpers import (
17
17
GEN_ADATA_DASK_ARGS ,
@@ -74,6 +74,7 @@ def test_dask_X_view():
74
74
75
75
def test_dask_write (adata , tmp_path , diskfmt ):
76
76
import dask .array as da
77
+ import dask .dataframe as ddf
77
78
import numpy as np
78
79
79
80
pth = tmp_path / f"test_write.{ diskfmt } "
@@ -84,6 +85,12 @@ def test_dask_write(adata, tmp_path, diskfmt):
84
85
adata .obsm ["a" ] = da .random .random ((M , 10 ))
85
86
adata .obsm ["b" ] = da .random .random ((M , 10 ))
86
87
adata .varm ["a" ] = da .random .random ((N , 10 ))
88
+ adata .varm ["b" ] = ddf .from_pandas (
89
+ pd .DataFrame (
90
+ {"A" : np .arange (N ), "B" : np .random .randint (1e6 , size = N )},
91
+ index = adata .var_names ,
92
+ )
93
+ )
87
94
88
95
orig = adata
89
96
write (orig , pth )
@@ -93,6 +100,7 @@ def test_dask_write(adata, tmp_path, diskfmt):
93
100
assert_equal (curr .obsm ["a" ], curr .obsm ["b" ])
94
101
95
102
assert_equal (curr .varm ["a" ], orig .varm ["a" ])
103
+ assert_equal (orig .varm ["b" ], curr .varm ["b" ])
96
104
assert_equal (curr .obsm ["a" ], orig .obsm ["a" ])
97
105
98
106
assert isinstance (curr .X , np .ndarray )
@@ -105,6 +113,7 @@ def test_dask_write(adata, tmp_path, diskfmt):
105
113
106
114
def test_dask_distributed_write (adata , tmp_path , diskfmt ):
107
115
import dask .array as da
116
+ import dask .dataframe as ddf
108
117
import dask .distributed as dd
109
118
import numpy as np
110
119
@@ -119,6 +128,12 @@ def test_dask_distributed_write(adata, tmp_path, diskfmt):
119
128
adata .obsm ["a" ] = da .random .random ((M , 10 ))
120
129
adata .obsm ["b" ] = da .random .random ((M , 10 ))
121
130
adata .varm ["a" ] = da .random .random ((N , 10 ))
131
+ adata .varm ["b" ] = ddf .from_pandas (
132
+ pd .DataFrame (
133
+ {"A" : np .arange (N ), "B" : np .random .randint (1e6 , size = N )},
134
+ index = adata .var_names ,
135
+ )
136
+ )
122
137
orig = adata
123
138
if diskfmt == "h5ad" :
124
139
with pytest .raises (ValueError , match = r"Cannot write dask arrays to hdf5" ):
@@ -131,6 +146,7 @@ def test_dask_distributed_write(adata, tmp_path, diskfmt):
131
146
assert_equal (curr .obsm ["a" ], curr .obsm ["b" ])
132
147
133
148
assert_equal (curr .varm ["a" ], orig .varm ["a" ])
149
+ assert_equal (orig .varm ["b" ], curr .varm ["b" ])
134
150
assert_equal (curr .obsm ["a" ], orig .obsm ["a" ])
135
151
136
152
assert isinstance (curr .X , np .ndarray )
@@ -143,6 +159,7 @@ def test_dask_distributed_write(adata, tmp_path, diskfmt):
143
159
144
160
def test_dask_to_memory_check_array_types (adata , tmp_path , diskfmt ):
145
161
import dask .array as da
162
+ import dask .dataframe as ddf
146
163
import numpy as np
147
164
148
165
pth = tmp_path / f"test_write.{ diskfmt } "
@@ -153,6 +170,12 @@ def test_dask_to_memory_check_array_types(adata, tmp_path, diskfmt):
153
170
adata .obsm ["a" ] = da .random .random ((M , 10 ))
154
171
adata .obsm ["b" ] = da .random .random ((M , 10 ))
155
172
adata .varm ["a" ] = da .random .random ((N , 10 ))
173
+ adata .varm ["b" ] = ddf .from_pandas (
174
+ pd .DataFrame (
175
+ {"A" : np .arange (N ), "B" : np .random .randint (1e6 , size = N )},
176
+ index = adata .var_names ,
177
+ )
178
+ )
156
179
157
180
orig = adata
158
181
write (orig , pth )
@@ -161,6 +184,7 @@ def test_dask_to_memory_check_array_types(adata, tmp_path, diskfmt):
161
184
assert isinstance (orig .X , DaskArray )
162
185
assert isinstance (orig .obsm ["a" ], DaskArray )
163
186
assert isinstance (orig .varm ["a" ], DaskArray )
187
+ assert isinstance (orig .varm ["b" ], DaskDataFrame )
164
188
165
189
mem = orig .to_memory ()
166
190
@@ -171,20 +195,25 @@ def test_dask_to_memory_check_array_types(adata, tmp_path, diskfmt):
171
195
assert_equal (curr .obsm ["a" ], orig .obsm ["a" ])
172
196
assert_equal (mem .obsm ["a" ], orig .obsm ["a" ])
173
197
assert_equal (mem .varm ["a" ], orig .varm ["a" ])
198
+ assert_equal (orig .varm ["b" ], mem .varm ["b" ])
174
199
175
200
assert isinstance (curr .X , np .ndarray )
176
201
assert isinstance (curr .obsm ["a" ], np .ndarray )
177
202
assert isinstance (curr .varm ["a" ], np .ndarray )
203
+ assert isinstance (curr .varm ["b" ], pd .DataFrame )
178
204
assert isinstance (mem .X , np .ndarray )
179
205
assert isinstance (mem .obsm ["a" ], np .ndarray )
180
206
assert isinstance (mem .varm ["a" ], np .ndarray )
207
+ assert isinstance (mem .varm ["b" ], pd .DataFrame )
181
208
assert isinstance (orig .X , DaskArray )
182
209
assert isinstance (orig .obsm ["a" ], DaskArray )
183
210
assert isinstance (orig .varm ["a" ], DaskArray )
211
+ assert isinstance (orig .varm ["b" ], DaskDataFrame )
184
212
185
213
186
214
def test_dask_to_memory_copy_check_array_types (adata , tmp_path , diskfmt ):
187
215
import dask .array as da
216
+ import dask .dataframe as ddf
188
217
import numpy as np
189
218
190
219
pth = tmp_path / f"test_write.{ diskfmt } "
@@ -195,6 +224,12 @@ def test_dask_to_memory_copy_check_array_types(adata, tmp_path, diskfmt):
195
224
adata .obsm ["a" ] = da .random .random ((M , 10 ))
196
225
adata .obsm ["b" ] = da .random .random ((M , 10 ))
197
226
adata .varm ["a" ] = da .random .random ((N , 10 ))
227
+ adata .varm ["b" ] = ddf .from_pandas (
228
+ pd .DataFrame (
229
+ {"A" : np .arange (N ), "B" : np .random .randint (1e6 , size = N )},
230
+ index = adata .var_names ,
231
+ )
232
+ )
198
233
199
234
orig = adata
200
235
write (orig , pth )
@@ -209,25 +244,36 @@ def test_dask_to_memory_copy_check_array_types(adata, tmp_path, diskfmt):
209
244
assert_equal (curr .obsm ["a" ], orig .obsm ["a" ])
210
245
assert_equal (mem .obsm ["a" ], orig .obsm ["a" ])
211
246
assert_equal (mem .varm ["a" ], orig .varm ["a" ])
247
+ assert_equal (orig .varm ["b" ], mem .varm ["b" ])
212
248
213
249
assert isinstance (curr .X , np .ndarray )
214
250
assert isinstance (curr .obsm ["a" ], np .ndarray )
215
251
assert isinstance (curr .varm ["a" ], np .ndarray )
252
+ assert isinstance (curr .varm ["b" ], pd .DataFrame )
216
253
assert isinstance (mem .X , np .ndarray )
217
254
assert isinstance (mem .obsm ["a" ], np .ndarray )
218
255
assert isinstance (mem .varm ["a" ], np .ndarray )
256
+ assert isinstance (mem .varm ["b" ], pd .DataFrame )
219
257
assert isinstance (orig .X , DaskArray )
220
258
assert isinstance (orig .obsm ["a" ], DaskArray )
221
259
assert isinstance (orig .varm ["a" ], DaskArray )
260
+ assert isinstance (orig .varm ["b" ], DaskDataFrame )
222
261
223
262
224
263
def test_dask_copy_check_array_types (adata ):
225
264
import dask .array as da
265
+ import dask .dataframe as ddf
226
266
227
267
M , N = adata .X .shape
228
268
adata .obsm ["a" ] = da .random .random ((M , 10 ))
229
269
adata .obsm ["b" ] = da .random .random ((M , 10 ))
230
270
adata .varm ["a" ] = da .random .random ((N , 10 ))
271
+ adata .varm ["b" ] = ddf .from_pandas (
272
+ pd .DataFrame (
273
+ {"A" : np .arange (N ), "B" : np .random .randint (1e6 , size = N )},
274
+ index = adata .var_names ,
275
+ )
276
+ )
231
277
232
278
orig = adata
233
279
curr = adata .copy ()
@@ -236,14 +282,17 @@ def test_dask_copy_check_array_types(adata):
236
282
assert_equal (curr .obsm ["a" ], curr .obsm ["b" ])
237
283
238
284
assert_equal (curr .varm ["a" ], orig .varm ["a" ])
285
+ assert_equal (orig .varm ["b" ], curr .varm ["b" ])
239
286
assert_equal (curr .obsm ["a" ], orig .obsm ["a" ])
240
287
241
288
assert isinstance (curr .X , DaskArray )
242
289
assert isinstance (curr .obsm ["a" ], DaskArray )
243
290
assert isinstance (curr .varm ["a" ], DaskArray )
291
+ assert isinstance (curr .varm ["b" ], DaskDataFrame )
244
292
assert isinstance (orig .X , DaskArray )
245
293
assert isinstance (orig .obsm ["a" ], DaskArray )
246
294
assert isinstance (orig .varm ["a" ], DaskArray )
295
+ assert isinstance (orig .varm ["b" ], DaskDataFrame )
247
296
248
297
249
298
def test_assign_X (adata ):
0 commit comments