Skip to content

Commit

Permalink
Convert categorical vectors from AnnData (untested).
Browse files Browse the repository at this point in the history
  • Loading branch information
orenbenkiki committed Mar 27, 2024
1 parent abcbe89 commit b14ada4
Show file tree
Hide file tree
Showing 6 changed files with 27 additions and 7 deletions.
2 changes: 1 addition & 1 deletion Manifest.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

julia_version = "1.10.2"
manifest_format = "2.0"
project_hash = "ab97e819dc88455bde61777b2e472f546559810b"
project_hash = "7a1115b240cf0d66872bd0ff033c1831d93217f8"

[[deps.ArgTools]]
uuid = "0dad84c5-d112-42e6-8d28-ef12dabb789f"
Expand Down
1 change: 1 addition & 0 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ authors = ["Oren Ben-Kiki <[email protected]>"]
version = "0.1.0"

[deps]
CategoricalArrays = "324d7699-5711-5eae-9e2f-1d82baa6b597"
ConcurrentUtils = "3df5f688-6c4c-4767-8685-17f5ad261477"
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b"
Expand Down
2 changes: 1 addition & 1 deletion docs/v0.1.0/.documenter-siteinfo.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"documenter":{"julia_version":"1.10.2","generation_timestamp":"2024-03-27T14:12:53","documenter_version":"1.3.0"}}
{"documenter":{"julia_version":"1.10.2","generation_timestamp":"2024-03-27T17:36:40","documenter_version":"1.3.0"}}
4 changes: 4 additions & 0 deletions docs/v0.1.0/anndata_format.html
Original file line number Diff line number Diff line change
Expand Up @@ -321,6 +321,10 @@ <h1 id="AnnData-Format">
<code>Daf
</code> natively support nullable/masked arrays.
</li>
<li>Categorical data.
<code>Daf
</code> has no categorical data type, so categorical vectors are converted to simple strings (a missing value becomes the empty string). However,
<code>Daf
</code> doesn&#39;t support matrices of strings, so categorical matrices are neither supported nor converted.
</li>
<li>Matrix data that only uses one of the axes (that is,
<code>obsm
</code> and
Expand Down
2 changes: 1 addition & 1 deletion docs/v0.1.0/search_index.js

Large diffs are not rendered by default.

23 changes: 19 additions & 4 deletions src/anndata_format.jl
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@ The following `AnnData` can't be naively stored in `Daf`:
convention that zero values are special. This only works in some cases (e.g., it isn't a good solution for Boolean
data). It is possible of course to explicitly store Boolean masks and apply them to the data, but this is
inconvenient. TODO: Have `Daf` natively support nullable/masked arrays.
- Categorical data. `Daf` has no categorical data type, so categorical vectors are converted to simple strings (a
missing value becomes the empty string). However, `Daf` doesn't support matrices of strings, so categorical matrices
are neither supported nor converted.
- Matrix data that only uses one of the axes (that is, `obsm` and `varm` data). The problem here is, paradoxically,
that `Daf` supports such data "too well", by allowing multiple axes to be defined, and storing matrices based on any
pair of axes. However, this requires the other axes to be explicitly created, and their information just doesn't
Expand Down Expand Up @@ -76,6 +78,7 @@ module AnnDataFormat
export anndata_as_daf
export daf_as_anndata

using CategoricalArrays
using Daf.Data
using Daf.Formats
using Daf.Generic
Expand Down Expand Up @@ -209,16 +212,19 @@ function verify_is_supported_type(
property::AbstractString,
unsupported_handler::AbnormalHandler,
)::Nothing
if value isa StorageMatrix && !(value isa Muon.TransposedDataset) && major_axis(value) == nothing
report_unsupported(name, unsupported_handler, "type not in row/column-major layout: $(typeof(value))\n") # untested
end
if value isa CategoricalArray
return nothing # untested
end
if !(value isa supported_type)
report_unsupported(
name,
unsupported_handler,
"unsupported type for $(property): $(typeof(value))\nsupported type is: $(supported_type)\n",
)
end
if value isa StorageMatrix && !(value isa Muon.TransposedDataset) && major_axis(value) == nothing
report_unsupported(name, unsupported_handler, "type not in row/column-major layout: $(typeof(value))\n") # untested
end
return nothing
end

Expand Down Expand Up @@ -280,6 +286,15 @@ end
function copy_supported_vectors(frame::DataFrame, memory::MemoryDaf, axis::AbstractString)::Nothing
for column in names(frame)
vector = frame[!, column]
if vector isa CategoricalVector
vector = [ # untested
if value === missing
""
else
string(value)
end for value in vector
]
end
if vector isa StorageVector
set_vector!(memory, axis, column, vector)
end
Expand All @@ -300,7 +315,7 @@ function copy_supported_matrices(
rows_axis::AbstractString,
columns_axis::AbstractString,
)::Nothing
for (name, matrix) in dict
for (name, matrix) in dict # NOJET
copy_supported_matrix(access_matrix(matrix), memory, rows_axis, columns_axis, name)
end
end
Expand Down

0 comments on commit b14ada4

Please sign in to comment.