diff --git a/CHANGELOG.md b/CHANGELOG.md index 49a2ced..bf7158a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,16 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +## [0.4.3] - 2024-11-06 + +### Added + +* Package extension for `Muon.jl` that allows loading data from .h5ad files using the functions `create_datamatrix`, `create_var` and `create_obs`. + +### Fixed + +* Deprecated old `loadh5ad` function that only supported some versions of the .h5ad format. + ## [0.4.2] - 2024-09-27 ### Fixed diff --git a/Project.toml b/Project.toml index 1f74e63..e9619b4 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "SingleCellProjections" uuid = "03d38035-ed2f-4a36-82eb-797f1727ab2e" authors = ["Rasmus Henningsson "] -version = "0.4.2" +version = "0.4.3" [deps] AbstractTrees = "1520ce14-60c1-5f80-bbc7-55ef81b5835c" @@ -23,13 +23,15 @@ Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" ThreadedSparseArrays = "59d54670-b8ac-4d81-ab7a-bb56233e17ab" [weakdeps] -Requires = "ae029012-a4dd-5104-9daa-d747884805df" +Muon = "446846d7-b4ce-489d-bf74-72da18fe3629" PrincipalMomentAnalysis = "6a3ba550-3b7f-11e9-2734-d9178ad1e8db" +Requires = "ae029012-a4dd-5104-9daa-d747884805df" StableRNGs = "860ef19b-820b-49d6-a774-d7a799459cd3" TSne = "24678dba-d5e9-5843-a4c6-250288b04835" UMAP = "c4f8c510-2410-5be4-91d7-4fbaeb39457e" [extensions] +SingleCellProjectionsMuonExt = "Muon" SingleCellProjectionsPrincipalMomentAnalysisExt = "PrincipalMomentAnalysis" SingleCellProjectionsStableRNGsExt = "StableRNGs" SingleCellProjectionsTSneExt = "TSne" diff --git a/ext/SingleCellProjectionsMuonExt.jl b/ext/SingleCellProjectionsMuonExt.jl new file mode 100644 index 0000000..2a11973 --- /dev/null +++ b/ext/SingleCellProjectionsMuonExt.jl @@ -0,0 +1,193 @@ +module SingleCellProjectionsMuonExt + +using SingleCellProjections +using DataFrames + +if isdefined(Base, :get_extension) + using Muon: AnnData, AlignedMapping +else + using ..Muon: 
AnnData, AlignedMapping
end  # closes the `if isdefined(Base, :get_extension)` import guard


# Identify which slot of the parent `AnnData` object the mapping `am` occupies.
# The mapping is compared by identity (`===`) against the fields of `am.ref`.
# Returns one of `:layers`, `:obsm`, `:obsp`, `:varm`, `:varp`.
# Throws an `ArgumentError` if `am` is none of those fields.
function aligned_mapping_type(am::AlignedMapping)
    ref = am.ref
    am === ref.layers && return :layers
    am === ref.obsm && return :obsm
    am === ref.obsp && return :obsp
    am === ref.varm && return :varm
    am === ref.varp && return :varp
    throw(ArgumentError("Unknown AlignedMapping"))
end

"""
    create_var(a::AnnData)

Create a `DataFrame` where the first column contains `var` IDs and the remaining columns contain the `var` annotations from the `AnnData` object.

!!! note
    The interface for loading data from .h5ad files is still considered experimental and might change in a non-breaking release.

See also: [`create_datamatrix`](@ref), [`create_obs`](@ref)
"""
SingleCellProjections.create_var(a::AnnData) =
    insertcols(a.var, 1, :id=>collect(a.var_names); makeunique=true)

"""
    create_obs(a::AnnData)

Create a `DataFrame` where the first column contains `obs` IDs and the remaining columns contain the `obs` annotations from the `AnnData` object.

!!! note
    The interface for loading data from .h5ad files is still considered experimental and might change in a non-breaking release.

See also: [`create_datamatrix`](@ref), [`create_var`](@ref)
"""
SingleCellProjections.create_obs(a::AnnData) =
    insertcols(a.obs, 1, :cell_id=>collect(a.obs_names); makeunique=true)

# Variable annotations for a `DataMatrix`: the full `var` table when `add_var` is true,
# otherwise a single-column table holding only the IDs.
get_var(a::AnnData; add_var) =
    add_var ? create_var(a) : DataFrame(; id=collect(a.var_names))

# Observation annotations for a `DataMatrix`: the full `obs` table when `add_obs` is
# true, otherwise a single-column table holding only the IDs.
get_obs(a::AnnData; add_obs) =
    add_obs ? create_obs(a) : DataFrame(; cell_id=collect(a.obs_names))


# Return `X` with element type compatible with `T`.
# When `eltype(X) <: T` already holds, `X` is returned untouched (no copy, and any lazy
# wrapper is kept). Otherwise every element is converted, which throws if a value cannot
# be represented exactly in `T` (e.g. non-integer values with `T == Int`).
function convert_matrix(::Type{T}, X) where T
    eltype(X) <: T && return X
    convert.(T, X) # handles both sparse and dense cases, gets rid of transposes
end



# Transpose a matrix as cheaply as possible.
#
# A `PermutedDimsArray` whose permutation is `(2,1)` is a lazy transpose of its parent,
# so the parent itself is the transposed matrix. The permutation is a type parameter,
# which lets dispatch select this method. It does not depend on an `@assert` that may
# be disabled, and wrappers with the identity permutation fall through to the generic
# method instead of erroring.
_transpose(X::PermutedDimsArray{<:Any,2,(2,1)}) = parent(X)
_transpose(X) = X'


"""
    create_datamatrix([T], a::AnnData; add_var=false, add_obs=false)
    create_datamatrix([T], am::AlignedMapping, name; add_var=false, add_obs=false)

Creates a `DataMatrix` from an `AnnData` object.
By default, the main matrix `X` is retrieved from `a::AnnData`.
It is also possible to create `DataMatrices` from named objects in: `a.layers`, `a.obsm`, `a.obsp`, `a.varm` and `a.varp`. See examples below.

The optional parameter `T` determines the `eltype` of the returned matrix. If specified, the matrix will be converted to have this `eltype`.

kwargs:
* add_var: Add `var` from the AnnData object to the returned `DataMatrix` (when applicable).
* add_obs: Add `obs` from the AnnData object to the returned `DataMatrix` (when applicable).

!!! note
    The interface for loading data from .h5ad files is still considered experimental and might change in a non-breaking release.

# Examples

All examples below assume that an AnnData object has been loaded first:
```julia
julia> using Muon

julia> a = readh5ad("path/to/file.h5ad");
```

* Load the main matrix `X` from an AnnData object.
```julia
julia> create_datamatrix(a)
DataMatrix (123 variables and 456 observations)
  SparseMatrixCSC{Float32, Int32}
  Variables: id
  Observations: cell_id
```

* Load the main matrix `X` from an AnnData object, and add `var`/`obs` annotations.
```julia
julia> create_datamatrix(a; add_var=true, add_obs=true)
DataMatrix (123 variables and 456 observations)
  SparseMatrixCSC{Float32, Int32}
  Variables: id, feature_type, ...
  Observations: cell_id, cell_type, ...
```

* Load the main matrix `X` from an AnnData object, with eltype `Int`. NB: This will fail if the matrix is not a count matrix.
```julia
julia> create_datamatrix(Int, a)
DataMatrix (123 variables and 456 observations)
  SparseMatrixCSC{Int64, Int32}
  Variables: id
  Observations: cell_id
```

* Load the matrix named `raw_counts` from `layers`, with eltype `Int`. NB: This will fail if the matrix is not a count matrix.
```julia
julia> create_datamatrix(Int, a.layers, "raw_counts")
DataMatrix (123 variables and 456 observations)
  SparseMatrixCSC{Int64, Int32}
  Variables: id
  Observations: cell_id
```

* Load the matrix named `UMAP` from `obsm`.
```julia
julia> create_datamatrix(a.obsm, "UMAP")
DataMatrix (2 variables and 456 observations)
  Matrix{Float64}
  Variables: id
  Observations: cell_id
```

See also: [`create_var`](@ref), [`create_obs`](@ref)
"""
function SingleCellProjections.create_datamatrix(::Type{T}, a::AnnData; add_var=false, add_obs=false) where T
    # The `DataMatrix` is built with variables as rows and observations as columns
    # (see the examples above), hence the transpose of `a.X`.
    X = _transpose(a.X)
    var = get_var(a; add_var)
    obs = get_obs(a; add_obs)
    X = convert_matrix(T, X)
    DataMatrix(X, var, obs)
end
# Without an explicit `T`, `Any` is used so that `convert_matrix` keeps the stored eltype.
SingleCellProjections.create_datamatrix(a::AnnData; kwargs...) = create_datamatrix(Any, a; kwargs...)
# Build a `DataMatrix` from the entry `name` of an aligned mapping of an `AnnData`
# object, i.e. one of `layers`, `obsm`, `obsp`, `varm` or `varp`.
#
# * `T`       - requested eltype; the matrix is converted unless its eltype is already `<: T`.
# * `am`      - the aligned mapping; its parent `AnnData` is reached through `am.ref`.
# * `name`    - key of the entry to load from `am`.
# * `add_var` - include the `var` annotations where the result has `var` as an axis.
# * `add_obs` - include the `obs` annotations where the result has `obs` as an axis.
#
# Throws an `ArgumentError` if the entry is not two-dimensional or if `am` is not one of
# the supported mappings.
function SingleCellProjections.create_datamatrix(::Type{T}, am::AlignedMapping, name; add_var=false, add_obs=false) where T
    a = am.ref
    am_type = aligned_mapping_type(am)
    X = am[name]

    # Entries stored as tables become plain matrices; their column names are kept so
    # they can serve as IDs for the non-aligned axis below.
    new_ids = nothing
    if X isa DataFrame
        new_ids = names(X)
        X = Matrix(X)
    end

    # This validates data read from a file, so it must always run. `@assert` may be
    # disabled, hence the explicit check.
    ndims(X) == 2 ||
        throw(ArgumentError("Expected DataMatrix to have 2 dimensions, got $(ndims(X))"))

    if am_type == :layers
        X = _transpose(X)
        var = get_var(a; add_var)
        obs = get_obs(a; add_obs)
    elseif am_type == :obsm
        X = _transpose(X)
        # After the transpose, the rows are the embedding dimensions.
        id = @something new_ids string.("Dim", 1:size(X,1))
        var = DataFrame(; id)
        obs = get_obs(a; add_obs)
    elseif am_type == :obsp
        X = _transpose(X)
        obs = get_obs(a; add_obs)
        # Both axes are observations. Use an independent copy so that later in-place
        # edits of one annotation table cannot silently change the other.
        var = copy(obs)
    elseif am_type == :varm
        var = get_var(a; add_var)
        # Not transposed: the columns are the embedding dimensions.
        id = @something new_ids string.("Dim", 1:size(X,2))
        obs = DataFrame(; id)
    elseif am_type == :varp
        var = get_var(a; add_var)
        # Both axes are variables; see the note on copying in the `:obsp` branch.
        obs = copy(var)
    else
        # Not reachable with the mapping kinds currently returned by
        # `aligned_mapping_type`; kept so that a new kind fails with a clear message
        # instead of an `UndefVarError`.
        throw(ArgumentError("Unsupported AlignedMapping type: $am_type"))
    end

    X = convert_matrix(T, X)
    DataMatrix(X, var, obs)
end
# Without an explicit `T`, `Any` is used so that `convert_matrix` keeps the stored eltype.
SingleCellProjections.create_datamatrix(am::AlignedMapping, name; kwargs...) = create_datamatrix(Any, am, name; kwargs...)
+ + + + +end diff --git a/src/SingleCellProjections.jl b/src/SingleCellProjections.jl index d1eae2a..ad7cc9c 100644 --- a/src/SingleCellProjections.jl +++ b/src/SingleCellProjections.jl @@ -58,7 +58,10 @@ export ttest_table, mannwhitney!, mannwhitney, - mannwhitney_table + mannwhitney_table, + create_datamatrix, + create_var, + create_obs using LinearAlgebra import LinearAlgebra: svd @@ -140,6 +143,7 @@ include("precompile.jl") @require TSne="24678dba-d5e9-5843-a4c6-250288b04835" include("../ext/SingleCellProjectionsTSneExt.jl") @require PrincipalMomentAnalysis="6a3ba550-3b7f-11e9-2734-d9178ad1e8db" include("../ext/SingleCellProjectionsPrincipalMomentAnalysisExt.jl") @require StableRNGs="860ef19b-820b-49d6-a774-d7a799459cd3" include("../ext/SingleCellProjectionsStableRNGsExt.jl") + @require Muon="446846d7-b4ce-489d-bf74-72da18fe3629" include("../ext/SingleCellProjectionsMuonExt.jl") end end diff --git a/src/datamatrix.jl b/src/datamatrix.jl index f154b16..d3ee16c 100644 --- a/src/datamatrix.jl +++ b/src/datamatrix.jl @@ -44,7 +44,7 @@ struct DataMatrix{T,Tv,To} end validateunique_var(var, 1; report=duplicate_var) - validateunique_var(obs, 1; report=duplicate_obs) + validateunique_obs(obs, 1; report=duplicate_obs) new{T,Tv,To}(matrix, var, obs, models) end end diff --git a/src/h5ad.jl b/src/h5ad.jl index e3926f7..1005c6f 100644 --- a/src/h5ad.jl +++ b/src/h5ad.jl @@ -1,3 +1,6 @@ +function create_datamatrix end +function create_var end +function create_obs end _readh5ad_dataframe_string_array(g) = read(g) @@ -54,8 +57,13 @@ end loadh5ad(filename; var_id_column=:id, obs_id_column=:id) Experimental loading of .h5ad files. + +!!! note + This function is deprecated. Load `Muon.jl` and see help for `create_datamatrix`. """ function loadh5ad(filename; obs_id_column=:id, var_id_col=:id) + @warn "loadh5ad is deprecated, please load Muon.jl and see help for `create_datamatrix`." maxlog=1 + h5open(filename) do h5 @assert read(attributes(h5), "encoding-type") == "anndata"