From d76945cb3566fef75d5750f3edddde5148a96ae2 Mon Sep 17 00:00:00 2001 From: Oren Ben-Kiki Date: Fri, 29 Mar 2024 23:41:50 +0300 Subject: [PATCH] Cache named vectors/matrices, generalize relayout. --- docs/v0.1.0/.documenter-siteinfo.json | 2 +- src/data.jl | 72 +++----------------------- src/formats.jl | 74 +++++++++++++++++++++++++-- src/matrix_layouts.jl | 6 ++- test/matrix_layouts.jl | 9 ++-- test/runtests.jl | 5 -- 6 files changed, 86 insertions(+), 82 deletions(-) diff --git a/docs/v0.1.0/.documenter-siteinfo.json b/docs/v0.1.0/.documenter-siteinfo.json index d6b5455..604dd76 100644 --- a/docs/v0.1.0/.documenter-siteinfo.json +++ b/docs/v0.1.0/.documenter-siteinfo.json @@ -1 +1 @@ -{"documenter":{"julia_version":"1.10.2","generation_timestamp":"2024-03-29T21:22:31","documenter_version":"1.3.0"}} \ No newline at end of file +{"documenter":{"julia_version":"1.10.2","generation_timestamp":"2024-03-29T23:41:20","documenter_version":"1.3.0"}} \ No newline at end of file diff --git a/src/data.jl b/src/data.jl index 27b892d..306da5c 100644 --- a/src/data.jl +++ b/src/data.jl @@ -83,6 +83,9 @@ using NamedArrays using SparseArrays import Daf.Formats +import Daf.Formats.as_named_matrix +import Daf.Formats.as_named_vector +import Daf.Formats.as_read_only_array import Daf.Formats.CacheEntry import Daf.Formats.FormatReader import Daf.Formats.FormatWriter @@ -1509,7 +1512,10 @@ function get_matrix( Formats.store_cached_dependency_key!(daf, cache_key, Formats.axis_cache_key(rows_axis)) Formats.store_cached_dependency_key!(daf, cache_key, Formats.axis_cache_key(columns_axis)) flipped_matrix = Formats.get_matrix_through_cache(daf, columns_axis, rows_axis, name) - return CacheEntry(MemoryData, transpose(relayout!(flipped_matrix))) + return CacheEntry( + MemoryData, + as_named_matrix(daf, rows_axis, columns_axis, transpose(relayout!(flipped_matrix))), + ) end matrix = cache_entry.data end @@ -1613,22 +1619,6 @@ function require_not_name(daf::DafReader, axis::AbstractString, name::AbstractSt return nothing end -function as_read_only_array(array::SparseArrays.ReadOnly)::SparseArrays.ReadOnly - return array -end - -function as_read_only_array(array::NamedArray)::NamedArray - if array.array isa SparseArrays.ReadOnly - return array # untested - else - return NamedArray(as_read_only_array(array.array), array.dicts, array.dimnames) - end -end - -function as_read_only_array(array::AbstractArray)::SparseArrays.ReadOnly - return SparseArrays.ReadOnly(array) -end - function require_dim_name( daf::DafReader, axis::AbstractString, @@ -1657,54 +1647,6 @@ function require_axis_names( end end -function as_named_vector(daf::DafReader, axis::AbstractString, vector::NamedVector)::NamedArray - return vector -end - -function as_named_vector(daf::DafReader, axis::AbstractString, vector::AbstractVector)::NamedArray - axis_names_dict = get(daf.internal.axes, axis, nothing) - if axis_names_dict == nothing - named_array = NamedArray(vector; names = (get_axis(daf, axis),), dimnames = (axis,)) - daf.internal.axes[axis] = named_array.dicts[1] - return named_array - - else - return NamedArray(vector, (axis_names_dict,), (axis,)) - end -end - -function as_named_matrix( - daf::DafReader, - rows_axis::AbstractString, - columns_axis::AbstractString, - matrix::NamedMatrix, -)::NamedArray - return matrix -end - -function as_named_matrix( - daf::DafReader, - rows_axis::AbstractString, - columns_axis::AbstractString, - matrix::AbstractMatrix, -)::NamedArray - rows_axis_names_dict = get(daf.internal.axes, rows_axis, nothing) - columns_axis_names_dict = get(daf.internal.axes, columns_axis, nothing) - if rows_axis_names_dict == nothing || columns_axis_names_dict == nothing - named_array = NamedArray( - matrix; - names = (get_axis(daf, rows_axis), get_axis(daf, columns_axis)), - dimnames = (rows_axis, columns_axis), - ) - daf.internal.axes[rows_axis] = named_array.dicts[1] - daf.internal.axes[columns_axis] = named_array.dicts[2] - return named_array - - else - return NamedArray(matrix, (rows_axis_names_dict, columns_axis_names_dict), (rows_axis, columns_axis)) - end -end - function base_array(array::AbstractArray)::AbstractArray return array end diff --git a/src/formats.jl b/src/formats.jl index 363b610..f0a9fe7 100644 --- a/src/formats.jl +++ b/src/formats.jl @@ -49,6 +49,7 @@ using Daf.MatrixLayouts using Daf.Messages using Daf.StorageTypes using Daf.Tokens +using NamedArrays using OrderedCollections using SparseArrays @@ -94,7 +95,7 @@ If too much data has been cached, call `empty_cache!` to release it. struct CacheEntry cache_type::CacheType - data::Union{AbstractStringSet, StorageScalar, StorageVector, StorageMatrix} + data::Union{AbstractStringSet, AbstractStringVector, StorageScalar, NamedArray} end """ @@ -687,7 +688,7 @@ end function cache_data!( format::FormatReader, cache_key::AbstractString, - data::Union{AbstractStringSet, StorageScalar, StorageVector, StorageMatrix}, + data::Union{AbstractStringSet, AbstractStringVector, StorageScalar, NamedArray}, cache_type::CacheType, )::Nothing @assert format.internal.writer_thread[1] == threadid() @@ -756,7 +757,8 @@ function cache_vector!( cache_type::CacheType, )::Nothing cache_key = vector_cache_key(axis, name) - cache_data!(format, cache_key, vector, cache_type) + named_vector = as_named_vector(format, axis, vector) + cache_data!(format, cache_key, named_vector, cache_type) store_cached_dependency_key!(format, cache_key, axis_cache_key(axis)) return nothing end @@ -770,12 +772,76 @@ function cache_matrix!( cache_type::CacheType, )::Nothing cache_key = matrix_cache_key(rows_axis, columns_axis, name) - cache_data!(format, cache_key, matrix, cache_type) + named_matrix = as_named_matrix(format, rows_axis, columns_axis, matrix) + cache_data!(format, cache_key, named_matrix, cache_type) store_cached_dependency_key!(format, cache_key, axis_cache_key(rows_axis)) store_cached_dependency_key!(format, cache_key, axis_cache_key(columns_axis)) return nothing end +function as_named_vector(format::FormatReader, axis::AbstractString, vector::NamedVector)::NamedArray + return vector +end + +function as_named_vector(format::FormatReader, axis::AbstractString, vector::AbstractVector)::NamedArray + axis_names_dict = get(format.internal.axes, axis, nothing) + if axis_names_dict == nothing + names = as_read_only_array(Formats.get_axis_through_cache(format, axis)) + named_array = NamedArray(vector; names = (names,), dimnames = (axis,)) + format.internal.axes[axis] = named_array.dicts[1] + return named_array + + else + return NamedArray(vector, (axis_names_dict,), (axis,)) + end +end + +function as_named_matrix( + format::FormatReader, + rows_axis::AbstractString, + columns_axis::AbstractString, + matrix::NamedMatrix, +)::NamedArray + return matrix +end + +function as_named_matrix( + format::FormatReader, + rows_axis::AbstractString, + columns_axis::AbstractString, + matrix::AbstractMatrix, +)::NamedArray + rows_axis_names_dict = get(format.internal.axes, rows_axis, nothing) + columns_axis_names_dict = get(format.internal.axes, columns_axis, nothing) + if rows_axis_names_dict == nothing || columns_axis_names_dict == nothing + rows_names = as_read_only_array(Formats.get_axis_through_cache(format, rows_axis)) + columns_names = as_read_only_array(Formats.get_axis_through_cache(format, columns_axis)) + named_array = NamedArray(matrix; names = (rows_names, columns_names), dimnames = (rows_axis, columns_axis)) + format.internal.axes[rows_axis] = named_array.dicts[1] + format.internal.axes[columns_axis] = named_array.dicts[2] + return named_array + + else + return NamedArray(matrix, (rows_axis_names_dict, columns_axis_names_dict), (rows_axis, columns_axis)) + end +end + +function as_read_only_array(array::SparseArrays.ReadOnly)::SparseArrays.ReadOnly + return array +end + +function as_read_only_array(array::NamedArray)::NamedArray + if array.array isa SparseArrays.ReadOnly + return array # untested + else + return NamedArray(as_read_only_array(array.array), array.dicts, array.dimnames) + end +end + +function as_read_only_array(array::AbstractArray)::SparseArrays.ReadOnly + return SparseArrays.ReadOnly(array) +end + function scalar_names_cache_key()::String return "? scalars" end diff --git a/src/matrix_layouts.jl b/src/matrix_layouts.jl index 46ba0b2..a8cefbf 100644 --- a/src/matrix_layouts.jl +++ b/src/matrix_layouts.jl @@ -326,9 +326,11 @@ function relayout!(destination::DenseMatrix, source::AbstractMatrix)::DenseMatri error("relayout destination size: $(size(destination))\nis different from source size: $(size(source))") end if issparse(source) - error("relayout dense destination: $(typeof(destination))\nand sparse source: $(typeof(source))") + destination .= source + else + transpose!(destination, transpose(source)) end - return transpose!(destination, transpose(source)) + return destination end function relayout!(destination::AbstractMatrix, source::AbstractMatrix)::AbstractMatrix # untested diff --git a/test/matrix_layouts.jl b/test/matrix_layouts.jl index 5110696..5343cb9 100644 --- a/test/matrix_layouts.jl +++ b/test/matrix_layouts.jl @@ -272,11 +272,10 @@ nested_test("matrix_layouts") do end nested_test("destination_dense") do - destination = rand(4, 6) - @test_throws dedent(""" - relayout dense destination: Matrix{Float64} - and sparse source: SparseMatrixCSC{Float64, Int64} - """) relayout!(destination, source) + destination = transpose(rand(6, 4)) + relayout!(destination, source) + @test major_axis(destination) == Rows + @test destination == source end nested_test("read_only") do diff --git a/test/runtests.jl b/test/runtests.jl index 0be90c2..1f4967c 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -59,11 +59,6 @@ function with_unwrapping_exceptions(action::Function)::Any end end -#function test_similar(left::Any, right::Any)::Nothing -# @test "$(left)" == "$(right)" -# return nothing -#end - include("matrix_layouts.jl") include("messages.jl") include("data.jl")