Skip to content

Commit

Permalink
Fix relayout issues.
Browse files Browse the repository at this point in the history
  • Loading branch information
orenbenkiki committed Apr 9, 2024
1 parent 252d18f commit f3f43a1
Show file tree
Hide file tree
Showing 19 changed files with 104 additions and 75 deletions.
2 changes: 1 addition & 1 deletion deps/test.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#!/bin/bash
set -e -o pipefail
deps/clean.sh
rm -rf tracefile.info src/*.cov src/*/*.cov test/*.cov
JULIA_DEBUG="" JULIA_NUM_THREADS=4 julia --color=no --code-coverage=tracefile.info deps/test.jl "$@" \
|| (deps/clean.sh && false)
2 changes: 1 addition & 1 deletion docs/v0.1.0/.documenter-siteinfo.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"documenter":{"julia_version":"1.10.2","generation_timestamp":"2024-04-08T16:36:49","documenter_version":"1.3.0"}}
{"documenter":{"julia_version":"1.10.2","generation_timestamp":"2024-04-09T14:52:15","documenter_version":"1.3.0"}}
4 changes: 2 additions & 2 deletions docs/v0.1.0/adapters.html
Original file line number Diff line number Diff line change
Expand Up @@ -417,8 +417,8 @@ <h1 id="Adapters">
# under a different name.

result = adapter(
&quot;example&quot;, # A name to use to generate the temporary `Daf` data names.
viewer(daf; ...), # How to view the input in the way expected by the computation.
viewer(daf; ...), # How to view the input in the way expected by the computation.
name = &quot;example&quot;, # A name to use to generate the temporary `Daf` data names.
axes = ..., data = ..., # How and what to view from the output for copying back into `daf`.
empty = ..., # If the input view specifies a subset of some axes.
) do adapted # The writable adapted data we can pass to the computation.
Expand Down
3 changes: 2 additions & 1 deletion docs/v0.1.0/formats.html
Original file line number Diff line number Diff line change
Expand Up @@ -1001,7 +1001,8 @@ <h3 id="Matrix-properties">
format::FormatReader,
rows_axis::AbstractString,
columns_axis::AbstractString,
name::AbstractString,
name::AbstractString;
[for_relayout::Bool = false]
)::Bool
</code>
</pre>
Expand Down
2 changes: 1 addition & 1 deletion docs/v0.1.0/search_index.js

Large diffs are not rendered by default.

12 changes: 6 additions & 6 deletions docs/v0.1.0/views.html
Original file line number Diff line number Diff line change
Expand Up @@ -569,12 +569,6 @@ <h1 id="Views">
<code>VIEW_ALL_MATRICES
</code>), all matrix properties of all the (exposed) axes will also be exposed.
</p>
<p>The order of the axes does not matter, so
<code>data = [(&quot;gene&quot;, &quot;cell&quot;, &quot;UMIs&quot;) =&gt; &quot;=&quot;]
</code> has the same effect as
<code>data = [(&quot;cell&quot;, &quot;gene&quot;, &quot;UMIs&quot;) =&gt; &quot;=&quot;]
</code>.
</p>
<p>That is, assuming a
<code>gene
</code> and
Expand All @@ -587,6 +581,12 @@ <h1 id="Views">
<code>log_UMIs
</code> for each cell and gene.
</p>
<p>The order of the axes does not matter, so
<code>data = [(&quot;gene&quot;, &quot;cell&quot;, &quot;UMIs&quot;) =&gt; &quot;=&quot;]
</code> has the same effect as
<code>data = [(&quot;cell&quot;, &quot;gene&quot;, &quot;UMIs&quot;) =&gt; &quot;=&quot;]
</code>.
</p>
<div class="admonition is-info">
<header class="admonition-header">Note
</header>
Expand Down
4 changes: 2 additions & 2 deletions src/adapters.jl
Original file line number Diff line number Diff line change
Expand Up @@ -72,8 +72,8 @@ daf = ... # Some input `Daf` data we wish to compute on.
# under a different name.
result = adapter(
"example", # A name to use to generate the temporary `Daf` data names.
viewer(daf; ...), # How to view the input in the way expected by the computation.
viewer(daf; ...), # How to view the input in the way expected by the computation.
name = "example", # A name to use to generate the temporary `Daf` data names.
axes = ..., data = ..., # How and what to view from the output for copying back into `daf`.
empty = ..., # If the input view specifies a subset of some axes.
) do adapted # The writable adapted data we can pass to the computation.
Expand Down
10 changes: 7 additions & 3 deletions src/chains.jl
Original file line number Diff line number Diff line change
Expand Up @@ -362,14 +362,18 @@ function Formats.format_has_matrix(
chain::AnyChain,
rows_axis::AbstractString,
columns_axis::AbstractString,
name::AbstractString,
name::AbstractString;
for_relayout::Bool = false,
)::Bool
for daf in chain.dafs
for daf in reverse(chain.dafs)
if Formats.format_has_axis(daf, rows_axis; for_change = false) &&
Formats.format_has_axis(daf, columns_axis; for_change = false) &&
Formats.format_has_matrix(daf, rows_axis, columns_axis, name)
Formats.format_has_matrix(daf, rows_axis, columns_axis, name; for_relayout = for_relayout)
return true
end
if for_relayout
return false # untested
end
end
return false
end
Expand Down
5 changes: 3 additions & 2 deletions src/files_format.jl
Original file line number Diff line number Diff line change
Expand Up @@ -508,7 +508,8 @@ function Formats.format_has_matrix(
files::FilesDaf,
rows_axis::AbstractString,
columns_axis::AbstractString,
name::AbstractString,
name::AbstractString;
for_relayout::Bool = false, # NOLINT
)::Bool
return ispath("$(files.path)/matrices/$(rows_axis)/$(columns_axis)/$(name).json")
end
Expand Down Expand Up @@ -618,7 +619,7 @@ function Formats.format_relayout_matrix!(
columns_axis::AbstractString,
name::AbstractString,
)::Nothing
matrix = Formats.get_matrix_through_cache(files, rows_axis, columns_axis, name)
matrix = Formats.get_matrix_through_cache(files, rows_axis, columns_axis, name).array

if matrix isa SparseMatrixCSC
colptr, rowval, nzval = Formats.format_empty_sparse_matrix!(
Expand Down
11 changes: 7 additions & 4 deletions src/formats.jl
Original file line number Diff line number Diff line change
Expand Up @@ -427,7 +427,8 @@ function format_get_vector end
format::FormatReader,
rows_axis::AbstractString,
columns_axis::AbstractString,
name::AbstractString,
name::AbstractString;
[for_relayout::Bool = false]
)::Bool
Implement checking whether a matrix property with some `name` exists for the `rows_axis` and the `columns_axis` in
Expand Down Expand Up @@ -665,7 +666,8 @@ end

function get_vector_through_cache(format::FormatReader, axis::AbstractString, name::AbstractString)::StorageVector
return get_through_cache(format, vector_cache_key(axis, name), StorageVector) do
return format_get_vector(format, axis, name)
vector = format_get_vector(format, axis, name)
return as_named_vector(format, axis, vector)
end
end

Expand All @@ -674,9 +676,10 @@ function get_matrix_through_cache(
rows_axis::AbstractString,
columns_axis::AbstractString,
name::AbstractString,
)::StorageMatrix
)::NamedArray
return get_through_cache(format, matrix_cache_key(rows_axis, columns_axis, name), StorageMatrix) do
return format_get_matrix(format, rows_axis, columns_axis, name)
matrix = format_get_matrix(format, rows_axis, columns_axis, name)
return as_named_matrix(format, rows_axis, columns_axis, matrix)
end
end

Expand Down
5 changes: 3 additions & 2 deletions src/h5df_format.jl
Original file line number Diff line number Diff line change
Expand Up @@ -593,7 +593,8 @@ function Formats.format_has_matrix(
h5df::H5df,
rows_axis::AbstractString,
columns_axis::AbstractString,
name::AbstractString,
name::AbstractString;
for_relayout::Bool = false, # NOLINT
)::Bool
matrices_group = h5df.root["matrices"]
@assert matrices_group isa HDF5.Group
Expand Down Expand Up @@ -737,7 +738,7 @@ function Formats.format_relayout_matrix!(
columns_axis::AbstractString,
name::AbstractString,
)::Nothing
matrix = Formats.get_matrix_through_cache(h5df, rows_axis, columns_axis, name)
matrix = Formats.get_matrix_through_cache(h5df, rows_axis, columns_axis, name).array

if matrix isa SparseMatrixCSC
colptr, rowval, nzval = Formats.format_empty_sparse_matrix!(
Expand Down
5 changes: 4 additions & 1 deletion src/matrix_layouts.jl
Original file line number Diff line number Diff line change
Expand Up @@ -274,7 +274,10 @@ end

function relayout!(matrix::AbstractSparseMatrix)::AbstractMatrix
@assert require_major_axis(matrix) == Columns
return transpose(SparseMatrixCSC(transpose(matrix)))
@debug "relayout! $(depict(matrix)) {" # NOLINT
result = transpose(SparseMatrixCSC(transpose(matrix)))
@debug "relayout! $(depict(result)) }" # NOLINT
return result
end

function relayout!(matrix::AbstractMatrix)::AbstractMatrix
Expand Down
3 changes: 2 additions & 1 deletion src/memory_format.jl
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,8 @@ function Formats.format_has_matrix(
memory::MemoryDaf,
rows_axis::AbstractString,
columns_axis::AbstractString,
name::AbstractString,
name::AbstractString;
for_relayout::Bool = false, # NOLINT
)::Bool
return haskey(memory.matrices[rows_axis][columns_axis], name)
end
Expand Down
26 changes: 14 additions & 12 deletions src/queries.jl
Original file line number Diff line number Diff line change
Expand Up @@ -934,7 +934,7 @@ struct Axis <: Query
axis_name::AbstractString
end

function get_query(daf::DafReader, axis::Axis; cache::Bool = true)::NamedArray
function get_query(daf::DafReader, axis::Axis; cache::Bool = true)::AbstractStringVector
return get_query(daf, QuerySequence((axis,)); cache = cache)
end

Expand Down Expand Up @@ -1476,7 +1476,7 @@ end
function Base.getindex(
daf::DafReader,
query::Union{Query, AbstractString},
)::Union{AbstractStringSet, StorageScalar, NamedArray}
)::Union{AbstractStringSet, AbstractStringVector, StorageScalar, NamedArray}
return get_query(daf, query)
end

Expand All @@ -1498,15 +1498,15 @@ function get_query(
daf::DafReader,
query_string::AbstractString;
cache::Bool = true,
)::Union{AbstractStringSet, StorageScalar, NamedArray}
)::Union{AbstractStringSet, AbstractStringVector, StorageScalar, NamedArray}
return get_query(daf, Query(query_string); cache = cache)
end

function get_query(
daf::DafReader,
query_sequence::QuerySequence;
cache::Bool = true,
)::Union{AbstractStringSet, StorageScalar, NamedArray}
)::Union{AbstractStringSet, AbstractStringVector, StorageScalar, NamedArray}
cache_key = join([string(query_operation) for query_operation in query_sequence.query_operations], " ")
cached_entry = get(daf.internal.cache, cache_key, nothing)
if cached_entry !== nothing
Expand All @@ -1528,7 +1528,7 @@ function get_query(
Formats.cache_data!(daf, cache_key, result, QueryData)
store_cached_dependency_keys!(daf, cache_key, dependency_keys)
end
@debug "get_query daf: $(depict(daf)) query_sequence: $(query_sequence) cache: $(cache) cached result: $(depict(result))"
@debug "get_query daf: $(depict(daf)) query_sequence: $(query_sequence) cache: $(cache) result: $(depict(result))"
return result
end
end
Expand Down Expand Up @@ -1585,7 +1585,7 @@ end

function get_query_result(
query_state::QueryState,
)::Tuple{Union{AbstractStringSet, StorageScalar, NamedArray}, Set{AbstractString}}
)::Tuple{Union{AbstractStringSet, AbstractStringVector, StorageScalar, NamedArray}, Set{AbstractString}}
if is_all(query_state, (AbstractStringSet,))
return get_names_result(query_state)
elseif is_all(query_state, (ScalarState,))
Expand Down Expand Up @@ -1635,19 +1635,21 @@ function get_scalar_result(query_state::QueryState)::Tuple{StorageScalar, Set{Ab
return scalar_state.scalar_value, scalar_state.dependency_keys
end

function get_axis_result(query_state::QueryState)::Tuple{Union{AbstractString, NamedArray}, Set{AbstractString}}
function get_axis_result(
query_state::QueryState,
)::Tuple{Union{AbstractString, AbstractStringVector}, Set{AbstractString}}
axis_state = pop!(query_state.stack)
@assert axis_state isa AxisState

axis_modifier = axis_state.axis_modifier
axis_entries = get_axis(query_state.daf, axis_state.axis_name)
if axis_modifier isa Int
return get_axis(query_state.daf, axis_state.axis_name)[axis_modifier], axis_state.dependency_keys
return axis_entries[axis_modifier], axis_state.dependency_keys
else
named_vector = get_vector(query_state.daf, axis_state.axis_name, "name")
if axis_modifier isa Vector{Bool}
named_vector = named_vector[axis_modifier]
axis_entries = axis_entries[axis_modifier]
end
return named_vector, axis_state.dependency_keys
return axis_entries, axis_state.dependency_keys
end
end

Expand Down Expand Up @@ -3578,7 +3580,7 @@ function get_frame(
end
end

row_names = get_query(daf, axis_query; cache = cache).array
row_names = get_query(daf, axis_query; cache = cache)
@assert row_names isa AbstractStringVector

if columns === nothing
Expand Down
5 changes: 3 additions & 2 deletions src/read_only.jl
Original file line number Diff line number Diff line change
Expand Up @@ -110,9 +110,10 @@ function Formats.format_has_matrix(
read_only_view::DafReadOnlyWrapper,
rows_axis::AbstractString,
columns_axis::AbstractString,
name::AbstractString,
name::AbstractString;
for_relayout::Bool = false,
)::Bool
return Formats.format_has_matrix(read_only_view.daf, rows_axis, columns_axis, name)
return Formats.format_has_matrix(read_only_view.daf, rows_axis, columns_axis, name; for_relayout = for_relayout)
end

function Formats.format_matrix_names(
Expand Down
12 changes: 6 additions & 6 deletions src/readers.jl
Original file line number Diff line number Diff line change
Expand Up @@ -586,18 +586,18 @@ function get_matrix(
if relayout && Formats.format_has_matrix(daf, columns_axis, rows_axis, name)
upgrade_to_write_lock(daf)
result_prefix = "relayout "
if daf isa DafWriter
if daf isa DafWriter &&
Formats.format_has_matrix(daf, columns_axis, rows_axis, name; for_relayout = true)
Formats.format_relayout_matrix!(daf, columns_axis, rows_axis, name)
else
cache_key = Formats.matrix_cache_key(rows_axis, columns_axis, name)
cache_entry = get!(daf.internal.cache, cache_key) do
Formats.store_cached_dependency_key!(daf, cache_key, Formats.axis_cache_key(rows_axis))
Formats.store_cached_dependency_key!(daf, cache_key, Formats.axis_cache_key(columns_axis))
flipped_matrix = Formats.get_matrix_through_cache(daf, columns_axis, rows_axis, name)
return CacheEntry(
MemoryData,
as_named_matrix(daf, rows_axis, columns_axis, transpose(relayout!(flipped_matrix))),
)
flipped_matrix = Formats.get_matrix_through_cache(daf, columns_axis, rows_axis, name).array
relayout_matrix = relayout!(flipped_matrix)
transposed_matrix = transpose(relayout_matrix)
return CacheEntry(MemoryData, as_named_matrix(daf, rows_axis, columns_axis, transposed_matrix))
end
matrix = cache_entry.data
end
Expand Down
28 changes: 19 additions & 9 deletions src/views.jl
Original file line number Diff line number Diff line change
Expand Up @@ -135,12 +135,12 @@ value for matrices can again be `"="` to expose the property as is, or the suffi
pair is `("*", "*", "*") => "="` (or, `VIEW_ALL_MATRICES`), all matrix properties of all the (exposed) axes will also be
exposed.
The order of the axes does not matter, so
`data = [("gene", "cell", "UMIs") => "="]` has the same effect as `data = [("cell", "gene", "UMIs") => "="]`.
That is, assuming a `gene` and `cell` axes were exposed by the `axes` parameter, then specifying that
`("cell", "gene", "log_UMIs") => q": UMIs % Log base 2 eps"` will expose the matrix `log_UMIs` for each cell and gene.
The order of the axes does not matter, so
`data = [("gene", "cell", "UMIs") => "="]` has the same effect as `data = [("cell", "gene", "UMIs") => "="]`.
!!! note
Due to Julia's type system limitations, there's just no way for the system to enforce the type of the pairs
Expand Down Expand Up @@ -562,19 +562,28 @@ function collect_matrix(
else
@assert matrix_query isa Query
end
matrix_query = fetch_rows_axis.query |> fetch_columns_axis.query |> matrix_query
dimensions = query_result_dimensions(matrix_query)

full_matrix_query = fetch_rows_axis.query |> fetch_columns_axis.query |> matrix_query
dimensions = query_result_dimensions(full_matrix_query)
if dimensions != 2
error(
"$(QUERY_TYPE_BY_DIMENSIONS[dimensions + 1]) query: $(matrix_query)\n" *
"$(QUERY_TYPE_BY_DIMENSIONS[dimensions + 1]) query: $(full_matrix_query)\n" *
"for the matrix: $(matrix_name)\n" *
"for the rows axis: $(rows_axis_name)\n" *
"and the columns axis: $(columns_axis_name)\n" *
"for the view: $(view_name)\n" *
"of the daf data: $(daf.name)",
)
end
collected_matrices[rows_axis_name][columns_axis_name][matrix_name] = Fetch{StorageMatrix}(matrix_query, nothing)
collected_matrices[rows_axis_name][columns_axis_name][matrix_name] =
Fetch{StorageMatrix}(full_matrix_query, nothing)

if rows_axis_name != columns_axis_name
flipped_matrix_query = fetch_columns_axis.query |> fetch_rows_axis.query |> matrix_query
@assert query_result_dimensions(flipped_matrix_query) == 2
collected_matrices[columns_axis_name][rows_axis_name][matrix_name] =
Fetch{StorageMatrix}(flipped_matrix_query, nothing)
end
end
return nothing
end
Expand Down Expand Up @@ -630,7 +639,7 @@ function Formats.format_get_axis(view::DafView, axis::AbstractString)::AbstractS
fetch_axis = view.axes[axis]
axis_names = fetch_axis.value
if axis_names === nothing
axis_names = as_read_only_array(get_query(view.daf, fetch_axis.query).array)
axis_names = as_read_only_array(get_query(view.daf, fetch_axis.query))
if !(eltype(axis_names) <: AbstractString)
error(
"non-String vector of: $(eltype(axis_names))\n" *
Expand Down Expand Up @@ -670,7 +679,8 @@ function Formats.format_has_matrix(
view::DafView,
rows_axis::AbstractString,
columns_axis::AbstractString,
name::AbstractString,
name::AbstractString;
for_relayout::Bool = false, # NOLINT
)::Bool
return haskey(view.matrices[rows_axis][columns_axis], name)
end
Expand Down
Loading

0 comments on commit f3f43a1

Please sign in to comment.