diff --git a/docs/v0.1.0/.documenter-siteinfo.json b/docs/v0.1.0/.documenter-siteinfo.json index 9461178..a9dcd7d 100644 --- a/docs/v0.1.0/.documenter-siteinfo.json +++ b/docs/v0.1.0/.documenter-siteinfo.json @@ -1 +1 @@ -{"documenter":{"julia_version":"1.10.0","generation_timestamp":"2024-04-14T13:08:10","documenter_version":"1.3.0"}} \ No newline at end of file +{"documenter":{"julia_version":"1.10.0","generation_timestamp":"2024-04-15T15:52:15","documenter_version":"1.3.0"}} \ No newline at end of file diff --git a/docs/v0.1.0/formats.html b/docs/v0.1.0/formats.html index 44f0e1b..a23bafe 100644 --- a/docs/v0.1.0/formats.html +++ b/docs/v0.1.0/formats.html @@ -1605,8 +1605,8 @@

- -Daf.Formats.format_empty_dense_vector! + +Daf.Formats.format_get_empty_dense_vector! Function @@ -1615,7 +1615,7 @@

-format_empty_dense_vector!(
+format_get_empty_dense_vector!(
     format::FormatWriter,
     axis::AbstractString,
     name::AbstractString,
@@ -1678,8 +1678,8 @@ 

- -Daf.Formats.format_empty_sparse_vector! + +Daf.Formats.format_filled_empty_dense_vector! Function @@ -1688,7 +1688,38 @@

-format_empty_sparse_vector!(
+format_filled_empty_dense_vector!(
+    format::FormatWriter,
+    axis::AbstractString,
+    name::AbstractString,
+    filled_vector::AbstractVector{T},
+)::Nothing where {T <: StorageNumber}
+
+
+

Allow the +format + to perform caching once the empty dense vector has been +filled +. By default this does nothing. +

+
+
+ +
+
+ + + +Daf.Formats.format_get_empty_sparse_vector! + + — +Function + +
+
+
+
+format_get_empty_sparse_vector!(
     format::FormatWriter,
     axis::AbstractString,
     name::AbstractString,
@@ -1708,8 +1739,8 @@ 

in format . The final tuple element is passed to - -format_filled_sparse_vector! + +format_filled_empty_sparse_vector! .

@@ -1732,8 +1763,8 @@

- -Daf.Formats.format_filled_sparse_vector! + +Daf.Formats.format_filled_empty_sparse_vector! Function @@ -1742,7 +1773,7 @@

-format_filled_sparse_vector!(
+format_filled_empty_sparse_vector!(
     format::FormatWriter,
     axis::AbstractString,
     name::AbstractString,
@@ -1764,8 +1795,8 @@ 

- -Daf.Formats.format_empty_dense_matrix! + +Daf.Formats.format_get_empty_dense_matrix! Function @@ -1774,7 +1805,7 @@

-format_empty_dense_matrix!(
+format_get_empty_dense_matrix!(
     format::FormatWriter,
     rows_axis::AbstractString,
     columns_axis::AbstractString,
@@ -1826,8 +1857,8 @@ 

- -Daf.Formats.format_empty_sparse_matrix! + +Daf.Formats.format_filled_empty_dense_matrix! Function @@ -1836,7 +1867,39 @@

-format_empty_sparse_matrix!(
+format_filled_empty_dense_matrix!(
+    format::FormatWriter,
+    rows_axis::AbstractString,
+    columns_axis::AbstractString,
+    name::AbstractString,
+    filled_matrix::AbstractMatrix{T},
+)::Nothing where {T <: StorageNumber}
+
+
+

Allow the +format + to perform caching once the empty dense matrix has been +filled +. By default this does nothing. +

+
+
+

+
+
+ + + +Daf.Formats.format_get_empty_sparse_matrix! + + — +Function + +
+
+
+
+format_get_empty_sparse_matrix!(
     format::FormatWriter,
     rows_axis::AbstractString,
     columns_axis::AbstractString,
@@ -1857,8 +1920,8 @@ 

in format . The final tuple element is passed to - -format_filled_sparse_matrix! + +format_filled_empty_sparse_matrix! .

@@ -1879,8 +1942,8 @@

- -Daf.Formats.format_filled_sparse_matrix! + +Daf.Formats.format_filled_empty_sparse_matrix! Function @@ -1889,7 +1952,7 @@

-format_filled_dense_matrix!(
+format_filled_empty_sparse_matrix!(
     format::FormatWriter,
     rows_axis::AbstractString,
     columns_axis::AbstractString,
@@ -2026,44 +2089,56 @@ 

  • - -Daf.Formats.format_empty_dense_matrix! + +Daf.Formats.format_filled_empty_dense_matrix!
  • - -Daf.Formats.format_empty_dense_vector! + +Daf.Formats.format_filled_empty_dense_vector!
  • - -Daf.Formats.format_empty_sparse_matrix! + +Daf.Formats.format_filled_empty_sparse_matrix!
  • - -Daf.Formats.format_empty_sparse_vector! + +Daf.Formats.format_filled_empty_sparse_vector!
  • - -Daf.Formats.format_filled_sparse_matrix! + +Daf.Formats.format_get_axis
  • - -Daf.Formats.format_filled_sparse_vector! + +Daf.Formats.format_get_empty_dense_matrix!
  • - -Daf.Formats.format_get_axis + +Daf.Formats.format_get_empty_dense_vector! + + +
  • +
  • + +Daf.Formats.format_get_empty_sparse_matrix! + + +
  • +
  • + +Daf.Formats.format_get_empty_sparse_vector!
  • diff --git a/docs/v0.1.0/index.html b/docs/v0.1.0/index.html index 33d0f30..f0bbeb4 100644 --- a/docs/v0.1.0/index.html +++ b/docs/v0.1.0/index.html @@ -1474,44 +1474,56 @@

  • - -Daf.Formats.format_empty_dense_matrix! + +Daf.Formats.format_filled_empty_dense_matrix!
  • - -Daf.Formats.format_empty_dense_vector! + +Daf.Formats.format_filled_empty_dense_vector!
  • - -Daf.Formats.format_empty_sparse_matrix! + +Daf.Formats.format_filled_empty_sparse_matrix!
  • - -Daf.Formats.format_empty_sparse_vector! + +Daf.Formats.format_filled_empty_sparse_vector!
  • - -Daf.Formats.format_filled_sparse_matrix! + +Daf.Formats.format_get_axis
  • - -Daf.Formats.format_filled_sparse_vector! + +Daf.Formats.format_get_empty_dense_matrix!
  • - -Daf.Formats.format_get_axis + +Daf.Formats.format_get_empty_dense_vector! + + +
  • +
  • + +Daf.Formats.format_get_empty_sparse_matrix! + + +
  • +
  • + +Daf.Formats.format_get_empty_sparse_vector!
  • diff --git a/docs/v0.1.0/objects.inv b/docs/v0.1.0/objects.inv index 7dff1c3..bb8d25f 100644 Binary files a/docs/v0.1.0/objects.inv and b/docs/v0.1.0/objects.inv differ diff --git a/docs/v0.1.0/search_index.js b/docs/v0.1.0/search_index.js index da3b848..b4ded10 100644 --- a/docs/v0.1.0/search_index.js +++ b/docs/v0.1.0/search_index.js @@ -1,3 +1,3 @@ var documenterSearchIndex = {"docs": -[{"location":"storage_types.html#Storage-types","page":"Storage types","title":"Storage types","text":"","category":"section"},{"location":"storage_types.html","page":"Storage types","title":"Storage types","text":"Daf.StorageTypes","category":"page"},{"location":"storage_types.html#Daf.StorageTypes","page":"Storage types","title":"Daf.StorageTypes","text":"Only a restricted set of scalar, matrix and vector types is stored by Daf.\n\nThe set of scalar types is restricted because we need to be able to store them in disk files. This rules out compound types such as Dict. This isn't an issue for vector and matrix elements but is sometimes bothersome for \"scalar\" data (not associated with any axis). If you find yourself needed to store such data, you'll have to serialize it to a string. By convention, we use JSON blobs for such data to maximize portability between different systems.\n\nJulia supports a potentially infinite variety of ways to represent matrices and vectors. Daf is intentionally restricted to specific representations. This has several advantages:\n\nDaf storage formats need only implement storing these restricted representations, which lend themselves to simple storage in consecutive bytes (in memory and/or on disk). These representations also allow for memory-mapping the data from disk files, which allows Daf to deal with data sets larger than the available memory.\nClient code need only worry about dealing with these restricted representations, which limits the amount of code paths required for efficient algorithm implementations. 
However, you (mostly) need not worry about this when invoking library functions, which have code paths covering all common matrix types. You do need to consider the layout of the data, though (see below).\n\nThis has the downside that Daf doesn't support efficient storage of specialized matrices (to pick a random example, upper triangular matrices). This isn't a great loss, since Daf targets storing arbitrary scientific data (especially biological data), which in general is not of any such special shape. The upside is that all matrices stored and returned by Daf have a clear MatrixLayouts (regardless of whether they are dense or sparse). This allows user code to ensure it is working \"with the grain\" of the data, which is much more efficient.\n\n\n\n\n\n","category":"module"},{"location":"storage_types.html#Storable-types","page":"Storage types","title":"Storable types","text":"","category":"section"},{"location":"storage_types.html","page":"Storage types","title":"Storage types","text":"Daf.StorageTypes.StorageInteger\nDaf.StorageTypes.StorageFloat\nDaf.StorageTypes.StorageNumber\nDaf.StorageTypes.StorageScalar\nDaf.StorageTypes.StorageScalarBase\nDaf.StorageTypes.StorageVector\nDaf.StorageTypes.StorageMatrix","category":"page"},{"location":"storage_types.html#Daf.StorageTypes.StorageInteger","page":"Storage types","title":"Daf.StorageTypes.StorageInteger","text":"StorageInteger = Union{Int8, UInt8, Int16, UInt16, Int32, UInt32, Int64, UInt64}\n\nInteger number types that can be used as scalars, or elements in stored matrices or vectors.\n\n\n\n\n\n","category":"type"},{"location":"storage_types.html#Daf.StorageTypes.StorageFloat","page":"Storage types","title":"Daf.StorageTypes.StorageFloat","text":"StorageFloat = Union{Float32, Float64}\n\nFloating point number types that can be used as scalars, or elements in stored matrices or vectors.\n\n\n\n\n\n","category":"type"},{"location":"storage_types.html#Daf.StorageTypes.StorageNumber","page":"Storage 
types","title":"Daf.StorageTypes.StorageNumber","text":"StorageNumber = Union{Bool, StorageInteger, StorageFloat}\n\nNumber types that can be used as scalars, or elements in stored matrices or vectors.\n\n\n\n\n\n","category":"type"},{"location":"storage_types.html#Daf.StorageTypes.StorageScalar","page":"Storage types","title":"Daf.StorageTypes.StorageScalar","text":"StorageScalar = Union{StorageNumber, S} where {S <: AbstractString}\n\nTypes that can be used as scalars, or elements in stored matrices or vectors.\n\nThis is restricted to StorageNumber (including Booleans) and strings. It is arguably too restrictive, as in principle we could support any arbitrary isbitstype. However, in practice this would cause much trouble when accessing the data from other systems (specifically Python and R). Since Daf targets storing scientific data (especially biological data), as opposed to \"anything at all\", this restriction seems reasonable.\n\n\n\n\n\n","category":"type"},{"location":"storage_types.html#Daf.StorageTypes.StorageScalarBase","page":"Storage types","title":"Daf.StorageTypes.StorageScalarBase","text":"StorageScalarBase = Union{StorageNumber, AbstractString}\n\nFor using in where clauses when a type needs to be a StorageScalar. That is, write where {T <: StorageScalarBase} instead of where {T <: StorageScalar}, because of the limitations of Julia's type system.\n\n\n\n\n\n","category":"type"},{"location":"storage_types.html#Daf.StorageTypes.StorageVector","page":"Storage types","title":"Daf.StorageTypes.StorageVector","text":"StorageVector{T} = AbstractVector{T} where {T <: StorageScalar}\n\nVectors that can be directly stored (and fetched) from Daf storage.\n\nThe element type must be a StorageScalar, to allow storing the data in disk files. 
Vectors of strings are supported but will be less efficient.\n\n\n\n\n\n","category":"type"},{"location":"storage_types.html#Daf.StorageTypes.StorageMatrix","page":"Storage types","title":"Daf.StorageTypes.StorageMatrix","text":"StorageMatrix{T} = AbstractMatrix{T} where {T <: StorageNumber}\n\nMatrices that can be directly stored (and fetched) from Daf storage.\n\nThe element type must be a StorageNumber, to allow efficient storage of the data in disk files. That is, matrices of strings are not supported.\n\nnote: Note\nAll matrices we store must have a clear MatrixLayouts, that is, must be in either row-major or column-major format.\n\n\n\n\n\n","category":"type"},{"location":"storage_types.html#Constructors","page":"Storage types","title":"Constructors","text":"","category":"section"},{"location":"storage_types.html#Storable-types-2","page":"Storage types","title":"Storable types","text":"","category":"section"},{"location":"storage_types.html","page":"Storage types","title":"Storage types","text":"Daf.StorageTypes.sparse_vector\nDaf.StorageTypes.sparse_matrix_csc","category":"page"},{"location":"storage_types.html#Daf.StorageTypes.sparse_vector","page":"Storage types","title":"Daf.StorageTypes.sparse_vector","text":"sparse_vector(dense::StorageMatrix)::SparseVector\n\nCreate a sparse vector using the smallest unsigned integer type needed for this size of matrix.\n\n\n\n\n\n","category":"function"},{"location":"storage_types.html#Daf.StorageTypes.sparse_matrix_csc","page":"Storage types","title":"Daf.StorageTypes.sparse_matrix_csc","text":"sparse_matrix_csc(dense::StorageMatrix)::SparseMatrixCSC\n\nCreate a sparse matrix using the smallest unsigned integer type needed for this size of matrix.\n\n\n\n\n\n","category":"function"},{"location":"storage_types.html#Index","page":"Storage types","title":"Index","text":"","category":"section"},{"location":"storage_types.html","page":"Storage types","title":"Storage types","text":"Pages = 
[\"storage_types.md\"]","category":"page"},{"location":"generic_functions.html#Generic-Functions","page":"Generic Functions","title":"Generic Functions","text":"","category":"section"},{"location":"generic_functions.html","page":"Generic Functions","title":"Generic Functions","text":"Daf.GenericFunctions\nDaf.GenericFunctions.dedent\nDaf.GenericFunctions.AbnormalHandler\nDaf.GenericFunctions.handle_abnormal","category":"page"},{"location":"generic_functions.html#Daf.GenericFunctions","page":"Generic Functions","title":"Daf.GenericFunctions","text":"Functions that arguably should belong in a more general-purpose package.\n\nWe do not re-export the functions and supporting types defined here from the top-level Daf namespace. That is, even if using Daf, you will not have these generic names polluting your namespace. If you do want to reuse them in your code, explicitly write using Daf.GenericFunctions.\n\n\n\n\n\n","category":"module"},{"location":"generic_functions.html#Daf.GenericFunctions.dedent","page":"Generic Functions","title":"Daf.GenericFunctions.dedent","text":"dedent(string::AbstractString; indent::AbstractString = \"\")::String\n\nGiven a possibly multi-line string with a common indentation in each line, strip this indentation from all lines, and replace it with indent. 
Will also strip any initial and/or final line breaks.\n\n\n\n\n\n","category":"function"},{"location":"generic_functions.html#Daf.GenericFunctions.AbnormalHandler","page":"Generic Functions","title":"Daf.GenericFunctions.AbnormalHandler","text":"The action to take when encountering an \"abnormal\" (but recoverable) operation.\n\nValid values are:\n\nIgnoreHandler - ignore the issue and perform the recovery operation.\n\nWarnHandler - emit a warning using @warn.\n\nErrorHandler - abort the program with an error message.\n\n\n\n\n\n","category":"type"},{"location":"generic_functions.html#Daf.GenericFunctions.handle_abnormal","page":"Generic Functions","title":"Daf.GenericFunctions.handle_abnormal","text":"handle_abnormal(message::Function, handler::AbnormalHandler)::Nothing\n\nCall this when encountering some abnormal, but recoverable, condition. Follow it by the recovery code.\n\nThis will error if the handler is ErrorHandler, and abort the program. If it is WarnHandler, it will just @warn and return. If it is IgnoreHandler it will just return.\n\nThe message is a function that should return an AbstractString to use. For efficiency, it is not invoked if ignoring the condition.\n\n\n\n\n\n","category":"function"},{"location":"generic_functions.html#Index","page":"Generic Functions","title":"Index","text":"","category":"section"},{"location":"generic_functions.html","page":"Generic Functions","title":"Generic Functions","text":"Pages = [\"generic_functions.md\"]","category":"page"},{"location":"tokens.html#Tokens","page":"Tokens","title":"Tokens","text":"","category":"section"},{"location":"tokens.html","page":"Tokens","title":"Tokens","text":"Daf.Tokens","category":"page"},{"location":"tokens.html#Daf.Tokens","page":"Tokens","title":"Daf.Tokens","text":"The only exported functions from this module are escape_value and unescape_value which are useful when embedding values into query strings. 
The rest of the module is documented to give insight into how a query string is broken into Tokens.\n\nIdeally Daf should have used some established parser generator module for parsing queries, making all this unnecessary. However, As of writing this code, Julia doesn't seem to have such a parser generator solution. Therefore, this module provides a simple tokenize function with rudimentary pattern matching which is all we need to parse queries (whose structure is \"trivial\").\n\n\n\n\n\n","category":"module"},{"location":"tokens.html#Escaping","page":"Tokens","title":"Escaping","text":"","category":"section"},{"location":"tokens.html","page":"Tokens","title":"Tokens","text":"Daf.Tokens.escape_value\nDaf.Tokens.unescape_value\nDaf.Tokens.is_value_char\nDaf.Tokens.VALUE_REGEX","category":"page"},{"location":"tokens.html#Daf.Tokens.escape_value","page":"Tokens","title":"Daf.Tokens.escape_value","text":"escape_value(value::AbstractString)::String\n\nGiven some raw value (name of an axis, axis entry or property, or a parameter value), which may contain special characters, return an escaped version to be used as a single value Token.\n\nWe need to consider the following kinds of characters:\n\nSafe (is_value_char) characters include a - z, A - Z, 0 - 9, _, +, -, and ., as well as any non-ASCII (that is, Unicode) characters. Any sequence of these characters will be considered a single value Token. These cover all the common cases (including signed integer and floating point values).\nAll other ASCII characters are (at least potentially) special, that is, may be used to describe an operation.\nPrefixing any character with a \\ allows using it inside a value Token. This is useful if some name or value contains a special character. 
For example, if you have a cell whose name is ACTG:Plate1, and you want to access the name of the batch of this specific cell, you will have to write / cell = ACTG\\:Plate1 : batch.\n\nnote: Note\nThe \\ character is also used by Julia inside \"...\" string literals, to escape writing non-printable characters. For example, \"\\n\" is a single-character string containing a line break, and therefore \"\\\\\" is used to write a single \\. Thus the above example would have to be written as \"cell = ACTG\\\\:Plate1 : batch\". This isn't nice.Luckily, Julia also has raw\"...\" string literals that work similarly to Python's r\"...\" strings (in Julia, r\"...\" is a regular expression, not a string). Inside raw string literals, a \\ is a \\ (unless it precedes a \"). Therefore the above example could also be written as raw\"/ cell = ACTG\\:Plate1 : batch, which is more readable.\n\nBack to escape_value - it will prefix any special character with a \\. It is useful if you want to programmatically inject a value. Often this happens when using $(...) to embed values into a query string, e.g., do not write a query / $(axis) @ $(property) as it is unsafe, as any of the embedded variables may contain unsafe characters. 
You should instead write something like / $(escape_value(axis)) @ $(escape_value(property)).\n\n\n\n\n\n","category":"function"},{"location":"tokens.html#Daf.Tokens.unescape_value","page":"Tokens","title":"Daf.Tokens.unescape_value","text":"unescape_value(escaped::AbstractString)::String\n\nUndo escape_value, that is, given an escaped value with a \\ characters escaping special characters, drop the \\ to get back the original string value.\n\n\n\n\n\n","category":"function"},{"location":"tokens.html#Daf.Tokens.is_value_char","page":"Tokens","title":"Daf.Tokens.is_value_char","text":"is_value_char(character::Char)::Bool\n\nReturn whether a character is safe to use inside a value Token (name of an axis, axis entry or property, or a parameter value).\n\nThe safe characters are a - z, A - Z, 0 - 9, _, +, -, and ., as well as any non-ASCII (that is, Unicode) characters.\n\n\n\n\n\n","category":"function"},{"location":"tokens.html#Daf.Tokens.VALUE_REGEX","page":"Tokens","title":"Daf.Tokens.VALUE_REGEX","text":"VALUE_REGEX = r\"^(?:[0-9a-zA-Z_.+-]|[^\\x00-\\xFF])+\"\n\nA sequence of is_value_char is considered to be a single value Token. This set of characters was chosen to allow expressing numbers, Booleans and simple names. Any other (ASCII, non-space) character may in principle be used as an operator (possibly in a future version of the code). 
Therefore, use escape_value to protect any value you embed into the expression.\n\n\n\n\n\n","category":"constant"},{"location":"tokens.html#Encoding","page":"Tokens","title":"Encoding","text":"","category":"section"},{"location":"tokens.html","page":"Tokens","title":"Tokens","text":"Daf.Tokens.encode_expression\nDaf.Tokens.decode_expression","category":"page"},{"location":"tokens.html#Daf.Tokens.encode_expression","page":"Tokens","title":"Daf.Tokens.encode_expression","text":"encode_expression(expr_string::AbstractString)::String\n\nGiven an expression string to parse, encode any non-ASCII (that is, Unicode) character, as well as any character escaped by a \\, such that the result will only use is_value_char characters. Every encoded character is replaced by _XX using URI encoding, but replacing the % with a _ so we can deal with unescaped % as an operator, so we also need to encode _ as _5F, so we need to encode \\_ as _5C_5F. Isn't encoding fun?\n\n\n\n\n\n","category":"function"},{"location":"tokens.html#Daf.Tokens.decode_expression","page":"Tokens","title":"Daf.Tokens.decode_expression","text":"decode_expression(encoded_string::AbstractString)::String\n\nGiven the results of encode_expression, decode it back to its original form.\n\n\n\n\n\n","category":"function"},{"location":"tokens.html#Tokenization","page":"Tokens","title":"Tokenization","text":"","category":"section"},{"location":"tokens.html","page":"Tokens","title":"Tokens","text":"Daf.Tokens.Token\nDaf.Tokens.tokenize\nDaf.Tokens.SPACE_REGEX","category":"page"},{"location":"tokens.html#Daf.Tokens.Token","page":"Tokens","title":"Daf.Tokens.Token","text":"struct Token\n is_operator::Bool\n value::AbstractString\n token_index::Int\n first_index::Int\n last_index::Int\n encoded_string::AbstractString\nend\n\nA parsed token of an expression.\n\nWe distinguish between \"value\" tokens and \"operator\" tokens using is_operator. 
A value token holds the name of an axis, axis entry or property, or a parameter value, while an operator token is used to identify a query operation to perform. In both cases, the value contains the token string. This goes through both decode_expression and unescape_value so it can be directly used as-is for value tokens.\n\nWe also keep the location (first_index .. last_index) and the (encoded) expression string, to enable generating friendly error messages. There are no line numbers in locations because in Daf we squash our queries to a single-line, under the assumption they are \"relatively simple\". This allows us to simplify the code.\n\n\n\n\n\n","category":"type"},{"location":"tokens.html#Daf.Tokens.tokenize","page":"Tokens","title":"Daf.Tokens.tokenize","text":"tokenize(string::AbstractString, operators::Regex)::Vector{Token}\n\nGiven an expression string, convert it into a vector of Token.\n\nWe first convert everything that matches the SPACE_REGEX into a single space. This squashed the expression into a single line (discarding line breaks and comments), and the squashed expression is used for reporting errors. This is reasonable for dealing with Daf queries which are expected to be \"relatively simple\".\n\nWhen tokenizing, we discard the spaces. Anything that matches the VALUE_REGEX is considered to be a value Token. Anything that matches the operators is considered to be an operator Token. As a special case, '' is converted to an empty string, which is otherwise impossible to represent (write \\'\\' to prevent this). Anything else is reported as an invalid character.\n\nnote: Note\nThe operators regex should only match the start of the string (that is, must start with ^). 
Also, when using |, you need to list the longer operators first (e.g., ^(?:++|+) as opposed to ^(?:+|++)).\n\n\n\n\n\n","category":"function"},{"location":"tokens.html#Daf.Tokens.SPACE_REGEX","page":"Tokens","title":"Daf.Tokens.SPACE_REGEX","text":"SPACE_REGEX = r\"(?:[\\s\\n\\r]|#[^\\n\\r]*(?:[\\r\\n]|$))+\"sm\n\nOptional white space can separate Token. It is required when there are two consecutive value tokens, but is typically optional around operators. White space includes spaces, tabs, line breaks, and a # ... comment suffix of a line.\n\n\n\n\n\n","category":"constant"},{"location":"tokens.html#Index","page":"Tokens","title":"Index","text":"","category":"section"},{"location":"tokens.html","page":"Tokens","title":"Tokens","text":"Pages = [\"tokens.md\"]","category":"page"},{"location":"chains.html#Chains","page":"Chains","title":"Chains","text":"","category":"section"},{"location":"chains.html","page":"Chains","title":"Chains","text":"Daf.Chains\nDaf.Chains.chain_reader\nDaf.Chains.chain_writer\nDaf.Chains.ReadOnlyChain\nDaf.Chains.WriteChain","category":"page"},{"location":"chains.html#Daf.Chains","page":"Chains","title":"Daf.Chains","text":"View a chain of Daf data as a single data set. This allows creating a small Daf data set that contains extra (or overriding) data on top of a larger read-only data set. In particular this allows creating several such incompatible extra data sets (e.g., different groupings of cells to metacells), without having to duplicate the common (read only) data.\n\n\n\n\n\n","category":"module"},{"location":"chains.html#Daf.Chains.chain_reader","page":"Chains","title":"Daf.Chains.chain_reader","text":"chain_reader(dafs::AbstractVector{F}; name::Maybe{AbstractString} = nothing)::DafReader where {F <: DafReader}\n\nCreate a read-only chain wrapper of DafReaders, presenting them as a single DafReader. 
When accessing the content, the exposed value is that provided by the last data set that contains the data, that is, later data sets can override earlier data sets. However, if an axis exists in more than one data set in the chain, then its entries must be identical. This isn't typically created manually; instead call chain_reader.\n\nnote: Note\nWhile this verifies the axes are consistent at the time of creating the chain, it's no defense against modifying the chained data after the fact, creating inconsistent axes. Don't do that.\n\n\n\n\n\n","category":"function"},{"location":"chains.html#Daf.Chains.chain_writer","page":"Chains","title":"Daf.Chains.chain_writer","text":"chain_writer(dafs::AbstractVector{F}; name::Maybe{AbstractString} = nothing)::DafWriter where {F <: DafReader}\n\nCreate a chain wrapper for a chain of DafReader data, presenting them as a single DafWriter. This acts similarly to chain_reader, but requires the final entry to be a DafWriter. Any modifications or additions to the chain are directed at this final writer.\n\nnote: Note\nDeletions are only allowed for data that exists only in the final writer. That is, it is impossible to delete from a chain something that exists in any of the readers; it is only possible to override it.\n\n\n\n\n\n","category":"function"},{"location":"chains.html#Daf.Chains.ReadOnlyChain","page":"Chains","title":"Daf.Chains.ReadOnlyChain","text":"struct ReadOnlyChain <: DafReadOnly ... end\n\nA wrapper for a chain of DafReader data, presenting them as a single DafReadOnly. When accessing the content, the exposed value is that provided by the last data set that contains the data, that is, later data sets can override earlier data sets. However, if an axis exists in more than one data set in the chain, then its entries must be identical. 
This isn't typically created manually; instead call chain_reader.\n\n\n\n\n\n","category":"type"},{"location":"chains.html#Daf.Chains.WriteChain","page":"Chains","title":"Daf.Chains.WriteChain","text":"struct WriteChain <: DafWriter ... end\n\nA wrapper for a chain of DafReader data, with a final [DafWriter], presenting them as a single DafWriter. When accessing the content, the exposed value is that provided by the last data set that contains the data, that is, later data sets can override earlier data sets (where the writer has the final word). However, if an axis exists in more than one data set in the chain, then its entries must be identical. This isn't typically created manually; instead call chain_reader.\n\nAny modifications or additions to the chain are directed at the final writer. Deletions are only allowed for data that exists only in this writer. That is, it is impossible to delete from a chain something that exists in any of the readers; it is only possible to override it.\n\n\n\n\n\n","category":"type"},{"location":"chains.html#Index","page":"Chains","title":"Index","text":"","category":"section"},{"location":"chains.html","page":"Chains","title":"Chains","text":"Pages = [\"chains.md\"]","category":"page"},{"location":"registry.html#Operations-registry","page":"Operations registry","title":"Operations registry","text":"","category":"section"},{"location":"registry.html","page":"Operations registry","title":"Operations registry","text":"Daf.Registry","category":"page"},{"location":"registry.html#Daf.Registry","page":"Operations registry","title":"Daf.Registry","text":"Registering element-wise and reduction operations is required, to allow them to be used in a query.\n\nnote: Note\nWe do not re-export everything from here to the main Daf namespace, as it is only of interest for implementers of new query operations. 
Most users of Daf just stick with the (fairly comprehensive) list of built-in query operations so there's no need to pollute their namespace with these detail.\n\n\n\n\n\n","category":"module"},{"location":"registry.html#Element-wise-operations","page":"Operations registry","title":"Element-wise operations","text":"","category":"section"},{"location":"registry.html","page":"Operations registry","title":"Operations registry","text":"Daf.Registry.EltwiseOperation\nDaf.Registry.compute_eltwise","category":"page"},{"location":"registry.html#Daf.Registry.EltwiseOperation","page":"Operations registry","title":"Daf.Registry.EltwiseOperation","text":"Abstract type for all element-wise operations.\n\nIn a string query, this is specified using the % operator (e.g., % Abs, % Log base 2):\n\nEltwiseOperation := % operation ( parameter value )*\n\nSince each EltwiseOperation isa QueryOperation, you can directly apply it to a query (e.g., Axis(\"cell\") |> Lookup(\"age\") |> Abs()). For this there should be other constructor(s) tailored for this usage.\n\nAn element-wise operation may be applied to scalar, vector ot matrix data. It will preserve the shape of the data, but changes the value(s), and possibly the data type of the elements. For example, Abs will compute the absolute value of each value.\n\nTo implement a new such operation, the type is expected to be of the form:\n\nstruct MyOperation <: EltwiseOperation\n ... optional parameters ...\nend\n@query_operation MyOperation\n\nMyOperation(operation_name::Token, parameter_values::Dict{String, Token})::MyOperation\n\nThe constructor should use parse_parameter for each of the parameters (for example, using parse_number_assignment). In addition you will need to invoke @query_operation to register the operation so it can be used in a query, and implement the functions listed below. 
See the query operations module for details and examples.\n\n\n\n\n\n","category":"type"},{"location":"registry.html#Daf.Registry.compute_eltwise","page":"Operations registry","title":"Daf.Registry.compute_eltwise","text":"compute_eltwise(operation::EltwiseOperation, input::StorageMatrix)::StorageMatrix\ncompute_eltwise(operation::EltwiseOperation, input::StorageVector)::StorageVector\ncompute_eltwise(operation::EltwiseOperation, input_value::Number)::StorageNumber\n\nCompute an EltwiseOperation operation.\n\n\n\n\n\n","category":"function"},{"location":"registry.html#Reduction-operations","page":"Operations registry","title":"Reduction operations","text":"","category":"section"},{"location":"registry.html","page":"Operations registry","title":"Operations registry","text":"Daf.Registry.ReductionOperation\nDaf.Registry.compute_reduction\nDaf.Registry.reduction_result_type","category":"page"},{"location":"registry.html#Daf.Registry.ReductionOperation","page":"Operations registry","title":"Daf.Registry.ReductionOperation","text":"Abstract type for all reduction operations.\n\nIn a string query, this is specified using the %> operator (e.g., %> Sum, %> Quantile fraction 0.05):\n\nReductionOperation := %> operation ( parameter value )*\n\nSince each ReductionOperation isa QueryOperation, you can directly apply it to a query (e.g., Axis(\"cell\") |> Axis(\"gene\") |> Lookup(\"UMIs\") |> Quantile(0.05)). For this there should be other constructor(s) tailored for this usage.\n\nA reduction operation may be applied to matrix or vector data. It will reduce (eliminate) one dimension of the data, and possibly the result will have a different data type than the input. When applied to a vector, the operation will return a scalar. 
When applied to a matrix, it assumes the matrix is in column-major layout, and will return a vector with one entry per column, containing the result of reducing the column to a scalar.\n\nTo implement a new such operation, the type is expected to be of the form:\n\nstruct MyOperation <: ReductionOperation\n ... optional parameters ...\nend\n\nMyOperation(operation_name::Token, parameter_values::Dict{String, Token})::MyOperation\n\nThe constructor should use parse_parameter for each of the parameters (for example, using typically parse_number_assignment). In addition you will need to invoke @query_operation to register the operation so it can be used in a query, and implement the functions listed below. See the query operations module for details and examples.\n\n\n\n\n\n","category":"type"},{"location":"registry.html#Daf.Registry.compute_reduction","page":"Operations registry","title":"Daf.Registry.compute_reduction","text":"compute_reduction(operation::ReductionOperation, input::StorageMatrix)::StorageVector\ncompute_reduction(operation::ReductionOperation, input::StorageVector)::StorageNumber\n\nSince each ReductionOperation isa QueryOperation, you can directly apply it to a query (e.g., Axis(\"cell\") |> Axis(\"gene\") |> Lookup(\"UMIs\") |> Sum()). 
For this there should be other constructor(s) tailored for this usage.\n\n\n\n\n\n","category":"function"},{"location":"registry.html#Daf.Registry.reduction_result_type","page":"Operations registry","title":"Daf.Registry.reduction_result_type","text":"reduction_result_type(operation::ReductionOperation, eltype::Type)::Type\n\nReturn the data type of the result of the reduction operation if applied to a vector of the specified eltype.\n\n\n\n\n\n","category":"function"},{"location":"registry.html#Registering-operations","page":"Operations registry","title":"Registering operations","text":"","category":"section"},{"location":"registry.html","page":"Operations registry","title":"Operations registry","text":"Daf.Registry.QueryOperation\nDaf.Registry.@query_operation\nDaf.Registry.register_query_operation","category":"page"},{"location":"registry.html#Daf.Registry.QueryOperation","page":"Operations registry","title":"Daf.Registry.QueryOperation","text":"Abstract interface for all query operations. An actual query is a series of these operations which, when applied to Daf data, compute some result.\n\n\n\n\n\n","category":"type"},{"location":"registry.html#Daf.Registry.@query_operation","page":"Operations registry","title":"Daf.Registry.@query_operation","text":"struct MyOperation <: EltwiseOperation # Or <: ReductionOperation\n ...\nend\n@query_operation MyOperation\n\nAutomatically call register_query_operation for MyOperation.\n\nNote this will import Daf.Registry.register_query_operation, so it may only be called from the top level scope of a module.\n\n\n\n\n\n","category":"macro"},{"location":"registry.html#Daf.Registry.register_query_operation","page":"Operations registry","title":"Daf.Registry.register_query_operation","text":"register_query_operation(\n type::Type{T},\n source_file::AbstractString,\n source_line::Integer,\n)::Nothing where {T <: Union{EltwiseOperation, ReductionOperation}}\n\nRegister a specific operation so it would be available inside queries. 
This is required to be able to parse the operation. This is idempotent (safe to invoke multiple times).\n\nThis isn't usually called directly. Instead, it is typically invoked by using the @query_operation macro.\n\n\n\n\n\n","category":"function"},{"location":"registry.html#Index","page":"Operations registry","title":"Index","text":"","category":"section"},{"location":"registry.html","page":"Operations registry","title":"Operations registry","text":"Pages = [\"registry.md\"]","category":"page"},{"location":"computations.html#Computations","page":"Computations","title":"Computations","text":"","category":"section"},{"location":"computations.html","page":"Computations","title":"Computations","text":"Daf.Computations\nDaf.Computations.@computation\nDaf.Computations.DEFAULT\nDaf.Computations.CONTRACT\nDaf.Computations.CONTRACT1\nDaf.Computations.CONTRACT2","category":"page"},{"location":"computations.html#Daf.Computations","page":"Computations","title":"Daf.Computations","text":"Support writing \"well-behaved\" computations. Such computations declare a Contract describing their inputs and outputs. This is enforced, so that the implementation need not worry about missing inputs, and the caller can rely on the results. It is also self-documenting, so the generated documentation always contains a clear up-to-date description of the contract.\n\n\n\n\n\n","category":"module"},{"location":"computations.html#Daf.Computations.@computation","page":"Computations","title":"Daf.Computations.@computation","text":"@computation function something(...)\n return ...\nend\n\n@computation Contract(...) function something(daf::DafWriter, ...)\n return ...\nend\n\n@computation Contract(...) Contract(...) function something(\n first::DafReader/DafWriter, second::DafReader/DafWriter, ...\n)\n return ...\nend\n\nMark a function as a Daf computation. 
This has the following effects:\n\nIt verifies that the Daf data satisfies the Contract, when the computation is invoked and when it is complete (using verify_input and verify_output).\nIt stashes the contract(s) (if any) in a global variable. This allows expanding CONTRACT in the documentation string (for a single contract case), or CONTRACT1 and CONTRACT2 (for the dual contract case).\nIt stashes the default value of named arguments. This allows expanding DEFAULT in the documentation string, which is especially useful if these defaults are computed, read from global constants, etc.\nIt logs the invocation of the function (using @debug), including the actual values of the named arguments (using depict).\n\nnote: Note\nFor each Contract parameter (if any), there needs to be a DafReader or DafWriter, which the contract(s) will be applied to. These parameters should be the initial positional parameters of the function.\n\n\n\n\n\n","category":"macro"},{"location":"computations.html#Daf.Computations.DEFAULT","page":"Computations","title":"Daf.Computations.DEFAULT","text":"When using @computation:\n\n'''\n something(daf::DafWriter, x::Int = $(DEFAULT.x); y::Bool = $(DEFAULT.y))\n\n...\nIf `x` (default: $(DEFAULT.x)) is even, ...\n...\nIf `y` (default: $(DEFAULT.y)) is set, ...\n...\n'''\n@computation Contract(...)\nfunction something(daf::DafWriter, x::Int = 0; y::Bool = false)\n return ...\nend\n\nThen $(DEFAULT.x) will be expanded with the default value of the parameter x. It is good practice to contain a description of the effects of each parameter somewhere in the documentation, and it is polite to also provide its default value. This can be done in either the signature line or in the text, or both. 
Using DEFAULT ensures that the correct value is used in the documentation.\n\n\n\n\n\n","category":"constant"},{"location":"computations.html#Daf.Computations.CONTRACT","page":"Computations","title":"Daf.Computations.CONTRACT","text":"When using @computation:\n\n'''\n...\n# Contract\n...\n$(CONTRACT)\n...\n'''\n@computation Contract(...)\nfunction something(daf::DafWriter, ...)\n return ...\nend\n\nThen $(CONTRACT) will be expanded with a description of the Contract. This is based on DocStringExtensions.\n\nnote: Note\nThe first argument of the function must be a DafWriter, which the contract will be applied to.\n\n\n\n\n\n","category":"constant"},{"location":"computations.html#Daf.Computations.CONTRACT1","page":"Computations","title":"Daf.Computations.CONTRACT1","text":"Same as CONTRACT, but reference the contract for the 1st Daf argument for a @computation with two such arguments.\n\n\n\n\n\n","category":"constant"},{"location":"computations.html#Daf.Computations.CONTRACT2","page":"Computations","title":"Daf.Computations.CONTRACT2","text":"Same as CONTRACT, but reference the contract for the 2nd Daf argument for a @computation with two such arguments.\n\n\n\n\n\n","category":"constant"},{"location":"computations.html#Index","page":"Computations","title":"Index","text":"","category":"section"},{"location":"computations.html","page":"Computations","title":"Computations","text":"Pages = [\"computations.md\"]","category":"page"},{"location":"reconstruction.html#Reconstruction","page":"Reconstruction","title":"Reconstruction","text":"","category":"section"},{"location":"reconstruction.html","page":"Reconstruction","title":"Reconstruction","text":"Daf.Reconstruction\nDaf.Reconstruction.reconstruct_axis!","category":"page"},{"location":"reconstruction.html#Daf.Reconstruction","page":"Reconstruction","title":"Daf.Reconstruction","text":"Reconstruct implicit axes. 
Due to AnnData two-axes limitations, other axes are often represented by storing their expanded data (e.g., a type for each cell, and a color for each cell, where the color is actually per type). When converting such data to Daf, it is useful to reconstruct such axes (e.g., create a type axis, assign a color for each type, and delete the per-cell color property).\n\n\n\n\n\n","category":"module"},{"location":"reconstruction.html#Daf.Reconstruction.reconstruct_axis!","page":"Reconstruction","title":"Daf.Reconstruction.reconstruct_axis!","text":"reconstruct_axis!(\n daf::DafWriter;\n existing_axis::AbstractString,\n implicit_axis::AbstractString,\n [rename_axis::Maybe{AbstractString} = nothing,\n empty_implicit::Maybe{StorageScalar} = nothing,\n implicit_properties::Maybe{AbstractStringSet} = nothing,\n properties_defaults::Maybe{AbstractDict} = nothing]\n)::AbstractDict{<:AbstractString, Maybe{StorageScalar}}\n\nGiven an existing_axis in daf, which has a property implicit_axis, create a new axis with the same name (or, if specified, call it rename_axis). If empty_implicit is specified, this value of the property is replaced by the empty string (indicate there is no value associated with the existing_axis entry). For each of the implicit_properties, we collect the mapping between the implicit_axis and the property values, and store it as a property of the newly created axis.\n\nIf the implicit_axis already exists, we verify that all the values provided for it by the existing_axis do, in fact, exist as names of entries in the implicit_axis. This allows manually creating the implicit_axis with additional entries that are not currently in use.\n\nnote: Note\nIf the implicit_axis already exists and contains entries that aren't currently in use, you must specify properties_defaults for the values of these entries of the reconstructed properties.Due to Julia's type system limitations, there's just no way for the system to enforce the type of the pairs in this vector. 
That is, what we'd like to say is:properties_defaults::Maybe{AbstractDict{<:AbstractString, <:StorageScalar}} = nothingBut what we are forced to say is:properties_defaults::Maybe{Dict} = nothingGlory to anyone who figures out an incantation that would force the system to perform more meaningful type inference here.\n\nIf implicit_properties are explicitly specified, then we require the mapping from implicit_axis to be consistent. Otherwise, we look at all the properties of the existing_axis, and check for each one whether the mapping is consistent; if it is, we migrate the property to the new axis. For example, when importing AnnData containing per-cell data, it isn't always clear which property is actually per-batch (e.g., cell age) and which is actually per cell (e.g., doublet score). Not specifying the implicit_properties allows the function to figure it out on its own.\n\nnote: Note\nFor each converted property, the value associated with existing_axis entries which have no implicit_axis value (that is, have an empty string or empty_implicit value) is lost. For example, if each cell type has a color, but some cells do not have a type, then the color of \"cells with no type\" is lost. We still require this value to be consistent, and return a mapping between each migrated property name and the value of such entries (if any exist). When reconstructing the original property, specify this value using IfNot (e.g., / cell : type => color ?? 
magenta).\n\n\n\n\n\n","category":"function"},{"location":"reconstruction.html#Index","page":"Reconstruction","title":"Index","text":"","category":"section"},{"location":"reconstruction.html","page":"Reconstruction","title":"Reconstruction","text":"Pages = [\"reconstruction.md\"]","category":"page"},{"location":"contracts.html#Contracts","page":"Contracts","title":"Contracts","text":"","category":"section"},{"location":"contracts.html","page":"Contracts","title":"Contracts","text":"Daf.Contracts\nDaf.Contracts.Contract\nDaf.Contracts.ContractAxes\nDaf.Contracts.ContractData\nDaf.Contracts.ContractExpectation\nDaf.Contracts.verify_input\nDaf.Contracts.verify_output","category":"page"},{"location":"contracts.html#Daf.Contracts","page":"Contracts","title":"Daf.Contracts","text":"Enforce input and output contracts of computations using Daf data.\n\n\n\n\n\n","category":"module"},{"location":"contracts.html#Daf.Contracts.Contract","page":"Contracts","title":"Daf.Contracts.Contract","text":"Contract(;\n [axes::Maybe{ContractAxes} = nothing,\n data::Maybe{ContractData} = nothing]\n)::Contract\n\nThe contract of a computational tool, specifying the ContractAxes and ContractData.\n\n\n\n\n\n","category":"type"},{"location":"contracts.html#Daf.Contracts.ContractAxes","page":"Contracts","title":"Daf.Contracts.ContractAxes","text":"A vector of pairs where the key is the axis name and the value is a tuple of the ContractExpectation and a description of the axis (for documentation). Axes are listed mainly for documentation; axes of required or guaranteed vectors or matrices are automatically required or guaranteed to match. However it is considered polite to explicitly list the axes with their descriptions so the documentation of the contract will be complete.\n\nnote: Note\nDue to Julia's type system limitations, there's just no way for the system to enforce the type of the pairs in this vector. 
That is, what we'd like to say is:ContractAxes = AbstractVector{Pair{AbstractString, Tuple{ContractExpectation, AbstractString}}}But what we are forced to say is:ContractAxes = AbstractVector{<:Pair}Glory to anyone who figures out an incantation that would force the system to perform more meaningful type inference here.\n\n\n\n\n\n","category":"type"},{"location":"contracts.html#Daf.Contracts.ContractData","page":"Contracts","title":"Daf.Contracts.ContractData","text":"A vector of pairs where the key is a DataKey identifying some data property, and the value is a tuple of the ContractExpectation, the expected data type, and a description (for documentation).\n\nnote: Note\nDue to Julia's type system limitations, there's just no way for the system to enforce the type of the pairs in this vector. That is, what we'd like to say is:ContractData = AbstractVector{Pair{DataKey, Tuple{ContractExpectation, Type, AbstractString}}}But what we are forced to say is:ContractData = AbstractVector{<:Pair}Glory to anyone who figures out an incantation that would force the system to perform more meaningful type inference here.\n\n\n\n\n\n","category":"type"},{"location":"contracts.html#Daf.Contracts.ContractExpectation","page":"Contracts","title":"Daf.Contracts.ContractExpectation","text":"The expectation from a specific property for a computation on Daf data.\n\nInput data:\n\nRequiredInput - data that must exist in the data when invoking the computation, will be used as input.\n\nOptionalInput - data that, if existing in the data when invoking the computation, will be used as an input.\n\nOutput data:\n\nGuaranteedOutput - data that is guaranteed to exist when the computation is done.\n\nOptionalOutput - data that may exist when the computation is done, depending on some condition, which may include the existence of optional input and/or the value of parameters to the computation, and/or the content of the 
data.\n\n\n\n\n\n","category":"type"},{"location":"contracts.html#Daf.Contracts.verify_input","page":"Contracts","title":"Daf.Contracts.verify_input","text":"verify_input(daf::DafReader, contract::Contract, computation::AbstractString)::Nothing\n\nVerify the daf data when a computation is invoked. This verifies that all the required data exists and is of the appropriate type, and that if any of the optional data exists, it has the appropriate type.\n\n\n\n\n\n","category":"function"},{"location":"contracts.html#Daf.Contracts.verify_output","page":"Contracts","title":"Daf.Contracts.verify_output","text":"verify_output(daf::DafReader, contract::Contract, computation::AbstractString)::Nothing\n\nVerify the daf data when a computation is complete. This verifies that all the guaranteed output data exists and is of the appropriate type, and that if any of the optional output data exists, it has the appropriate type.\n\n\n\n\n\n","category":"function"},{"location":"contracts.html#Index","page":"Contracts","title":"Index","text":"","category":"section"},{"location":"contracts.html","page":"Contracts","title":"Contracts","text":"Pages = [\"contracts.md\"]","category":"page"},{"location":"messages.html#Messages","page":"Messages","title":"Messages","text":"","category":"section"},{"location":"messages.html","page":"Messages","title":"Messages","text":"Daf.Messages","category":"page"},{"location":"messages.html#Daf.Messages","page":"Messages","title":"Daf.Messages","text":"Functions for improving the quality of error and log messages.\n\n\n\n\n\n","category":"module"},{"location":"messages.html","page":"Messages","title":"Messages","text":"Daf.Messages.unique_name\nDaf.Messages.depict\nDaf.Messages.depict_percent","category":"page"},{"location":"messages.html#Daf.Messages.unique_name","page":"Messages","title":"Daf.Messages.unique_name","text":"unique_name(prefix::AbstractString)::AbstractString\n\nUsing short, human-readable unique names for things is a great help when 
debugging. Normally one has to choose between using a human-provided short non-unique name, and an opaque object identifier, or a combination thereof. This function replaces the opaque object identifier with a short counter, which gives names that are both unique and short.\n\nThat is, this will return a unique name starting with the prefix and followed by #, the process index (if using multiple processes), and an index (how many times this name was used in the process). For example, unique_name(\"foo\") will return foo for the first usage, foo#2 for the 2nd, etc. If using multiple processes, it will return foo, foo#1.2, etc.\n\nThat is, for code where the names are unique (e.g., a simple script or Jupyter notebook), this doesn't mess up the names. It only appends a suffix to the names if it is needed to disambiguate between multiple uses of the same name.\n\nTo help with tests, if the prefix ends with !, we return it as-is, accepting it may not be unique.\n\n\n\n\n\n","category":"function"},{"location":"messages.html#Daf.MatrixLayouts.depict","page":"Messages","title":"Daf.MatrixLayouts.depict","text":"depict(value::Any)::String\n\nDepict a value in an error message or a log entry. 
Unlike \"$(value)\", this focuses on producing a human-readable indication of the type of the value, so it double-quotes strings, prefixes symbols with :, and reports the type and sizes of arrays rather than showing their content, as well as having specializations for the various Daf data types.\n\n\n\n\n\n","category":"function"},{"location":"messages.html#Daf.Messages.depict_percent","page":"Messages","title":"Daf.Messages.depict_percent","text":"depict_percent(used::Integer, out_of::Integer)::String\n\nDescribe a fraction of used amount out_of some total as a percentage.\n\n\n\n\n\n","category":"function"},{"location":"messages.html#Index","page":"Messages","title":"Index","text":"","category":"section"},{"location":"messages.html","page":"Messages","title":"Messages","text":"Pages = [\"messages.md\"]","category":"page"},{"location":"matrix_layouts.html#Matrix-layouts","page":"Matrix layouts","title":"Matrix layouts","text":"","category":"section"},{"location":"matrix_layouts.html","page":"Matrix layouts","title":"Matrix layouts","text":"Daf.MatrixLayouts","category":"page"},{"location":"matrix_layouts.html#Daf.MatrixLayouts","page":"Matrix layouts","title":"Daf.MatrixLayouts","text":"All stored Daf matrix data has a clear matrix layout, that is, a major_axis, regardless of whether it is dense or sparse.\n\nThat is, for Columns-major data, the values of each column are laid out consecutively in memory (each column is a single contiguous vector), so any operation that works on whole columns will be fast (e.g., summing the value of each column). In contrast, the values of each row are stored far apart from each other, so any operation that works on whole rows will be very slow in comparison (e.g., summing the value of each row).\n\nFor Rows-major data, the values of each row are laid out consecutively in memory (each row is a single contiguous vector). In contrast, the values of each column are stored far apart from each other. 
In this case, summing columns would be slow, and summing rows would be fast.\n\nThis is much simpler than the ArrayLayouts module which attempts to fully describe the layout of N-dimensional arrays, a much more ambitious goal which is an overkill for our needs.\n\nnote: Note\nThe \"default\" layout in Julia is column-major, which inherits this from matlab, which inherits this from FORTRAN, allegedly because this is more efficient for some linear algebra operations. In contrast, Python numpy uses row-major layout by default. In either case, this is just an arbitrary convention, and all systems work just fine with data of either memory layout; the key consideration is to keep track of the layout, and to apply operations \"with the grain\" rather than \"against the grain\" of the data.\n\n\n\n\n\n","category":"module"},{"location":"matrix_layouts.html#Symbolic-names-for-axes","page":"Matrix layouts","title":"Symbolic names for axes","text":"","category":"section"},{"location":"matrix_layouts.html","page":"Matrix layouts","title":"Matrix layouts","text":"Daf.MatrixLayouts.Rows\nDaf.MatrixLayouts.Columns\nDaf.MatrixLayouts.axis_name","category":"page"},{"location":"matrix_layouts.html#Daf.MatrixLayouts.Rows","page":"Matrix layouts","title":"Daf.MatrixLayouts.Rows","text":"A symbolic name for the rows axis. It is much more readable to write, say, size(matrix, Rows), instead of size(matrix, 1).\n\n\n\n\n\n","category":"constant"},{"location":"matrix_layouts.html#Daf.MatrixLayouts.Columns","page":"Matrix layouts","title":"Daf.MatrixLayouts.Columns","text":"A symbolic name for the columns axis. 
It is much more readable to write, say, size(matrix, Columns), instead of size(matrix, 2).\n\n\n\n\n\n","category":"constant"},{"location":"matrix_layouts.html#Daf.MatrixLayouts.axis_name","page":"Matrix layouts","title":"Daf.MatrixLayouts.axis_name","text":"axis_name(axis::Maybe{Integer})::String\n\nReturn the name of the axis (for messages).\n\n\n\n\n\n","category":"function"},{"location":"matrix_layouts.html#Checking-layout","page":"Matrix layouts","title":"Checking layout","text":"","category":"section"},{"location":"matrix_layouts.html","page":"Matrix layouts","title":"Matrix layouts","text":"Daf.MatrixLayouts.major_axis\nDaf.MatrixLayouts.require_major_axis\nDaf.MatrixLayouts.minor_axis\nDaf.MatrixLayouts.require_minor_axis\nDaf.MatrixLayouts.other_axis","category":"page"},{"location":"matrix_layouts.html#Daf.MatrixLayouts.major_axis","page":"Matrix layouts","title":"Daf.MatrixLayouts.major_axis","text":"major_axis(matrix::AbstractMatrix)::Maybe{Int8}\n\nReturn the index of the major axis of a matrix, that is, the axis one should keep fixed for an efficient inner loop accessing the matrix elements. If the matrix doesn't support any efficient access axis, returns nothing.\n\n\n\n\n\n","category":"function"},{"location":"matrix_layouts.html#Daf.MatrixLayouts.require_major_axis","page":"Matrix layouts","title":"Daf.MatrixLayouts.require_major_axis","text":"require_major_axis(matrix::AbstractMatrix)::Int8\n\nSimilar to major_axis but will error if the matrix isn't in either row-major or column-major layout.\n\n\n\n\n\n","category":"function"},{"location":"matrix_layouts.html#Daf.MatrixLayouts.minor_axis","page":"Matrix layouts","title":"Daf.MatrixLayouts.minor_axis","text":"minor_axis(matrix::AbstractMatrix)::Maybe{Int8}\n\nReturn the index of the minor axis of a matrix, that is, the axis one should vary for an efficient inner loop accessing the matrix elements. 
If the matrix doesn't support any efficient access axis, returns nothing.\n\n\n\n\n\n","category":"function"},{"location":"matrix_layouts.html#Daf.MatrixLayouts.require_minor_axis","page":"Matrix layouts","title":"Daf.MatrixLayouts.require_minor_axis","text":"require_minor_axis(matrix::AbstractMatrix)::Int8\n\nSimilar to minor_axis but will error if the matrix isn't in either row-major or column-major layout.\n\n\n\n\n\n","category":"function"},{"location":"matrix_layouts.html#Daf.MatrixLayouts.other_axis","page":"Matrix layouts","title":"Daf.MatrixLayouts.other_axis","text":"other_axis(axis::Maybe{Integer})::Maybe{Int8}\n\nReturn the other matrix axis (that is, convert between Rows and Columns). If given nothing returns nothing.\n\n\n\n\n\n","category":"function"},{"location":"matrix_layouts.html#Changing-layout","page":"Matrix layouts","title":"Changing layout","text":"","category":"section"},{"location":"matrix_layouts.html","page":"Matrix layouts","title":"Matrix layouts","text":"Daf.MatrixLayouts.relayout!","category":"page"},{"location":"matrix_layouts.html#Daf.MatrixLayouts.relayout!","page":"Matrix layouts","title":"Daf.MatrixLayouts.relayout!","text":"relayout!(matrix::AbstractMatrix)::AbstractMatrix\nrelayout!(matrix::NamedMatrix)::NamedMatrix\nrelayout!(destination::AbstractMatrix, source::AbstractMatrix)::AbstractMatrix\nrelayout!(destination::AbstractMatrix, source::NamedMatrix)::NamedMatrix\n\nReturn the same matrix data, but in the other memory layout.\n\nSuppose you have a column-major UMIs matrix, whose rows are cells, and columns are genes. Therefore, summing the UMIs of a gene will be fast, but summing the UMIs of a cell will be slow. A transpose (no !) of a matrix is fast; it creates a zero-copy wrapper of the matrix with flipped axes, so its rows will be genes and columns will be cells, but in row-major layout. Therefore, still, summing the UMIs of a gene is fast, and summing the UMIs of a cell is slow.\n\nIn contrast, transpose! (with a !) 
is slow; it creates a rearranged copy of the data, also returning a matrix whose rows are genes and columns are cells, but this time, in column-major layout. Therefore, in this case summing the UMIs of a gene will be slow, and summing the UMIs of a cell will be fast.\n\nnote: Note\nIt is almost always worthwhile to relayout! a matrix and then perform operations \"with the grain\" of the data, instead of skipping it and performing operations \"against the grain\" of the data. This is because (in Julia at least) the implementation of transpose! is optimized for the task, while the other operations typically don't provide any specific optimizations for working \"against the grain\" of the data. The benefits of a relayout! become even more significant when performing a series of operations (e.g., summing the gene UMIs in each cell, converting gene UMIs to fractions out of these totals, then computing the log base 2 of this fraction).\n\nIf you transpose (no !) the result of transpose! (with a !), you end up with a matrix that appears to be the same as the original (rows are cells and columns are genes), but behaves differently - summing the UMIs of a gene will be slow, and summing the UMIs of a cell is fast. This transpose of transpose! is a common idiom and is basically what relayout! does for you. In addition, relayout! will work for both sparse and dense matrices, and if destination is not specified, a similar matrix is allocated automatically for it.\n\nnote: Note\nThe caller is responsible for providing a sensible destination matrix (sparse for a sparse source, dense for a non-sparse source). This can be a transposed matrix. If source is a NamedMatrix, then the result will be a NamedMatrix with the same axes. 
If destination is also a NamedMatrix, then its axes must match source.\n\n\n\n\n\n","category":"function"},{"location":"matrix_layouts.html#Ensuring-code-efficiency","page":"Matrix layouts","title":"Ensuring code efficiency","text":"","category":"section"},{"location":"matrix_layouts.html","page":"Matrix layouts","title":"Matrix layouts","text":"Daf.MatrixLayouts.check_efficient_action\nDaf.MatrixLayouts.inefficient_action_handler","category":"page"},{"location":"matrix_layouts.html#Daf.MatrixLayouts.check_efficient_action","page":"Matrix layouts","title":"Daf.MatrixLayouts.check_efficient_action","text":"check_efficient_action(\n action::AbstractString,\n axis::Integer,\n operand::AbstractString,\n matrix::AbstractMatrix,\n)::Nothing\n\nThis will check whether the action about to be executed for an operand which is matrix works \"with the grain\" of the data, which requires the matrix to be in axis-major layout. If it isn't, then apply the inefficient_action_handler.\n\nIn general, you really want operations to go \"with the grain\" of the data. Unfortunately, Julia (and Python, and R, and matlab) will silently run operations \"against the grain\", which would be painfully slow. A liberal application of this function in your code will help in detecting such slowdowns, without having to resort to profiling the code to isolate the problem.\n\nnote: Note\nThis will not prevent the code from performing \"against the grain\" operations such as selectdim(matrix, Rows, 1) for a column-major matrix, but if you add this check before performing any (series of) operations on a matrix, then you will have a clear indication of whether (and where) such operations occur. You can then consider whether to invoke relayout! 
on the data, or (for data fetched from Daf), simply query for the other memory layout.\n\n\n\n\n\n","category":"function"},{"location":"matrix_layouts.html#Daf.MatrixLayouts.inefficient_action_handler","page":"Matrix layouts","title":"Daf.MatrixLayouts.inefficient_action_handler","text":"inefficient_action_handler(handler::AbnormalHandler)::AbnormalHandler\n\nSpecify the AbnormalHandler to use when accessing a matrix in an inefficient way (\"against the grain\"). Returns the previous handler. The default handler is WarnHandler.\n\n\n\n\n\n","category":"function"},{"location":"matrix_layouts.html#Index","page":"Matrix layouts","title":"Index","text":"","category":"section"},{"location":"matrix_layouts.html","page":"Matrix layouts","title":"Matrix layouts","text":"Pages = [\"matrix_layouts.md\"]","category":"page"},{"location":"readers.html#Readers","page":"Readers","title":"Readers","text":"","category":"section"},{"location":"readers.html","page":"Readers","title":"Readers","text":"Daf.Readers\nDaf.Readers.description","category":"page"},{"location":"readers.html#Daf.Readers","page":"Readers","title":"Daf.Readers","text":"The DafReader interface specifies a high-level API for reading Daf data. This API is implemented here, on top of the low-level FormatReader API. The high-level API provides thread safety so the low-level API can (mostly) ignore this issue.\n\nEach data set is given a name to use in error messages etc. You can explicitly set this name when creating a Daf object. Otherwise, when opening an existing data set, if it contains a scalar \"name\" property, it is used. Otherwise some reasonable default is used. In all cases, object names are passed through unique_name to avoid ambiguity.\n\nData properties are identified by a unique name given the axes they are based on. 
That is, there is a separate namespace for scalar properties, vector properties for each specific axis, and matrix properties for each unordered pair of axes.\n\nFor matrices, we keep careful track of their MatrixLayouts. Returned matrices are always in column-major layout, using relayout! if necessary. As this is an expensive operation, we'll cache the result in memory. Similarly, we cache the results of applying a query to the data. We allow clearing the cache to reduce memory usage, if necessary.\n\nThe data API is the high-level API intended to be used from outside the package, and is therefore re-exported from the top-level Daf namespace. It provides additional functionality on top of the low-level FormatReader implementation, accepting more general data types, automatically dealing with relayout! when needed. In particular, it enforces single-writer multiple-readers for each data set, so the format code can ignore multi-threading and still be thread-safe.\n\nnote: Note\nIn the APIs below, when getting a value, specifying a default of undef means that it is an error for the value not to exist. In contrast, specifying a default of nothing means it is OK for the value not to exist, returning nothing. Specifying an actual value for default means it is OK for the value not to exist, returning the default instead. This is in spirit with, but not identical to, undef being used as a flag for array construction saying \"there is no initializer\". If you feel this is an abuse of the undef value, take some comfort in that it is the default value for the default, so you almost never have to write it explicitly in your code.\n\n\n\n\n\n","category":"module"},{"location":"readers.html#Daf.Readers.description","page":"Readers","title":"Daf.Readers.description","text":"description(daf::DafReader[; deep::Bool = false])::AbstractString\n\nReturn a (multi-line) description of the contents of daf. This tries to hit a sweet spot between usefulness and terseness. 
If cache, also describes the content of the cache. If deep, also describes any data set nested inside this one (if any).\n\n\n\n\n\n","category":"function"},{"location":"readers.html#Scalar-properties","page":"Readers","title":"Scalar properties","text":"","category":"section"},{"location":"readers.html","page":"Readers","title":"Readers","text":"Daf.Readers.has_scalar\nDaf.Readers.get_scalar","category":"page"},{"location":"readers.html#Daf.Readers.has_scalar","page":"Readers","title":"Daf.Readers.has_scalar","text":"has_scalar(daf::DafReader, name::AbstractString)::Bool\n\nCheck whether a scalar property with some name exists in daf.\n\n\n\n\n\n","category":"function"},{"location":"readers.html#Daf.Readers.get_scalar","page":"Readers","title":"Daf.Readers.get_scalar","text":"get_scalar(\n daf::DafReader,\n name::AbstractString;\n [default::Union{StorageScalar, Nothing, UndefInitializer} = undef]\n)::Maybe{StorageScalar}\n\nGet the value of a scalar property with some name in daf.\n\nIf default is undef (the default), this first verifies the name scalar property exists in daf. 
Otherwise default will be returned if the property does not exist.\n\n\n\n\n\n","category":"function"},{"location":"readers.html#Readers-axes","page":"Readers","title":"Readers axes","text":"","category":"section"},{"location":"readers.html","page":"Readers","title":"Readers","text":"Daf.Readers.has_axis\nDaf.Readers.axis_names\nDaf.Readers.get_axis\nDaf.Readers.axis_length","category":"page"},{"location":"readers.html#Daf.Readers.has_axis","page":"Readers","title":"Daf.Readers.has_axis","text":"has_axis(daf::DafReader, axis::AbstractString)::Bool\n\nCheck whether some axis exists in daf.\n\n\n\n\n\n","category":"function"},{"location":"readers.html#Daf.Readers.axis_names","page":"Readers","title":"Daf.Readers.axis_names","text":"axis_names(daf::DafReader)::AbstractStringSet\n\nThe names of the axes of daf.\n\n\n\n\n\n","category":"function"},{"location":"readers.html#Daf.Readers.get_axis","page":"Readers","title":"Daf.Readers.get_axis","text":"get_axis(\n daf::DafReader,\n axis::AbstractString;\n [default::Union{Nothing, UndefInitializer} = undef]\n)::Maybe{AbstractStringVector}\n\nThe unique names of the entries of some axis of daf. This is similar to doing get_vector for the special name property, except that it returns a simple vector of strings instead of a NamedVector.\n\nIf default is undef (the default), this verifies the axis exists in daf. 
Otherwise, the default is nothing, which is returned if the axis does not exist.\n\n\n\n\n\n","category":"function"},{"location":"readers.html#Daf.Readers.axis_length","page":"Readers","title":"Daf.Readers.axis_length","text":"axis_length(daf::DafReader, axis::AbstractString)::Int64\n\nThe number of entries along the axis in daf.\n\nThis first verifies the axis exists in daf.\n\n\n\n\n\n","category":"function"},{"location":"readers.html#Vector-properties","page":"Readers","title":"Vector properties","text":"","category":"section"},{"location":"readers.html","page":"Readers","title":"Readers","text":"Daf.Readers.has_vector\nDaf.Readers.vector_names\nDaf.Readers.get_vector","category":"page"},{"location":"readers.html#Daf.Readers.has_vector","page":"Readers","title":"Daf.Readers.has_vector","text":"has_vector(daf::DafReader, axis::AbstractString, name::AbstractString)::Bool\n\nCheck whether a vector property with some name exists for the axis in daf. This is always true for the special name property.\n\nThis first verifies the axis exists in daf.\n\n\n\n\n\n","category":"function"},{"location":"readers.html#Daf.Readers.vector_names","page":"Readers","title":"Daf.Readers.vector_names","text":"vector_names(daf::DafReader, axis::AbstractString)::AbstractStringSet\n\nThe names of the vector properties for the axis in daf, not including the special name property.\n\nThis first verifies the axis exists in daf.\n\n\n\n\n\n","category":"function"},{"location":"readers.html#Daf.Readers.get_vector","page":"Readers","title":"Daf.Readers.get_vector","text":"get_vector(\n daf::DafReader,\n axis::AbstractString,\n name::AbstractString;\n [default::Union{StorageScalar, StorageVector, Nothing, UndefInitializer} = undef]\n)::Maybe{NamedVector}\n\nGet the vector property with some name for some axis in daf. The names of the result are the names of the vector entries (same as returned by get_axis). 
The special property name returns an array whose values are also the (read-only) names of the entries of the axis.\n\nThis first verifies the axis exists in daf. If default is undef (the default), this first verifies the name vector exists in daf. Otherwise, if default is nothing, it will be returned. If it is a StorageVector, it has to be of the same size as the axis, and is returned. Otherwise, if it is a StorageScalar, a new Vector is created of the correct size containing the default, and is returned.\n\n\n\n\n\n","category":"function"},{"location":"readers.html#Matrix-properties","page":"Readers","title":"Matrix properties","text":"","category":"section"},{"location":"readers.html","page":"Readers","title":"Readers","text":"Daf.Readers.has_matrix\nDaf.Readers.matrix_names\nDaf.Readers.get_matrix","category":"page"},{"location":"readers.html#Daf.Readers.has_matrix","page":"Readers","title":"Daf.Readers.has_matrix","text":"has_matrix(\n daf::DafReader,\n rows_axis::AbstractString,\n columns_axis::AbstractString,\n name::AbstractString;\n [relayout::Bool = true]\n)::Bool\n\nCheck whether a matrix property with some name exists for the rows_axis and the columns_axis in daf. Since this is Julia, this means a column-major matrix. 
A daf may contain two copies of the same data, in which case it would report the matrix under both axis orders.\n\nIf relayout (the default), this will also check whether the data exists in the other layout (that is, with flipped axes).\n\nThis first verifies the rows_axis and columns_axis exists in daf.\n\n\n\n\n\n","category":"function"},{"location":"readers.html#Daf.Readers.matrix_names","page":"Readers","title":"Daf.Readers.matrix_names","text":"matrix_names(\n daf::DafReader,\n rows_axis::AbstractString,\n columns_axis::AbstractString;\n [relayout::Bool = true]\n)::AbstractStringSet\n\nThe names of the matrix properties for the rows_axis and columns_axis in daf.\n\nIf relayout (default), then this will include the names of matrices that exist in the other layout (that is, with flipped axes).\n\nThis first verifies the rows_axis and columns_axis exist in daf.\n\n\n\n\n\n","category":"function"},{"location":"readers.html#Daf.Readers.get_matrix","page":"Readers","title":"Daf.Readers.get_matrix","text":"get_matrix(\n daf::DafReader,\n rows_axis::AbstractString,\n columns_axis::AbstractString,\n name::AbstractString;\n [default::Union{StorageNumber, StorageMatrix, Nothing, UndefInitializer} = undef,\n relayout::Bool = true]\n)::Maybe{NamedMatrix}\n\nGet the column-major matrix property with some name for some rows_axis and columns_axis in daf. The names of the result axes are the names of the relevant axes entries (same as returned by get_axis).\n\nIf relayout (the default), then if the matrix is only stored in the other memory layout (that is, with flipped axes), then automatically call relayout! to compute the result. If daf isa DafWriter, then store the result for future use; otherwise, just cache it as MemoryData. This may lock up very large amounts of memory; you can call empty_cache! to release it.\n\nThis first verifies the rows_axis and columns_axis exist in daf. If default is undef (the default), this first verifies the name matrix exists in daf. 
Otherwise, if default is nothing, it is returned. If default is a StorageMatrix, it has to be of the same size as the rows_axis and columns_axis, and is returned. Otherwise, a new Matrix is created of the correct size containing the default, and is returned.\n\n\n\n\n\n","category":"function"},{"location":"readers.html#Utilities","page":"Readers","title":"Utilities","text":"","category":"section"},{"location":"readers.html","page":"Readers","title":"Readers","text":"Daf.Readers.axis_version_counter\nDaf.Readers.vector_version_counter\nDaf.Readers.matrix_version_counter","category":"page"},{"location":"readers.html#Daf.Readers.axis_version_counter","page":"Readers","title":"Daf.Readers.axis_version_counter","text":"axis_version_counter(daf::DafReader, axis::AbstractString)::UInt32\n\nReturn the version number of the axis. This is incremented every time delete_axis! is called. It is used by interfaces to other programming languages to minimize copying data.\n\nnote: Note\nThis is purely in-memory per-instance, and not a global persistent version counter. That is, the version counter starts at zero even if opening a persistent disk daf data set.\n\n\n\n\n\n","category":"function"},{"location":"readers.html#Daf.Readers.vector_version_counter","page":"Readers","title":"Daf.Readers.vector_version_counter","text":"vector_version_counter(daf::DafReader, axis::AbstractString, name::AbstractString)::UInt32\n\nReturn the version number of the vector. This is incremented every time set_vector!, empty_dense_vector! or empty_sparse_vector! are called. It is used by interfaces to other programming languages to minimize copying data.\n\nnote: Note\nThis is purely in-memory per-instance, and not a global persistent version counter. 
That is, the version counter starts at zero even if opening a persistent disk daf data set.\n\n\n\n\n\n","category":"function"},{"location":"readers.html#Daf.Readers.matrix_version_counter","page":"Readers","title":"Daf.Readers.matrix_version_counter","text":"matrix_version_counter(daf::DafReader, rows_axis::AbstractString, columns_axis::AbstractString, name::AbstractString)::UInt32\n\nReturn the version number of the matrix. The order of the axes does not matter. This is incremented every time set_matrix!, empty_dense_matrix! or empty_sparse_matrix! are called. It is used by interfaces to other programming languages to minimize copying data.\n\nnote: Note\nThis is purely in-memory per-instance, and not a global persistent version counter. That is, the version counter starts at zero even if opening a persistent disk daf data set.\n\n\n\n\n\n","category":"function"},{"location":"readers.html#Index","page":"Readers","title":"Index","text":"","category":"section"},{"location":"readers.html","page":"Readers","title":"Readers","text":"Pages = [\"readers.md\"]","category":"page"},{"location":"copies.html#Copies","page":"Copies","title":"Copies","text":"","category":"section"},{"location":"copies.html","page":"Copies","title":"Copies","text":"Daf.Copies\nDaf.Copies.copy_scalar!\nDaf.Copies.copy_axis!\nDaf.Copies.copy_vector!\nDaf.Copies.copy_matrix!\nDaf.Copies.copy_all!\nDaf.Copies.EmptyData\nDaf.Copies.DataTypes","category":"page"},{"location":"copies.html#Daf.Copies","page":"Copies","title":"Daf.Copies","text":"Copy data between Daf data sets.\n\nnote: Note\nCopying into an in-memory data set does not duplicate the data; instead it just shares a reference to it. This is fast. In contrast, copying into a disk-based data set (e.g. using HDF5 or simple files) will create a duplicate of the data on disk. This is slow. 
However, both directions will not significantly increase the amount of memory allocated by the application.\n\n\n\n\n\n","category":"module"},{"location":"copies.html#Daf.Copies.copy_scalar!","page":"Copies","title":"Daf.Copies.copy_scalar!","text":"copy_scalar!(;\n destination::DafWriter,\n source::DafReader,\n name::AbstractString,\n [rename::Maybe{AbstractString} = nothing,\n dtype::Maybe{Type{T}} = nothing,\n default::Union{StorageScalar, Nothing, UndefInitializer} = undef,\n overwrite::Bool = false]\n)::Nothing where {T <: StorageScalarBase}\n\nCopy a scalar with some name from some source DafReader into some destination DafWriter.\n\nThe scalar is fetched using the name and the default. If rename is specified, store the scalar using this new name. If dtype is specified, the data is converted to this type. If overwrite (not the default), overwrite an existing scalar in the target.\n\n\n\n\n\n","category":"function"},{"location":"copies.html#Daf.Copies.copy_axis!","page":"Copies","title":"Daf.Copies.copy_axis!","text":"copy_axis!(;\n destination::DafWriter,\n source::DafReader,\n axis::AbstractString,\n [rename::Maybe{AbstractString} = nothing,\n default::Union{Nothing, UndefInitializer} = undef]\n)::Nothing\n\nCopy an axis from some source DafReader into some destination DafWriter.\n\nThe axis is fetched using the name and the default. 
If rename is specified, store the axis using this name.\n\n\n\n\n\n","category":"function"},{"location":"copies.html#Daf.Copies.copy_vector!","page":"Copies","title":"Daf.Copies.copy_vector!","text":"copy_vector!(;\n destination::DafWriter,\n source::DafReader,\n axis::AbstractString,\n name::AbstractString,\n [reaxis::Maybe{AbstractString} = nothing,\n rename::Maybe{AbstractString} = nothing,\n dtype::Maybe{Type{T}} = nothing,\n default::Union{StorageScalar, StorageVector, Nothing, UndefInitializer} = undef,\n empty::Maybe{StorageScalar} = nothing,\n overwrite::Bool = false]\n)::Nothing where {T <: StorageScalarBase}\n\nCopy a vector from some source DafReader into some destination DafWriter.\n\nThe vector is fetched using the axis, name and the default. If reaxis is specified, store the vector using this axis. If rename is specified, store the vector using this name. If dtype is specified, the data is converted to this type. If overwrite (not the default), overwrite an existing vector in the target.\n\nThis requires the axis of one data set is the same, or is a superset of, or a subset of, the other. If the target axis contains entries that do not exist in the source, then empty must be specified to fill the missing values. 
If the source axis contains entries that do not exist in the target, they are discarded (not copied).\n\n\n\n\n\n","category":"function"},{"location":"copies.html#Daf.Copies.copy_matrix!","page":"Copies","title":"Daf.Copies.copy_matrix!","text":"copy_matrix!(;\n destination::DafWriter,\n source::DafReader,\n rows_axis::AbstractString,\n columns_axis::AbstractString,\n name::AbstractString,\n [rows_reaxis::Maybe{AbstractString} = nothing,\n columns_reaxis::Maybe{AbstractString} = nothing,\n rename::Maybe{AbstractString} = nothing,\n dtype::Maybe{Type{T}} = nothing,\n default::Union{StorageScalar, StorageVector, Nothing, UndefInitializer} = undef,\n empty::Maybe{StorageScalar} = nothing,\n relayout::Bool = true,\n overwrite::Bool = false]\n)::Nothing where {T <: StorageScalarBase}\n\nCopy a matrix from some source DafReader into some destination DafWriter.\n\nThe matrix is fetched using the rows_axis, columns_axis, name, relayout and the default. If rows_reaxis and/or columns_reaxis are specified, store the matrix using these axes. If rename is specified, store the matrix using this name. If dtype is specified, the data is converted to this type. If overwrite (not the default), overwrite an existing matrix in the target. The matrix is stored with the same relayout.\n\nThis requires each axis of one data set is the same, or is a superset of, or a subset of, the other. If a target axis contains entries that do not exist in the source, then empty must be specified to fill the missing values. If a source axis contains entries that do not exist in the target, they are discarded (not copied).\n\nnote: Note\nWhen copying a matrix from a subset to a superset, if the empty value is zero, then we create a sparse matrix in the destination. 
However, currently we create a temporary dense matrix for this; this is inefficient and should be replaced by a more efficient method.\n\n\n\n\n\n","category":"function"},{"location":"copies.html#Daf.Copies.copy_all!","page":"Copies","title":"Daf.Copies.copy_all!","text":"copy_all!(;\n destination::DafWriter,\n source::DafReader,\n [empty::Maybe{EmptyData} = nothing,\n dtypes::Maybe{DataTypes} = nothing,\n overwrite::Bool = false,\n relayout::Bool = true]\n)::Nothing\n\nCopy all the content of a source DafReader into a destination DafWriter. If overwrite, this will overwrite existing data in the target. If relayout, matrices will be stored in the target in both layouts, regardless of how they were stored in the source.\n\nThis will create target axes that exist only in the source, but will not overwrite existing target axes, regardless of the value of overwrite. An axis that exists in the target must be identical to, or be a subset of, the same axis in the source.\n\nIf the source has axes which are a subset of the same axes in the target, then you must specify a dictionary of values for the empty entries that will be created in the target when copying any vector and/or matrix properties. This is specified using a (axis, property) => value entry for specifying an empty value for a vector property and a (rows_axis, columns_axis, property) => value entry for specifying an empty value for a matrix property. The order of the axes for matrix properties doesn't matter (the same empty value is automatically used for both axes orders).\n\nIf dtypes is specified, the copied data of the matching property is converted to the specified data type.\n\n\n\n\n\n","category":"function"},{"location":"copies.html#Daf.Copies.EmptyData","page":"Copies","title":"Daf.Copies.EmptyData","text":"Specify the data to use for missing properties in a Daf data set. 
This is a dictionary with a DataKey specifying which property we provide a value for, and the value to use.\n\nnote: Note\nDue to Julia's type system limitations, there's just no way for the system to enforce the type of the pairs when initializing this dictionary. That is, what we'd like to say is:\n\nEmptyData = AbstractDict{DataKey, StorageScalar}\n\nBut what we are forced to say is:\n\nEmptyData = AbstractDict\n\nThat's not a mistake. Even EmptyData = AbstractDict{Key, StorageScalar} where {Key} fails to work, as do all the (many) possibilities for expressing \"this is a dictionary where the key or the value can be one of several things\". Sigh. Glory to anyone who figures out an incantation that would force the system to perform any meaningful type inference here.\n\n\n\n\n\n","category":"type"},{"location":"copies.html#Daf.Copies.DataTypes","page":"Copies","title":"Daf.Copies.DataTypes","text":"Specify the data type to use for overriding properties types in a Daf data set. This is a dictionary with a DataKey specifying which property we override the type of, and the data type to use.\n\nnote: Note\nDue to Julia's type system limitations, there's just no way for the system to enforce the type of the pairs when initializing this dictionary. That is, what we'd like to say is:\n\nDataTypes = AbstractDict{DataKey, Type{T}} where {T <: StorageScalarBase}\n\nBut what we are forced to say is:\n\nDataTypes = AbstractDict\n\nThat's not a mistake. Even DataTypes = AbstractDict{Key, T <: StorageScalarBase} where {Key, T <: StorageScalarBase} fails to work, as do all the (many) possibilities for expressing \"this is a dictionary where the key or the value can be one of several things\". Sigh. 
Glory to anyone who figures out an incantation that would force the system to perform any meaningful type inference here.\n\n\n\n\n\n","category":"type"},{"location":"copies.html#Index","page":"Copies","title":"Index","text":"","category":"section"},{"location":"copies.html","page":"Copies","title":"Copies","text":"Pages = [\"copies.md\"]","category":"page"},{"location":"files_format.html#Files-Format","page":"Files Format","title":"Files Format","text":"","category":"section"},{"location":"files_format.html","page":"Files Format","title":"Files Format","text":"Daf.FilesFormat\nDaf.FilesFormat.MAJOR_VERSION\nDaf.FilesFormat.MINOR_VERSION\nDaf.FilesFormat.FilesDaf","category":"page"},{"location":"files_format.html#Daf.FilesFormat","page":"Files Format","title":"Daf.FilesFormat","text":"A Daf storage format in disk files. This is an efficient way to persist Daf data in a filesystem, and offers a different trade-off compared to storing the data in an HDF5 file.\n\nOn the downside, this being a directory, you need to create a zip or tar or some other form of archive file if you want to publish it. Also, accessing FilesDaf will consume multiple file descriptors as opposed to just one for HDF5, and, of course, HDF5 has libraries to support it in most systems.\n\nOn the upside, the format of the files is so simple that it is trivial to access them from any programming environment, without requiring a complex library like HDF5. In addition, since each scalar, vector or matrix property is stored in a separate file, deleting data automatically frees the storage (unlike in an HDF5 file, where you must manually repack the file to actually release the storage). Also, you can use standard tools to look at the data (e.g. use ls or the Windows file explorer to view the list of properties, how much space each one uses, when it was created, etc.). 
Most importantly, this allows using standard tools like make to create automatic repeatable processing workflows.\n\nWe use multiple files to store Daf data, under some root directory, as follows:\n\nThe directory will contain 4 sub-directories: scalars, axes, vectors, and matrices, and a file called daf.json.\nThe daf.json signifies that the directory contains Daf data. In this file, there should be a mapping with a version key whose value is an array of two integers. The first is the major version number and the second is the minor version number, using semantic versioning. This makes it easy to test whether a directory does/n't contain Daf data, and which version of the internal structure it is using. Currently the only defined version is [1,0].\nThe scalars directory contains scalar properties, each as in its own name.json file, containing a mapping with a type key whose value is the data type of the scalar (one of the StorageScalar types, with String for a string scalar) and a value key whose value is the actual scalar value.\nThe axes directory contains a name.txt file per axis, where each line contains a name of an axis entry.\nThe vectors directory contains a directory per axis, containing the vectors. For every vector, a name.json file will contain a mapping with an eltype key specifying the type of the vector element, and a format key specifying how the data is stored on disk, one of dense and sparse.\nIf the format is dense, then there will be a file containing the vector entries, either name.txt for strings (with a value per line), or name.data for binary data (which we can memory-map for direct access).\nIf the format is sparse, then there will also be an indtype key specifying the data type of the indices of the non-zero values, and two binary data files, name.nzind containing the indices of the non-zero entries, and name.nzval containing the values of the non-zero entries (which we can memory-map for direct access). 
See Julia's SparseVector implementation for details.\nThe matrices directory contains a directory per rows axis, which contains a directory per columns axis, which contains the matrices. For each matrix, a name.json file will contain a mapping with an eltype key specifying the type of the matrix element, and a format key specifying how the data is stored on disk, one of dense and sparse.\nIf the format is dense, then there will be a name.data binary file in column-major layout (which we can memory-map for direct access).\nIf the format is sparse, then there will also be an indtype key specifying the data type of the indices of the non-zero values, and three binary data files, name.colptr, name.rowval containing the indices of the non-zero values, and name.nzval containing the values of the non-zero entries (which we can memory-map for direct access). See Julia's SparseMatrixCSC implementation for details.\n\nExample directory structure:\n\nexample-daf-dataset-root-directory/\n├─ daf.json\n├─ scalars/\n│ └─ version.json\n├─ axes/\n│ ├─ cell.txt\n│ └─ gene.txt\n├─ vectors/\n│ ├─ cell/\n│ │ ├─ batch.json\n│ │ └─ batch.txt\n│ └─ gene/\n│ ├─ is_marker.json\n│ └─ is_marker.data\n└─ matrices/\n ├─ cell/\n │ ├─ cell/\n │ └─ gene/\n │ ├─ UMIs.json\n │ ├─ UMIs.colptr\n │ ├─ UMIs.rowval\n │ └─ UMIs.nzval\n └─ gene/\n ├─ cell/\n └─ gene/\n\nnote: Note\nAll binary data is stored as a sequence of elements, in little endian byte order (which is the native order for modern CPUs), without any headers or padding. (Dense) matrices are stored in column-major layout (which matches Julia's native matrix layout).\n\nAll string data is stored in lines, one entry per line, separated by a \\n character (regardless of the OS used). Therefore, you can't have a line break inside an axis entry name or in a vector property value, at least not when storing it in FilesDaf.\n\nWhen creating an HDF5 file to contain `Daf` data, you should specify\n`;fapl=HDF5.FileAccessProperties(;alignment=(1,8))`. 
This ensures all the memory buffers are properly aligned for\nefficient access. Otherwise, memory mapping will be **much** less efficient. A warning is therefore generated\nwhenever you try to access `Daf` data stored in an HDF5 file which does not enforce proper alignment.\n\nThat's all there is to it. The format is intentionally simple and transparent to maximize its accessibility by other (standard) tools. Still, it is easiest to create the data using the Julia Daf package.\n\nnote: Note\nThe code here assumes the files data obeys all the above conventions and restrictions. As long as you only create and access Daf data in files using FilesDaf, then the code will work as expected (assuming no bugs). However, if you do this in some other way (e.g., directly using the filesystem and custom tools), and the result is invalid, then the code here may fail with \"less than friendly\" error messages.\n\n\n\n\n\n","category":"module"},{"location":"files_format.html#Daf.FilesFormat.MAJOR_VERSION","page":"Files Format","title":"Daf.FilesFormat.MAJOR_VERSION","text":"The specific major version of the FilesDaf format that is supported by this code (1). The code will refuse to access data that is stored in a different major format.\n\n\n\n\n\n","category":"constant"},{"location":"files_format.html#Daf.FilesFormat.MINOR_VERSION","page":"Files Format","title":"Daf.FilesFormat.MINOR_VERSION","text":"The maximal minor version of the FilesDaf format that is supported by this code (0). 
The code will refuse to access data that is stored with the expected major version (1), but that uses a higher minor version.\n\nnote: Note\nModifying data that is stored with a lower minor version number may increase its minor version number.\n\n\n\n\n\n","category":"constant"},{"location":"files_format.html#Daf.FilesFormat.FilesDaf","page":"Files Format","title":"Daf.FilesFormat.FilesDaf","text":"FilesDaf(\n path::AbstractString,\n mode::AbstractString = \"r\";\n [name::Maybe{AbstractString} = nothing]\n)\n\nStorage in disk files in some directory.\n\nWhen opening an existing data set, if name is not specified, and there exists a \"name\" scalar property, it is used as the name. Otherwise, the path will be used as the name.\n\nThe valid mode values are as follows (the default mode is r):\n\nMode Allow modifications? Create if does not exist? Truncate if exists? Returned type\nr No No No DafReadOnly\nr+ Yes No No FilesDaf\nw+ Yes Yes No FilesDaf\nw Yes Yes Yes FilesDaf\n\n\n\n\n\n","category":"type"},{"location":"files_format.html#Index","page":"Files Format","title":"Index","text":"","category":"section"},{"location":"files_format.html","page":"Files Format","title":"Files Format","text":"Pages = [\"files_format.md\"]","category":"page"},{"location":"groups.html#Groups","page":"Groups","title":"Groups","text":"","category":"section"},{"location":"groups.html","page":"Groups","title":"Groups","text":"Daf.Groups\nDaf.Groups.group_names\nDaf.Groups.compact_groups!\nDaf.Groups.collect_group_members","category":"page"},{"location":"groups.html#Daf.Groups","page":"Groups","title":"Daf.Groups","text":"Functions for dealing with computing groups of axis entries (typically for creating a new axis).\n\n\n\n\n\n","category":"module"},{"location":"groups.html#Daf.Groups.group_names","page":"Groups","title":"Daf.Groups.group_names","text":"group_names(\n daf::DafReader,\n axis::AbstractString,\n entries_of_groups::AbstractVector{<:AbstractVector{<:Integer}};\n 
prefix::AbstractString,\n)::Vector{String}\n\nGiven an entries_of_groups vector of vectors, one for each group, containing the (sorted) indices of the entries of the group along some axis of some daf data set, return a vector giving a unique name for each group. This name consists of the prefix, followed by the index of the group, followed by a .XX two-digit suffix which is a hash of the names of the axis entries of the group.\n\nThe returned names strike a balance between readability and safety. A name like M123.89 for group #123 is easy to deal with manually, but is also reasonably safe in the common use case that groups are re-computed, and there is per-group metadata lying around associated with the old groups, as the probability of the new group #123 having the same suffix is only 1% (unless it is actually identical).\n\n\n\n\n\n","category":"function"},{"location":"groups.html#Daf.Groups.compact_groups!","page":"Groups","title":"Daf.Groups.compact_groups!","text":"compact_groups!(\n group_indices::AbstractVector{<:Integer},\n)::Int\n\nGiven an array group_indices which assigns each entry of some axis to a non-negative group index (with zero meaning \"no group\"), compact it in-place so that the group indices will be 1...N, and return N.\n\n\n\n\n\n","category":"function"},{"location":"groups.html#Daf.Groups.collect_group_members","page":"Groups","title":"Daf.Groups.collect_group_members","text":"collect_group_members(\n group_indices::AbstractVector{T},\n)::Vector{Vector{T}} where {T <: Integer}\n\nGiven an array group_indices which assigns each entry of some axis to a non-negative group index (with zero meaning \"no group\"), where the group indices are compact (in the range 1...N), return a vector of vectors, one for each group, containing the (sorted) indices of the entries of the 
group.\n\n\n\n\n\n","category":"function"},{"location":"groups.html#Index","page":"Groups","title":"Index","text":"","category":"section"},{"location":"groups.html","page":"Groups","title":"Groups","text":"Pages = [\"groups.md\"]","category":"page"},{"location":"generic_logging.html#Generic-Logging","page":"Generic Logging","title":"Generic Logging","text":"","category":"section"},{"location":"generic_logging.html","page":"Generic Logging","title":"Generic Logging","text":"Daf.GenericLogging\nDaf.GenericLogging.setup_logger\nDaf.GenericLogging.@logged","category":"page"},{"location":"generic_logging.html#Daf.GenericLogging","page":"Generic Logging","title":"Daf.GenericLogging","text":"Generic macros and functions for logging, that arguably should belong in a more general-purpose package.\n\nWe do not re-export the macros and functions defined here from the top-level Daf namespace. That is, even if using Daf, you will not have these generic names polluting your namespace. If you do want to reuse them in your code, explicitly write using Daf.GenericLogging.\n\n\n\n\n\n","category":"module"},{"location":"generic_logging.html#Daf.GenericLogging.setup_logger","page":"Generic Logging","title":"Daf.GenericLogging.setup_logger","text":"setup_logger(\n io::IO = stderr;\n [level::LogLevel = Warn,\n show_time::Bool = true,\n show_module::Bool = true,\n show_location::Bool = false]\n)::Nothing\n\nSetup a global logger that will print into io, printing messages with a timestamp prefix.\n\nBy default, this will only print warnings. Note that increasing the log level will apply to everything. 
An alternative is to set up the environment variable JULIA_DEBUG to a comma-separated list of modules you wish to see the debug messages of.\n\nIf show_time, each message will be prefixed with a yyyy-dd-mm HH:MM:SS.sss timestamp prefix.\n\nIf show_module, each message will be prefixed with the name of the module emitting the message.\n\nIf show_location, each message will be prefixed with the file name and the line number emitting the message.\n\n\n\n\n\n","category":"function"},{"location":"generic_logging.html#Daf.GenericLogging.@logged","page":"Generic Logging","title":"Daf.GenericLogging.@logged","text":"@logged function something(...)\n return ...\nend\n\nAutomatically log (in Debug level) every invocation to the function. This will also log the values of the arguments. Emits a second log entry when the function returns, with the result (if any).\n\n\n\n\n\n","category":"macro"},{"location":"generic_logging.html#Index","page":"Generic Logging","title":"Index","text":"","category":"section"},{"location":"generic_logging.html","page":"Generic Logging","title":"Generic Logging","text":"Pages = [\"generic_logging.md\"]","category":"page"},{"location":"generic_types.html#Generic-Types","page":"Generic Types","title":"Generic Types","text":"","category":"section"},{"location":"generic_types.html","page":"Generic Types","title":"Generic Types","text":"Daf.GenericTypes","category":"page"},{"location":"generic_types.html#Daf.GenericTypes","page":"Generic Types","title":"Daf.GenericTypes","text":"Generic types that arguably should belong in a more general-purpose package.\n\nWe do not re-export the types and functions defined here from the top-level Daf namespace. That is, even if using Daf, you will not have these generic names polluting your namespace. 
If you do want to reuse them in your code, explicitly write using Daf.GenericTypes.\n\n\n\n\n\n","category":"module"},{"location":"generic_types.html#Unions","page":"Generic Types","title":"Unions","text":"","category":"section"},{"location":"generic_types.html","page":"Generic Types","title":"Generic Types","text":"We got sick and tired of writing Union{..., Nothing} everywhere. We therefore created the shorthand unions listed below and used them throughout the code. We're well aware there was a religious war over whether there should be a standard shorthand for this, vs. a standard shorthand for Union{..., Missing}, with everyone losing, that is, having to use the explicit Union notation everywhere.","category":"page"},{"location":"generic_types.html","page":"Generic Types","title":"Generic Types","text":"Looking at the answers here, Nothing means \"there is no value\" and Missing means \"there is a value, but we don't know what it is\" (Unknown might have been a better name).","category":"page"},{"location":"generic_types.html","page":"Generic Types","title":"Generic Types","text":"Under this interpretation, Union{..., Nothing} has (almost) the same semantics as Haskell's Maybe, so that's what we called it (other languages call this Optional or Opt). It is used heavily in our (and a lot of other) Julia code. We also added Unsure as a shorthand for Union{..., Missing} for completeness, but we do not actually use it anywhere. We assume it is useful for Julia code dealing specifically with statistical analysis.","category":"page"},{"location":"generic_types.html","page":"Generic Types","title":"Generic Types","text":"Daf.GenericTypes.Maybe\nDaf.GenericTypes.Unsure","category":"page"},{"location":"generic_types.html#Daf.GenericTypes.Maybe","page":"Generic Types","title":"Daf.GenericTypes.Maybe","text":"Maybe{T} = Union{T, Nothing}\n\nThe type to use when maybe there is a value, maybe there isn't. 
This is exactly as if writing the explicit Union with Nothing but is shorter and more readable. This is extremely common.\n\n\n\n\n\n","category":"type"},{"location":"generic_types.html#Daf.GenericTypes.Unsure","page":"Generic Types","title":"Daf.GenericTypes.Unsure","text":"Unsure{T} = Union{T, Missing}\n\nThe type to use when maybe there always is a value, but sometimes we are not sure what it is. This is exactly as if writing the explicit Union with Missing but is shorter and more readable. This is only used in code dealing with statistics to represent missing (that is, unknown) data. It is only provided here for completeness.\n\n\n\n\n\n","category":"type"},{"location":"generic_types.html#Strings","page":"Generic Types","title":"Strings","text":"","category":"section"},{"location":"generic_types.html","page":"Generic Types","title":"Generic Types","text":"We use the following as type annotations for function arguments to be able to accept \"any\" concrete string collection type.","category":"page"},{"location":"generic_types.html","page":"Generic Types","title":"Generic Types","text":"Daf.GenericTypes.AbstractStringVector\nDaf.GenericTypes.AbstractStringSet","category":"page"},{"location":"generic_types.html#Daf.GenericTypes.AbstractStringVector","page":"Generic Types","title":"Daf.GenericTypes.AbstractStringVector","text":"AbstractStringVector = AbstractVector{S} where {S <: AbstractString}\n\nA vector of strings, without commitment to the concrete implementation of either the vector or the strings contained in it.\n\n\n\n\n\n","category":"type"},{"location":"generic_types.html#Daf.GenericTypes.AbstractStringSet","page":"Generic Types","title":"Daf.GenericTypes.AbstractStringSet","text":"AbstractStringSet = AbstractSet{S} where {S <: AbstractString}\n\nA set of strings, without commitment to the concrete implementation of either the set or the strings contained in it.\n\n\n\n\n\n","category":"type"},{"location":"generic_types.html#Index","page":"Generic 
Types","title":"Index","text":"","category":"section"},{"location":"generic_types.html","page":"Generic Types","title":"Generic Types","text":"Pages = [\"generic_types.md\"]","category":"page"},{"location":"anndata_format.html#AnnData-Format","page":"AnnData Format","title":"AnnData Format","text":"","category":"section"},{"location":"anndata_format.html","page":"AnnData Format","title":"AnnData Format","text":"Daf.AnnDataFormat\nDaf.AnnDataFormat.anndata_as_daf\nDaf.AnnDataFormat.daf_as_anndata","category":"page"},{"location":"anndata_format.html#Daf.AnnDataFormat","page":"AnnData Format","title":"Daf.AnnDataFormat","text":"Import/export Daf data from/to AnnData.\n\nDue to the different data models, not all the content of AnnData can be represented as Daf, and vice-versa. However, \"most\" of the data can be automatically converted from one form to the other. In both directions, conversion is zero-copy; that is, we merely create a different view for the same vectors and matrices. We also use memory-mapping whenever possible for increased performance.\n\nnote: Note\n\n\nWe use the AnnData Julia implementation from Muon.jl. The last published release for this package is from 2021, and lacks features added over the years, which we use. Therefore, currently Daf uses the head revision of Muon from github, with all that implies. We'll change this to a proper registry dependency if/when a new Muon version is released.\n\nThe following Daf data can't be naively stored in AnnData:\n\nAnnData is restricted to storing data for only two axes, which AnnData always calls \"obs\" and \"var\". In contrast, Daf can store data for an arbitrary set of meaningfully named axes.\nAnnData always contains a matrix property for these two axes called \"X\". Mercifully, the rest of the matrices are allowed to have meaningful names. 
In contrast, Daf allows storing an arbitrary set of meaningfully named matrices.\nAnnData can only hold row-major matrices, while Julia defaults to column-major layout.\n\nTherefore, when viewing Daf data as AnnData, we pick two specific axes and rename them to \"obs\" and \"var\", pick a specific matrix property of these axes and rename it to \"X\", and relayout! it if needed so AnnData would be happy. We store the discarded names of the axes and matrix in unstructured annotations called obs_is, var_is and X_is. This allows us to reconstruct the original names when re-viewing the AnnData as Daf data.\n\nThe following AnnData can't be naively stored in Daf:\n\nNon-scalars (e.g., mappings) inside uns unstructured annotations. The Daf equivalent is storing JSON string blobs, which is awkward to use. TODO: provide better API to deal with such data.\nData using nullable entries (e.g. a matrix with nullable integer entries). In contrast, Daf supports the convention that zero values are special. This only works in some cases (e.g., it isn't a good solution for Boolean data). It is possible of course to explicitly store Boolean masks and apply them to the data, but this is inconvenient. TODO: Have Daf natively support nullable/masked arrays.\nCategorical data. Categorical vectors are therefore converted to simple strings. However, Daf doesn't support matrices of strings, so it doesn't support or convert categorical matrices.\nMatrix data that only uses one of the axes (that is, obsm and varm data). The problem here is, paradoxically, that Daf supports such data \"too well\", by allowing multiple axes to be defined, and storing matrices based on any pair of axes. However, this requires the other axes to be explicitly created, and their information just doesn't exist in the AnnData data set. 
TODO: Allow unstructured annotations to store the entries of the other axis.\n\nWhen viewing AnnData as Daf, we either ignore, warn, or treat as an error any such unsupported data.\n\nwarning: DANGER, WILL ROBINSON\n\n\nSquare matrices accessed via Daf APIs will be the (column-major) transpose of the original AnnData (row-major) matrix.\n\nDue to limitations of the Daf data model, square matrices are stored only in column-major layout. In contrast, AnnData square matrices (obsp, varp), are stored in row-major layout. We have several bad options to address this:\n\nWe can break the Daf invariant that all accessed data is column-major, at least for square matrices. This is bad because the invariant greatly simplifies Daf client code. Forcing clients to check the data layout and calling relayout! would add a lot of error-prone boilerplate to our users.\nWe can relayout! the data when copying it between AnnData and Daf. This is bad because it would force us to duplicate the data. More importantly, there is typically a good reason for the layout of the data. For example, assume a directed graph between cells. A common way to store it is to have a square matrix where each row contains the weights of the edges originating in one cell, connecting it to all other cells. This allows code to efficiently \"loop on all cells; loop on all outgoing edges\". If we relayout! the data, then such a loop would become extremely inefficient.\nWe can return the transposed matrix from Daf. This is bad because Julia code and Python code processing the \"same\" data would need to flip the indices (e.g., outgoing_weight[from_cell, to_cell] in Python vs. outgoing_weight[to_cell, from_cell] in Julia).\n\nHaving to pick between these bad options, we chose the last one as the lesser evil. The assumption is that Julia code is written separately from the Python code anyway. 
If the same algorithm is implemented in both systems, it would work (efficiently!), as long as the developer read this warning and flipped the order of the indices, that is.\n\nWe do not have this problem with non-square matrices (e.g., the per-cell-per-gene UMIs matrix), since Daf allows for storing and accessing both layouts of the same data in this case. We simply populate Daf with the row-major data from AnnData and if asked for the other layout, will relayout! it (and store/cache the result).\n\n\n\n\n\n","category":"module"},{"location":"anndata_format.html#Daf.AnnDataFormat.anndata_as_daf","page":"AnnData Format","title":"Daf.AnnDataFormat.anndata_as_daf","text":"anndata_as_daf(\n adata::Union{AnnData, AbstractString};\n [name::Maybe{AbstractString} = nothing,\n obs_is::Maybe{AbstractString} = nothing,\n var_is::Maybe{AbstractString} = nothing,\n X_is::Maybe{AbstractString} = nothing,\n unsupported_handler::AbnormalHandler = WarnHandler]\n)::MemoryDaf\n\nView AnnData as a Daf data set, specifically using a MemoryDaf. This doesn't duplicate matrices or vectors, but acts as a view containing references to the same ones. Adding and/or deleting data in the view using the Daf API will not affect the original adata.\n\nAny unsupported AnnData annotations will be handled using the unsupported_handler. 
By default, we'll warn about each and every such unsupported property.\n\nIf adata is a string, then it is the path of an h5ad file which is automatically loaded.\n\nIf not specified, the name will be the value of the \"name\" uns property, if it exists, otherwise, it will be \"anndata\".\n\nIf not specified, obs_is (the name of the \"obs\" axis) will be the value of the \"obs_is\" uns property, if it exists, otherwise, it will be \"obs\".\n\nIf not specified, var_is (the name of the \"var\" axis) will be the value of the \"var_is\" uns property, if it exists, otherwise, it will be \"var\".\n\nIf not specified, X_is (the name of the \"X\" matrix) will be the value of the \"X_is\" uns property, if it exists, otherwise, it will be \"X\".\n\n\n\n\n\n","category":"function"},{"location":"anndata_format.html#Daf.AnnDataFormat.daf_as_anndata","page":"AnnData Format","title":"Daf.AnnDataFormat.daf_as_anndata","text":"daf_as_anndata(\n daf::DafReader;\n [obs_is::Maybe{AbstractString} = nothing,\n var_is::Maybe{AbstractString} = nothing,\n X_is::Maybe{AbstractString} = nothing,\n h5ad::Maybe{AbstractString} = nothing]\n)::AnnData\n\nView the daf data set as AnnData. This doesn't duplicate matrices or vectors, but acts as a view containing references to the same ones. 
Adding and/or deleting data in the view using the AnnData API will not affect the original daf data set.\n\nIf specified, the result is also written to an h5ad file.\n\nIf not specified, obs_is (the name of the \"obs\" axis) will be the value of the \"obs_is\" scalar property, if it exists, otherwise, it will be \"obs\".\n\nIf not specified, var_is (the name of the \"var\" axis) will be the value of the \"var_is\" scalar property, if it exists, otherwise, it will be \"var\".\n\nIf not specified, X_is (the name of the \"X\" matrix) will be the value of the \"X_is\" scalar property, if it exists, otherwise, it will be \"X\".\n\nEach of the final obs_is, var_is, X_is values is stored as unstructured annotations, unless the default value (\"obs\", \"var\", \"X\") is used.\n\nAll scalar properties, vector properties of the chosen \"obs\" and \"var\" axes, and matrix properties of these axes, are stored in the returned new AnnData object.\n\n\n\n\n\n","category":"function"},{"location":"anndata_format.html#Index","page":"AnnData Format","title":"Index","text":"","category":"section"},{"location":"anndata_format.html","page":"AnnData Format","title":"AnnData Format","text":"Pages = [\"anndata_format.md\"]","category":"page"},{"location":"memory_format.html#Memory-Format","page":"Memory Format","title":"Memory Format","text":"","category":"section"},{"location":"memory_format.html","page":"Memory Format","title":"Memory Format","text":"Daf.MemoryFormat\nDaf.MemoryFormat.MemoryDaf","category":"page"},{"location":"memory_format.html#Daf.MemoryFormat","page":"Memory Format","title":"Daf.MemoryFormat","text":"In-memory Daf storage format.\n\n\n\n\n\n","category":"module"},{"location":"memory_format.html#Daf.MemoryFormat.MemoryDaf","page":"Memory Format","title":"Daf.MemoryFormat.MemoryDaf","text":"struct MemoryDaf <: DafWriter ... 
end\n\nMemoryDaf(; name = \"memory\")\n\nSimple in-memory storage.\n\nThis just keeps everything in-memory, similarly to the way an AnnData object works; that is, this is a lightweight object that just keeps references to the data it is given.\n\nThis is the \"default\" storage type you should use, unless you need to persist the data on the disk.\n\n\n\n\n\n","category":"type"},{"location":"memory_format.html#Index","page":"Memory Format","title":"Index","text":"","category":"section"},{"location":"memory_format.html","page":"Memory Format","title":"Memory Format","text":"Pages = [\"memory_format.md\"]","category":"page"},{"location":"operations.html#Query-operations","page":"Query operations","title":"Query operations","text":"","category":"section"},{"location":"operations.html","page":"Query operations","title":"Query operations","text":"Daf.Operations","category":"page"},{"location":"operations.html#Daf.Operations","page":"Query operations","title":"Daf.Operations","text":"A Daf query can use operations to process the data: EltwiseOperations that preserve the shape of the data, and ReductionOperations that reduce a matrix to a vector, or a vector to a scalar.\n\n\n\n\n\n","category":"module"},{"location":"operations.html#Element-wise-operations","page":"Query operations","title":"Element-wise operations","text":"","category":"section"},{"location":"operations.html","page":"Query operations","title":"Query operations","text":"Daf.Operations.Abs\nDaf.Operations.Clamp\nDaf.Operations.Convert\nDaf.Operations.Fraction\nDaf.Operations.Log\nDaf.Operations.Round\nDaf.Operations.Significant","category":"page"},{"location":"operations.html#Daf.Operations.Abs","page":"Query operations","title":"Daf.Operations.Abs","text":"Abs([; dtype::Maybe{Type} = nothing])\n\nElement-wise operation that converts every element to its absolute value.\n\nParameters\n\ndtype - The default output data type is the unsigned_dtype_for the input data 
type.\n\n\n\n\n\n","category":"type"},{"location":"operations.html#Daf.Operations.Clamp","page":"Query operations","title":"Daf.Operations.Clamp","text":"Clamp([; min::Maybe{StorageNumber} = nothing, max::Maybe{StorageNumber} = nothing])\n\nElement-wise operation that converts every element to a value inside a range.\n\nParameters\n\nmin - If specified, values lower than this will be increased to this value.\n\nmax - If specified, values higher than this will be decreased to this value.\n\nnote: Note\nAt least one of min and max must be specified.\n\n\n\n\n\n","category":"type"},{"location":"operations.html#Daf.Operations.Convert","page":"Query operations","title":"Daf.Operations.Convert","text":"Convert([; dtype::Type])\n\nElement-wise operation that converts every element to a given data type.\n\nParameters\n\ndtype - The data type to convert to. There's no default.\n\n\n\n\n\n","category":"type"},{"location":"operations.html#Daf.Operations.Fraction","page":"Query operations","title":"Daf.Operations.Fraction","text":"Fraction([; dtype::Type])\n\nElement-wise operation that converts every element to its fraction out of the total. If the total is zero, all the fractions are also set to zero. This implicitly assumes (but does not enforce) that all the entry value(s) are positive.\n\nFor matrices, each entry becomes its fraction out of the total of the column it belongs to. For vectors, each entry becomes its fraction out of the total of the vector. 
For scalars, this operation makes no sense so fails with an error.\n\nParameters\n\ndtype - The default output data type is the float_dtype_for of the input data type.\n\n\n\n\n\n","category":"type"},{"location":"operations.html#Daf.Operations.Log","page":"Query operations","title":"Daf.Operations.Log","text":"Log(; dtype::Maybe{Type} = nothing, base::StorageNumber = e, eps::StorageNumber = 0)\n\nElement-wise operation that converts every element to its logarithm.\n\nParameters:\n\ndtype - The default output data type is the float_dtype_for of the input data type.\n\nbase - The base of the logarithm. By default uses e (that is, computes the natural logarithm), which isn't convenient, but is the standard.\n\neps - Added to the input before computing the logarithm, to handle zero input data. By default is zero.\n\n\n\n\n\n","category":"type"},{"location":"operations.html#Daf.Operations.Round","page":"Query operations","title":"Daf.Operations.Round","text":"Round([; dtype::Maybe{Type} = nothing])\n\nElement-wise operation that converts every element to the nearest integer value.\n\nParameters\n\ndtype - By default, uses the int_dtype_for the input data type.\n\n\n\n\n\n","category":"type"},{"location":"operations.html#Daf.Operations.Significant","page":"Query operations","title":"Daf.Operations.Significant","text":"Significant(; high::StorageNumber, low::Maybe{StorageNumber} = nothing)\n\nElement-wise operation that zeros all \"insignificant\" values. Significant values have a high absolute value. This is typically used to prune matrices of effect sizes (log of ratio between a baseline and some result) for heatmap display. 
For example, log base 2 of gene expression ratio is typically considered significant if it is at least 3 (that is, a ratio at least 8x or at most 1/8x); for genes that have a significant effect, we typically display all entries with a log of at least 2 (that is, a ratio of at least 4x or at most 1/4x).\n\nFor scalars, this operation makes no sense so fails with an error.\n\nParameters:\n\nhigh - A value is considered significant if its absolute value is higher than this. If all values in a vector (or a matrix column) are less than this, then all the vector (or matrix column) entries are zeroed. There's no default.\n\nlow - If there is at least one significant value in a vector (or a matrix column), then zero all entries that are lower than this. By default, this is the same as the high value. Setting it to a lower value will preserve more entries, but only for vectors (or matrix columns) which contain at least some significant data.\n\n\n\n\n\n","category":"type"},{"location":"operations.html#Reduction-operations","page":"Query operations","title":"Reduction operations","text":"","category":"section"},{"location":"operations.html","page":"Query operations","title":"Query operations","text":"Daf.Operations.Sum\nDaf.Operations.Max\nDaf.Operations.Min\nDaf.Operations.Median\nDaf.Operations.Quantile\nDaf.Operations.Mean\nDaf.Operations.GeoMean\nDaf.Operations.Std\nDaf.Operations.StdN\nDaf.Operations.Var\nDaf.Operations.VarN\nDaf.Operations.Mode\nDaf.Operations.Count","category":"page"},{"location":"operations.html#Daf.Operations.Sum","page":"Query operations","title":"Daf.Operations.Sum","text":"Sum(; dtype::Maybe{Type} = nothing)\n\nReduction operation that sums elements.\n\nParameters\n\ndtype - By default, uses the sum_dtype_for the input data type.\n\n\n\n\n\n","category":"type"},{"location":"operations.html#Daf.Operations.Max","page":"Query operations","title":"Daf.Operations.Max","text":"Max()\n\nReduction operation that returns the maximal 
element.\n\n\n\n\n\n","category":"type"},{"location":"operations.html#Daf.Operations.Min","page":"Query operations","title":"Daf.Operations.Min","text":"Min()\n\nReduction operation that returns the minimal element.\n\n\n\n\n\n","category":"type"},{"location":"operations.html#Daf.Operations.Median","page":"Query operations","title":"Daf.Operations.Median","text":"Median(; dtype::Maybe{Type} = nothing)\n\nReduction operation that returns the median value.\n\nParameters\n\ndtype - The default output data type is the float_dtype_for of the input data type.\n\n\n\n\n\n","category":"type"},{"location":"operations.html#Daf.Operations.Quantile","page":"Query operations","title":"Daf.Operations.Quantile","text":"Quantile(; dtype::Maybe{Type} = nothing, p::StorageNumber)\n\nReduction operation that returns the quantile value, that is, a value such that a certain fraction of the values is lower.\n\nParameters\n\ndtype - The default output data type is the float_dtype_for of the input data type.\n\np - The fraction of values below the result (e.g., the 0 computes the minimum, the 0.5 computes the median, and 1.0 computes the maximum). 
There's no default.\n\n\n\n\n\n","category":"type"},{"location":"operations.html#Daf.Operations.Mean","page":"Query operations","title":"Daf.Operations.Mean","text":"Mean(; dtype::Maybe{Type} = nothing)\n\nReduction operation that returns the mean value.\n\nParameters\n\ndtype - The default output data type is the float_dtype_for of the input data type.\n\n\n\n\n\n","category":"type"},{"location":"operations.html#Daf.Operations.GeoMean","page":"Query operations","title":"Daf.Operations.GeoMean","text":"GeoMean(; dtype::Maybe{Type} = nothing, eps::StorageNumber = 0.0)\n\nReduction operation that returns the geometric mean value.\n\nParameters\n\ndtype - The default output data type is the float_dtype_for of the input data type.\n\neps - The regularization factor added to each value and subtracted from the raw geo-mean, to deal with zero values.\n\n\n\n\n\n","category":"type"},{"location":"operations.html#Daf.Operations.Std","page":"Query operations","title":"Daf.Operations.Std","text":"Std(; dtype::Maybe{Type} = nothing)\n\nReduction operation that returns the (uncorrected) standard deviation of the values.\n\nParameters\n\ndtype - The default output data type is the float_dtype_for of the input data type.\n\n\n\n\n\n","category":"type"},{"location":"operations.html#Daf.Operations.StdN","page":"Query operations","title":"Daf.Operations.StdN","text":"StdN(; dtype::Maybe{Type} = nothing, eps::StorageNumber = 0)\n\nReduction operation that returns the (uncorrected) standard deviation of the values, normalized (divided) by the mean value.\n\nParameters\n\ndtype - The default output data type is the float_dtype_for of the input data type.\n\neps - Added to the mean before computing the division, to handle zero input data. 
By default is zero.\n\n\n\n\n\n","category":"type"},{"location":"operations.html#Daf.Operations.Var","page":"Query operations","title":"Daf.Operations.Var","text":"Var(; dtype::Maybe{Type} = nothing)\n\nReduction operation that returns the (uncorrected) variance of the values.\n\nParameters\n\ndtype - The default output data type is the float_dtype_for of the input data type.\n\n\n\n\n\n","category":"type"},{"location":"operations.html#Daf.Operations.VarN","page":"Query operations","title":"Daf.Operations.VarN","text":"VarN(; dtype::Maybe{Type} = nothing, eps::StorageNumber = 0.0)\n\nReduction operation that returns the (uncorrected) variance of the values, normalized (divided) by the mean of the values.\n\nParameters\n\ndtype - The default output data type is the float_dtype_for of the input data type.\n\neps - Added to the mean before computing the division, to handle zero input data. By default is zero.\n\n\n\n\n\n","category":"type"},{"location":"operations.html#Daf.Operations.Mode","page":"Query operations","title":"Daf.Operations.Mode","text":"Mode()\n\nReduction operation that returns the most frequent value in the input (the \"mode\").\n\nnote: Note\nThis operation supports strings; most operations do not.\n\n\n\n\n\n","category":"type"},{"location":"operations.html#Daf.Operations.Count","page":"Query operations","title":"Daf.Operations.Count","text":"Count(; dtype::Maybe{Type} = nothing)\n\nReduction operation that counts elements. 
This is useful when using GroupBy queries to count the number of elements in each group.\n\nParameters\n\ndtype - By default, uses UInt32.\n\n\n\n\n\n","category":"type"},{"location":"operations.html#Support-functions","page":"Query operations","title":"Support functions","text":"","category":"section"},{"location":"operations.html","page":"Query operations","title":"Query operations","text":"Daf.Operations.parse_parameter_value\nDaf.Operations.parse_number_value\nDaf.Operations.parse_number_dtype_value\nDaf.Operations.parse_float_dtype_value\nDaf.Operations.parse_int_dtype_value\nDaf.Operations.error_invalid_parameter_value\nDaf.Operations.float_dtype_for\nDaf.Operations.int_dtype_for\nDaf.Operations.unsigned_dtype_for\nDaf.Operations.sum_dtype_for","category":"page"},{"location":"operations.html#Daf.Operations.parse_parameter_value","page":"Query operations","title":"Daf.Operations.parse_parameter_value","text":"parse_parameter_value(\n parse_value::Function,\n operation_name::Token,\n operation_kind::AbstractString,\n parameters_values::Dict{String, Token},\n parameter_name::AbstractString,\n default::Any,\n)::Any\n\nParse an operation parameter.\n\n\n\n\n\n","category":"function"},{"location":"operations.html#Daf.Operations.parse_number_value","page":"Query operations","title":"Daf.Operations.parse_number_value","text":"parse_number_value(\n operation_name::AbstractString,\n parameter_name::AbstractString,\n parameter_value::Token,\n type::Type{T},\n)::T where {T <: StorageNumber}\n\nParse a numeric operation parameter.\n\n\n\n\n\n","category":"function"},{"location":"operations.html#Daf.Operations.parse_number_dtype_value","page":"Query operations","title":"Daf.Operations.parse_number_dtype_value","text":"parse_number_dtype_value(\n operation_name::AbstractString,\n parameter_name::AbstractString,\n parameter_value::Token,\n)::Maybe{Type}\n\nParse the dtype operation parameter.\n\nValid names are {B,b}ool, {UI,ui,I,i}nt{8,16,32,64} and 
{F,f}loat{32,64}.\n\n\n\n\n\n","category":"function"},{"location":"operations.html#Daf.Operations.parse_float_dtype_value","page":"Query operations","title":"Daf.Operations.parse_float_dtype_value","text":"parse_float_dtype_value(\n operation_name::AbstractString,\n parameter_name::AbstractString,\n parameter_value::Token,\n)::Maybe{Type}\n\nSimilar to parse_number_dtype_value, but only accept floating point types.\n\n\n\n\n\n","category":"function"},{"location":"operations.html#Daf.Operations.parse_int_dtype_value","page":"Query operations","title":"Daf.Operations.parse_int_dtype_value","text":"parse_int_dtype_value(\n operation_name::AbstractString,\n parameter_name::AbstractString,\n parameter_value::Token,\n)::Maybe{Type}\n\nSimilar to parse_number_dtype_value, but only accept integer (signed or unsigned) types.\n\n\n\n\n\n","category":"function"},{"location":"operations.html#Daf.Operations.error_invalid_parameter_value","page":"Query operations","title":"Daf.Operations.error_invalid_parameter_value","text":"error_invalid_parameter_value(\n operation_name::Token,\n parameter_name::AbstractString,\n parameter_value::Token,\n must_be::AbstractString,\n)::Nothing\n\nComplain that an operation parameter value is not valid.\n\n\n\n\n\n","category":"function"},{"location":"operations.html#Daf.Operations.float_dtype_for","page":"Query operations","title":"Daf.Operations.float_dtype_for","text":"float_dtype_for(\n element_type::Type{T},\n dtype::Maybe{Type{D}}\n)::Type where {T <: StorageNumber, D <: StorageNumber}\n\nGiven an input element_type, return the data type to use for the result of an operation that always produces floating point values (e.g., Log). 
If dtype isn't nothing, it is returned instead.\n\n\n\n\n\n","category":"function"},{"location":"operations.html#Daf.Operations.int_dtype_for","page":"Query operations","title":"Daf.Operations.int_dtype_for","text":"int_dtype_for(\n element_type::Type{T},\n dtype::Maybe{Type{D}}\n)::Type where {T <: StorageNumber, D <: StorageNumber}\n\nGiven an input element_type, return the data type to use for the result of an operation that always produces integer values (e.g., Round). If dtype isn't nothing, it is returned instead.\n\n\n\n\n\n","category":"function"},{"location":"operations.html#Daf.Operations.unsigned_dtype_for","page":"Query operations","title":"Daf.Operations.unsigned_dtype_for","text":"unsigned_dtype_for(\n element_type::Type{T},\n dtype::Maybe{Type{D}}\n)::Type where {T <: StorageNumber, D <: StorageNumber}\n\nGiven an input element_type, return the data type to use for the result of an operation that discards the sign of the value (e.g., Abs). If dtype isn't nothing, it is returned instead.\n\n\n\n\n\n","category":"function"},{"location":"operations.html#Daf.Operations.sum_dtype_for","page":"Query operations","title":"Daf.Operations.sum_dtype_for","text":"sum_dtype_for(\n element_type::Type{T},\n dtype::Maybe{Type{D}}\n)::Type where {T <: StorageNumber, D <: StorageNumber}\n\nGiven an input element_type, return the data type to use for the result of an operation that sums many such values (e.g., Sum). 
If dtype isn't nothing, it is returned instead.\n\nThis keeps floating point and 64-bit types as-is, but increases any small integer types to the matching 32 bit type (e.g., an input type of UInt8 will have a sum type of UInt32).\n\n\n\n\n\n","category":"function"},{"location":"operations.html#Index","page":"Query operations","title":"Index","text":"","category":"section"},{"location":"operations.html","page":"Query operations","title":"Query operations","text":"Pages = [\"operations.md\"]","category":"page"},{"location":"views.html#Views","page":"Views","title":"Views","text":"","category":"section"},{"location":"views.html","page":"Views","title":"Views","text":"Daf.Views\nDaf.Views.DafView\nDaf.Views.viewer\nDaf.Views.ViewAxes\nDaf.Views.ViewData\nDaf.Views.ALL_SCALARS\nDaf.Views.VIEW_ALL_SCALARS\nDaf.Views.ALL_AXES\nDaf.Views.VIEW_ALL_AXES\nDaf.Views.ALL_VECTORS\nDaf.Views.VIEW_ALL_VECTORS\nDaf.Views.ALL_MATRICES\nDaf.Views.VIEW_ALL_MATRICES\nDaf.Views.VIEW_ALL_DATA","category":"page"},{"location":"views.html#Daf.Views","page":"Views","title":"Daf.Views","text":"Create a different view of Daf data using queries. This is a very flexible mechanism which can be used for a variety of use cases. A simple way of using this is to view a subset of the data as a Daf data set. A variant of this also renames the data properties to adapt them to the requirements of some computation. This makes it simpler to create such tools (using fixed, generic property names) and apply them to arbitrary data (with arbitrary specific property names).\n\n\n\n\n\n","category":"module"},{"location":"views.html#Daf.Views.DafView","page":"Views","title":"Daf.Views.DafView","text":"struct DafView(daf::DafReader) <: DafReader\n\nA read-only wrapper for any DafReader data, which exposes an arbitrary view of it as another DafReadOnly. 
This isn't typically created manually; instead call viewer.\n\n\n\n\n\n","category":"type"},{"location":"views.html#Daf.Views.viewer","page":"Views","title":"Daf.Views.viewer","text":"viewer(\n daf::DafReader;\n [name::Maybe{AbstractString} = nothing,\n axes::Maybe{ViewAxes} = nothing,\n data::Maybe{ViewData} = nothing]\n)::DafReadOnly\n\nWrap daf data with a read-only DafView. The exposed view is defined by a set of queries applied to the original data. These queries are evaluated only when data is actually accessed. Therefore, creating a view is a relatively cheap operation.\n\nIf the name is not specified, the result name will be based on the name of daf, with a .view suffix.\n\nQueries are listed separately for axes, and scalars, vector and matrix properties, as follows:\n\nnote: Note\nAs an optimization, calling viewer with all-empty (default) arguments returns a simple DafReadOnlyWrapper, that is, it is equivalent to calling read_only. Additionally, saying data = VIEW_ALL_DATA will expose all the data using any of the exposed axes; you can write data = [VIEW_ALL_DATA..., key => nothing] to hide specific data based on its key.\n\n\n\n\n\n","category":"function"},{"location":"views.html#Daf.Views.ViewAxes","page":"Views","title":"Daf.Views.ViewAxes","text":"Specify axes to expose from a view.\n\nThis is specified as a vector of pairs (similar to initializing a Dict). The order of the pairs matter (last one wins).\n\nIf the key is \"*\", then it is replaced by all the names of the axes of the wrapped daf data. Otherwise, the key is just the name of an axis.\n\nIf the value is nothing, then the axis will not be exposed by the view. If the value is \"=\", then the axis will be exposed with the same entries as in the original daf data. Otherwise the value is any valid query that returns a vector of (unique!) strings to serve as the vector entries.\n\nThat is, saying \"*\" => \"=\" (or, VIEW_ALL_AXES will expose all the original daf data axes from the view. 
Following this by saying \"type\" => nothing will hide the type from the view. Saying \"batch\" => q\"/ batch & age > 1 will expose the batch axis, but only including the batches whose age property is greater than 1.\n\nnote: Note\nDue to Julia's type system limitations, there's just no way for the system to enforce the type of the pairs in this vector. That is, what we'd like to say is:ViewAxes = AbstractVector{Pair{AbstractString, Maybe{Union{AbstractString, Query}}}}But what we are forced to say is:ViewAxes = AbstractVector{<:Pair}Glory to anyone who figures out an incantation that would force the system to perform more meaningful type inference here.\n\n\n\n\n\n","category":"type"},{"location":"views.html#Daf.Views.ViewData","page":"Views","title":"Daf.Views.ViewData","text":"Specify data to expose from view. This is specified as a vector of pairs (similar to initializing a Dict). The order of the pairs matter (last one wins).\n\nScalars are specified similarly to ViewAxes, except that the query should return a scalar instead of a vector. That is, saying \"*\" => \"=\" (or, VIEW_ALL_SCALARS) will expose all the original daf data scalars from the view. Following this by saying \"version\" => nothing will hide the version from the view. Adding \"total_umis\" => q\"/ cell / gene : UMIs %> Sum %> Sum\" will expose a total_umis scalar containing the total sum of all UMIs of all genes in all cells, etc.\n\nVectors are specified similarly to scalars, but require a key specifying both an axis and a property name. The axis must be exposed by the view (based on the axes parameter). If the axis is \"*\", it is replaces by all the exposed axis names specified by the axes parameter. Similarly, if the property name is \"*\" (e.g., (\"gene\", \"*\")), then it is replaced by all the vector properties of the exposed axis in the base data. 
Therefore if the pair is (\"*\", \"*\") => \"=\" (or VIEW_ALL_VECTORS)`, all vector properties of all the (exposed) axes will also be exposed.\n\nThe value for vectors must be the suffix of a vector query based on the appropriate axis; a value of \"=\" is again used to expose the property as-is. That is, the value for the vector will normally start with the : (Lookup) query operator.\n\nThat is, specifying that axes = [\"gene\" => q\"/ gene & marker\"], and then that data = [(\"gene\", \"forbidden\") => q\": lateral\"], then the view will expose a forbidden vector property for the gene axis, by applying the combined query / gene & marker : lateral to the original daf data.\n\nThis gets trickier when using a query reducing a matrix to a vector. In these cases, the value query will start with / (Axis) query operator to specify the reduced matrix axis, followed by the : (Lookup) operator. When constructing the full query for the data, we can't simply concatenate the suffix to the axis query prefix; instead we need to swap the order of the axes (this is because Julia, in its infinite wisdom, uses column-major matrices, like R and matlab; so reduction eliminates the rows instead of the columns of the matrix).\n\nThat is, specifying axes = [\"cell\" => q\"/ cell & type = TCell\"], and then data = [(\"cell\", \"total_noisy_UMIs\") => q\"/ gene & noisy : UMIs %> Sum will expose total_noisy_UMIs as a per-cell vector property, using the query / gene & noisy / cell & type = TCell : UMIs %> Sum, which will compute the sum of the UMIs of all the noisy genes for each cell (whose type is TCell).\n\nMatrices require a key specifying both axes and a property name. The axes must both be exposed by the view (based on the axes parameter). Again if any or both of the axes are \"*\", they are replaced by all the exposed axes (based on the axes parameter), and likewise if the name is \"*\", it replaced by all the matrix properties of the axes. 
The value for matrices can again be \"=\" to expose the property as is, or the suffix of a matrix query. Therefore if the pair is (\"*\", \"*\", \"*\") => \"=\" (or, VIEW_ALL_MATRICES), all matrix properties of all the (exposed) axes will also be exposed.\n\nThat is, assuming a gene and cell axes were exposed by the axes parameter, then specifying that (\"cell\", \"gene\", \"log_UMIs\") => q\": UMIs % Log base 2 eps\" will expose the matrix log_UMIs for each cell and gene.\n\nThe order of the axes does not matter, so data = [(\"gene\", \"cell\", \"UMIs\") => \"=\"] has the same effect as data = [(\"cell\", \"gene\", \"UMIs\") => \"=\"].\n\nnote: Note\nDue to Julia's type system limitations, there's just no way for the system to enforce the type of the pairs in this vector. That is, what we'd like to say is:ViewData = AbstractVector{Pair{DataKey, Maybe{Union{AbstractString, Query}}}}But what we are forced to say is:ViewData = AbstractVector{<:Pair}Glory to anyone who figures out an incantation that would force the system to perform more meaningful type inference here.\n\n\n\n\n\n","category":"type"},{"location":"views.html#Daf.Views.ALL_SCALARS","page":"Views","title":"Daf.Views.ALL_SCALARS","text":"A key to use in the data parameter of viewer to specify all the base data scalars.\n\n\n\n\n\n","category":"constant"},{"location":"views.html#Daf.Views.VIEW_ALL_SCALARS","page":"Views","title":"Daf.Views.VIEW_ALL_SCALARS","text":"A pair to use in the data parameter of viewer to specify the view exposes all the base data scalars.\n\n\n\n\n\n","category":"constant"},{"location":"views.html#Daf.Views.ALL_AXES","page":"Views","title":"Daf.Views.ALL_AXES","text":"A pair to use in the axes parameter of viewer to specify all the base data axes.\n\n\n\n\n\n","category":"constant"},{"location":"views.html#Daf.Views.VIEW_ALL_AXES","page":"Views","title":"Daf.Views.VIEW_ALL_AXES","text":"A pair to use in the axes parameter of viewer to specify the view exposes all the base data 
axes.\n\n\n\n\n\n","category":"constant"},{"location":"views.html#Daf.Views.ALL_VECTORS","page":"Views","title":"Daf.Views.ALL_VECTORS","text":"A key to use in the data parameter of viewer to specify all the vectors of the exposed axes.\n\n\n\n\n\n","category":"constant"},{"location":"views.html#Daf.Views.VIEW_ALL_VECTORS","page":"Views","title":"Daf.Views.VIEW_ALL_VECTORS","text":"A pair to use in the data parameter of viewer to specify the view exposes all the vectors of the exposed axes.\n\n\n\n\n\n","category":"constant"},{"location":"views.html#Daf.Views.ALL_MATRICES","page":"Views","title":"Daf.Views.ALL_MATRICES","text":"A key to use in the data parameter of viewer to specify all the matrices of the exposed axes.\n\n\n\n\n\n","category":"constant"},{"location":"views.html#Daf.Views.VIEW_ALL_MATRICES","page":"Views","title":"Daf.Views.VIEW_ALL_MATRICES","text":"A pair to use in the data parameter of viewer to specify the view exposes all the matrices of the exposed axes.\n\n\n\n\n\n","category":"constant"},{"location":"views.html#Daf.Views.VIEW_ALL_DATA","page":"Views","title":"Daf.Views.VIEW_ALL_DATA","text":"A vector of pairs to use in the data parameters of viewer (using ...) to specify the view exposes all the data of the exposed axes.\n\n\n\n\n\n","category":"constant"},{"location":"views.html#Index","page":"Views","title":"Index","text":"","category":"section"},{"location":"views.html","page":"Views","title":"Views","text":"Pages = [\"views.md\"]","category":"page"},{"location":"writers.html#Writers","page":"Writers","title":"Writers","text":"","category":"section"},{"location":"writers.html","page":"Writers","title":"Writers","text":"Daf.Writers","category":"page"},{"location":"writers.html#Daf.Writers","page":"Writers","title":"Daf.Writers","text":"filled vector: The DafWriter interface specify a high-level API for writing Daf data. This API is implemented here, on top of the low-level FormatWriter API. 
This is an extension of the DafReader API and provides thread safety for reading and writing to the same data set from multiple threads, so the low-level API can (mostly) ignore this issue.\n\n\n\n\n\n","category":"module"},{"location":"writers.html#Scalar-properties","page":"Writers","title":"Scalar properties","text":"","category":"section"},{"location":"writers.html","page":"Writers","title":"Writers","text":"Daf.Writers.set_scalar!\nDaf.Writers.delete_scalar!\nDaf.Writers.scalar_names","category":"page"},{"location":"writers.html#Daf.Writers.set_scalar!","page":"Writers","title":"Daf.Writers.set_scalar!","text":"set_scalar!(\n daf::DafWriter,\n name::AbstractString,\n value::StorageScalar;\n [overwrite::Bool = false]\n)::Nothing\n\nSet the value of a scalar property with some name in daf.\n\nIf not overwrite (the default), this first verifies the name scalar property does not exist.\n\n\n\n\n\n","category":"function"},{"location":"writers.html#Daf.Writers.delete_scalar!","page":"Writers","title":"Daf.Writers.delete_scalar!","text":"delete_scalar!(\n daf::DafWriter,\n name::AbstractString;\n must_exist::Bool = true,\n)::Nothing\n\nDelete a scalar property with some name from daf.\n\nIf must_exist (the default), this first verifies the name scalar property exists in daf.\n\n\n\n\n\n","category":"function"},{"location":"writers.html#Daf.Readers.scalar_names","page":"Writers","title":"Daf.Readers.scalar_names","text":"scalar_names(daf::DafReader)::AbstractStringSet\n\nThe names of the scalar properties in daf.\n\n\n\n\n\n","category":"function"},{"location":"writers.html#Writers-axes","page":"Writers","title":"Writers axes","text":"","category":"section"},{"location":"writers.html","page":"Writers","title":"Writers","text":"Daf.Writers.add_axis!\nDaf.Writers.delete_axis!","category":"page"},{"location":"writers.html#Daf.Writers.add_axis!","page":"Writers","title":"Daf.Writers.add_axis!","text":"add_axis!(\n daf::DafWriter,\n axis::AbstractString,\n 
entries::AbstractStringVector\n)::Nothing\n\nAdd a new axis to daf.\n\nThis first verifies the axis does not exist and that the entries are unique.\n\n\n\n\n\n","category":"function"},{"location":"writers.html#Daf.Writers.delete_axis!","page":"Writers","title":"Daf.Writers.delete_axis!","text":"delete_axis!(\n daf::DafWriter,\n axis::AbstractString;\n must_exist::Bool = true,\n)::Nothing\n\nDelete an axis from the daf. This will also delete any vector or matrix properties that are based on this axis.\n\nIf must_exist (the default), this first verifies the axis exists in the daf.\n\n\n\n\n\n","category":"function"},{"location":"writers.html#Vector-properties","page":"Writers","title":"Vector properties","text":"","category":"section"},{"location":"writers.html","page":"Writers","title":"Writers","text":"Daf.Writers.set_vector!\nDaf.Writers.delete_vector!","category":"page"},{"location":"writers.html#Daf.Writers.set_vector!","page":"Writers","title":"Daf.Writers.set_vector!","text":"set_vector!(\n daf::DafWriter,\n axis::AbstractString,\n name::AbstractString,\n vector::Union{StorageScalar, StorageVector};\n [overwrite::Bool = false]\n)::Nothing\n\nSet a vector property with some name for some axis in daf.\n\nIf the vector specified is actually a StorageScalar, the stored vector is filled with this value.\n\nThis first verifies the axis exists in daf, that the property name isn't name, and that the vector has the appropriate length. If not overwrite (the default), this also verifies the name vector does not exist for the axis.\n\n\n\n\n\n","category":"function"},{"location":"writers.html#Daf.Writers.delete_vector!","page":"Writers","title":"Daf.Writers.delete_vector!","text":"delete_vector!(\n daf::DafWriter,\n axis::AbstractString,\n name::AbstractString;\n must_exist::Bool = true,\n)::Nothing\n\nDelete a vector property with some name for some axis from daf.\n\nThis first verifies the axis exists in daf and that the property name isn't name. 
If must_exist (the default), this also verifies the name vector exists for the axis.\n\n\n\n\n\n","category":"function"},{"location":"writers.html#Matrix-properties","page":"Writers","title":"Matrix properties","text":"","category":"section"},{"location":"writers.html","page":"Writers","title":"Writers","text":"Daf.Writers.set_matrix!\nDaf.Writers.relayout_matrix!\nDaf.Writers.delete_matrix!","category":"page"},{"location":"writers.html#Daf.Writers.set_matrix!","page":"Writers","title":"Daf.Writers.set_matrix!","text":"set_matrix!(\n daf::DafWriter,\n rows_axis::AbstractString,\n columns_axis::AbstractString,\n name::AbstractString,\n matrix::Union{StorageNumber, StorageMatrix};\n [overwrite::Bool = false,\n relayout::Bool = true]\n)::Nothing\n\nSet the matrix property with some name for some rows_axis and columns_axis in daf. Since this is Julia, this should be a column-major matrix.\n\nIf the matrix specified is actually a StorageScalar, the stored matrix is filled with this value.\n\nIf relayout (the default), this will also automatically relayout! the matrix and store the result, so the data would also be stored in row-major layout (that is, with the axes flipped), similarly to calling relayout_matrix!.\n\nThis first verifies the rows_axis and columns_axis exist in daf, that the matrix is column-major of the appropriate size. If not overwrite (the default), this also verifies the name matrix does not exist for the rows_axis and columns_axis.\n\n\n\n\n\n","category":"function"},{"location":"writers.html#Daf.Writers.relayout_matrix!","page":"Writers","title":"Daf.Writers.relayout_matrix!","text":"relayout_matrix!(\n daf::DafWriter,\n rows_axis::AbstractString,\n columns_axis::AbstractString,\n name::AbstractString;\n [overwrite::Bool = false]\n)::Nothing\n\nGiven a matrix property with some name exists (in column-major layout) in daf for the rows_axis and the columns_axis, then relayout! 
it and store the row-major result as well (that is, with flipped axes).\n\nThis is useful following calling empty_dense_matrix! or empty_sparse_matrix! to ensure both layouts of the matrix are stored in daf. When calling set_matrix!, it is simpler to just specify (the default) relayout = true.\n\nThis first verifies the rows_axis and columns_axis exist in daf, and that there is a name (column-major) matrix property for them. If not overwrite (the default), this also verifies the name matrix does not exist for the flipped rows_axis and columns_axis.\n\nnote: Note\nA restriction of the way Daf stores data is that square data is only stored in one (column-major) layout (e.g., to store a weighted directed graph between cells, you may store an outgoingweights matrix where each cell's column holds the outgoing weights from the cell to the other cells. In this case you can't ask Daf to relayout the matrix to row-major order so that each cell's row would be the incoming weights from the other cells. Instead you would need to explicitly store a separate incomingweights matrix where each cell's column holds the incoming weights).\n\n\n\n\n\n","category":"function"},{"location":"writers.html#Daf.Writers.delete_matrix!","page":"Writers","title":"Daf.Writers.delete_matrix!","text":"delete_matrix!(\n daf::DafWriter,\n rows_axis::AbstractString,\n columns_axis::AbstractString,\n name::AbstractString;\n [must_exist::Bool = true,\n relayout::Bool = true]\n)::Nothing\n\nDelete a matrix property with some name for some rows_axis and columns_axis from daf.\n\nIf relayout (the default), this will also delete the matrix in the other layout (that is, with flipped axes).\n\nThis first verifies the rows_axis and columns_axis exist in daf. 
If must_exist (the default), this also verifies the name matrix exists for the rows_axis and columns_axis.\n\n\n\n\n\n","category":"function"},{"location":"writers.html#Creating-properties","page":"Writers","title":"Creating properties","text":"","category":"section"},{"location":"writers.html","page":"Writers","title":"Writers","text":"Daf.Writers.empty_dense_vector!\nDaf.Writers.empty_sparse_vector!\nDaf.Writers.empty_dense_matrix!\nDaf.Writers.empty_sparse_matrix!","category":"page"},{"location":"writers.html#Daf.Writers.empty_dense_vector!","page":"Writers","title":"Daf.Writers.empty_dense_vector!","text":"empty_dense_vector!(\n fill::Function,\n daf::DafWriter,\n axis::AbstractString,\n name::AbstractString,\n eltype::Type{T};\n [overwrite::Bool = false]\n)::Any where {T <: StorageNumber}\n\nCreate an empty dense vector property with some name for some axis in daf, pass it to fill, and return the result.\n\nThe returned vector will be uninitialized; the caller is expected to fill it with values. This saves creating a copy of the vector before setting it in the data, which makes a huge difference when creating vectors on disk (using memory mapping). For this reason, this does not work for strings, as they do not have a fixed size.\n\nThis first verifies the axis exists in daf and that the property name isn't name. 
If not overwrite (the default), this also verifies the name vector does not exist for the axis.\n\n\n\n\n\n","category":"function"},{"location":"writers.html#Daf.Writers.empty_sparse_vector!","page":"Writers","title":"Daf.Writers.empty_sparse_vector!","text":"empty_sparse_vector!(\n fill::Function,\n daf::DafWriter,\n axis::AbstractString,\n name::AbstractString,\n eltype::Type{T},\n nnz::StorageInteger,\n indtype::Maybe{Type{I}} = nothing;\n [overwrite::Bool = false]\n)::Any where {T <: StorageNumber, I <: StorageInteger}\n\nCreate an empty sparse vector property with some name for some axis in daf, pass its parts (nzind and nzval) to fill, and return the result.\n\nIf indtype is not specified, it is chosen automatically to be the smallest unsigned integer type needed for the vector.\n\nThe returned vector will be uninitialized; the caller is expected to fill its nzind and nzval vectors with values. Specifying the nnz makes their sizes known in advance, to allow pre-allocating disk data. For this reason, this does not work for strings, as they do not have a fixed size.\n\nThis severely restricts the usefulness of this function, because typically nnz is only know after fully computing the matrix. Still, in some cases a large sparse vector is created by concatenating several smaller ones; this function allows doing so directly into the data vector, avoiding a copy in case of memory-mapped disk formats.\n\nwarning: Warning\nIt is the caller's responsibility to fill the two vectors with valid data. Specifically, you must ensure:nzind[1] == 1\nnzind[i] <= nzind[i + 1]\nnzind[end] == nnz\n\nThis first verifies the axis exists in daf and that the property name isn't name. 
If not overwrite (the default), this also verifies the name vector does not exist for the axis.\n\n\n\n\n\n","category":"function"},{"location":"writers.html#Daf.Writers.empty_dense_matrix!","page":"Writers","title":"Daf.Writers.empty_dense_matrix!","text":"empty_dense_matrix!(\n fill::Function,\n daf::DafWriter,\n rows_axis::AbstractString,\n columns_axis::AbstractString,\n name::AbstractString,\n eltype::Type{T};\n [overwrite::Bool = false]\n)::Any where {T <: StorageNumber}\n\nCreate an empty dense matrix property with some name for some rows_axis and columns_axis in daf, pass it to fill, and return the result. Since this is Julia, this will be a column-major matrix.\n\nThe returned matrix will be uninitialized; the caller is expected to fill it with values. This saves creating a copy of the matrix before setting it in daf, which makes a huge difference when creating matrices on disk (using memory mapping). For this reason, this does not work for strings, as they do not have a fixed size.\n\nThis first verifies the rows_axis and columns_axis exist in daf, that the matrix is column-major of the appropriate size. 
If not overwrite (the default), this also verifies the name matrix does not exist for the rows_axis and columns_axis.\n\n\n\n\n\n","category":"function"},{"location":"writers.html#Daf.Writers.empty_sparse_matrix!","page":"Writers","title":"Daf.Writers.empty_sparse_matrix!","text":"empty_sparse_matrix!(\n fill::Function,\n daf::DafWriter,\n rows_axis::AbstractString,\n columns_axis::AbstractString,\n name::AbstractString,\n eltype::Type{T},\n nnz::StorageInteger,\n intdype::Maybe{Type{I}} = nothing;\n [overwrite::Bool = false]\n)::Any where {T <: StorageNumber, I <: StorageInteger}\n\nCreate an empty sparse matrix property with some name for some rows_axis and columns_axis in daf, pass its parts (colptr, rowval and nzval) to fill, and return the result.\n\nIf indtype is not specified, it is chosen automatically to be the smallest unsigned integer type needed for the matrix.\n\nThe returned matrix will be uninitialized; the caller is expected to fill its colptr, rowval and nzval vectors. Specifying the nnz makes their sizes known in advance, to allow pre-allocating disk space. For this reason, this does not work for strings, as they do not have a fixed size.\n\nThis severely restricts the usefulness of this function, because typically nnz is only know after fully computing the matrix. Still, in some cases a large sparse matrix is created by concatenating several smaller ones; this function allows doing so directly into the data, avoiding a copy in case of memory-mapped disk formats.\n\nwarning: Warning\n\n\nIt is the caller's responsibility to fill the three vectors with valid data. Specifically, you must ensure:\n\ncolptr[1] == 1\ncolptr[end] == nnz + 1\ncolptr[i] <= colptr[i + 1]\nfor all j, for all i such that colptr[j] <= i and i + 1 < colptr[j + 1], 1 <= rowptr[i] < rowptr[i + 1] <= nrows\n\nThis first verifies the rows_axis and columns_axis exist in daf. 
If not overwrite (the default), this also verifies the name matrix does not exist for the rows_axis and columns_axis.\n\n\n\n\n\n","category":"function"},{"location":"writers.html#Index","page":"Writers","title":"Index","text":"","category":"section"},{"location":"writers.html","page":"Writers","title":"Writers","text":"Pages = [\"writers.md\"]","category":"page"},{"location":"formats.html#Formats","page":"Formats","title":"Formats","text":"","category":"section"},{"location":"formats.html","page":"Formats","title":"Formats","text":"Daf.Formats\nDaf.Formats.DataKey","category":"page"},{"location":"formats.html#Daf.Formats","page":"Formats","title":"Daf.Formats","text":"The FormatReader and FormatWriter interfaces specify a low-level API for storing Daf data. To extend Daf to support an additional format, create a new implementation of this API.\n\nA storage format object contains some named scalar data, a set of axes (each with a unique name for each entry), and named vector and matrix data based on these axes.\n\nData properties are identified by a unique name given the axes they are based on. That is, there is a separate namespace for scalar properties, vector properties for each specific axis, and matrix properties for each (ordered) pair of axes.\n\nFor matrices, we keep careful track of their MatrixLayouts. Specifically, a storage format only deals with column-major matrices, listed under the rows axis first and the columns axis second. A storage format object may hold two copies of the same matrix, in both possible memory layouts, in which case it will be listed twice, under both axes orders.\n\nIn general, storage format objects are as \"dumb\" as possible, to make it easier to support new storage formats. 
The required functions implement a glorified key-value repository, with the absolutely minimal necessary logic to deal with the separate property namespaces listed above.\n\nFor clarity of documentation, we split the type hierarchy to DafWriter <: FormatWriter <: DafReader <: FormatReader.\n\nThe functions listed here use the FormatReader for read-only operations and FormatWriter for write operations into a Daf storage. This is a low-level API, not meant to be used from outside the package, and therefore is not re-exported from the top-level Daf namespace.\n\nIn contrast, the functions using DafReader and DafWriter describe the high-level API meant to be used from outside the package, and are re-exported. These functions are listed in the Daf.Readers and Daf.Writers modules. These functions provide all the logic common to any storage format, allowing us to keep the format-specific functions as simple as possible.\n\nThat is, when implementing a new Daf storage format, you should write struct MyFormat <: DafWriter, and implement the functions listed here for both FormatReader and FormatWriter.\n\n\n\n\n\n","category":"module"},{"location":"formats.html#Daf.Formats.DataKey","page":"Formats","title":"Daf.Formats.DataKey","text":"A key specifying some data property in Daf.\n\nScalars are identified by their name.\n\nVectors are specified as a tuple of the axis name and the property name.\n\nMatrices are specified as a tuple or the rows axis, the columns axis, and the property name.\n\nThe DafReader and DafWriter interfaces do not use this type, as each function knows exactly the type of data property it works on. 
However, higher-level APIs do use this as keys for dictionaries etc.\n\n\n\n\n\n","category":"type"},{"location":"formats.html#Read-API","page":"Formats","title":"Read API","text":"","category":"section"},{"location":"formats.html","page":"Formats","title":"Formats","text":"Daf.Formats.DafReader\nDaf.Formats.FormatReader\nDaf.Formats.Internal","category":"page"},{"location":"formats.html#Daf.Formats.DafReader","page":"Formats","title":"Daf.Formats.DafReader","text":"A high-level abstract interface for read-only access to Daf data.\n\nAll the functions for this type are provided based on the functions required for FormatReader. See the Daf.Readers module for their description.\n\n\n\n\n\n","category":"type"},{"location":"formats.html#Daf.Formats.FormatReader","page":"Formats","title":"Daf.Formats.FormatReader","text":"A low-level abstract interface for reading from Daf storage formats.\n\nWe require each storage format to have a .internal::Internal property. This enables all the high-level DafReader functions.\n\nEach storage format must implement the functions listed below for reading from the storage.\n\n\n\n\n\n","category":"type"},{"location":"formats.html#Daf.Formats.Internal","page":"Formats","title":"Daf.Formats.Internal","text":"Internal(name::AbstractString)\n\nInternal data we need to keep in any concrete FormatReader. This has to be available as a .internal data member of the concrete format. 
This enables all the high-level DafReader and DafWriter functions.\n\nThe constructor will automatically call unique_name to try and make the names unique for improved error messages.\n\n\n\n\n\n","category":"type"},{"location":"formats.html#Caching","page":"Formats","title":"Caching","text":"","category":"section"},{"location":"formats.html","page":"Formats","title":"Formats","text":"Daf.Formats.CacheType\nDaf.Formats.empty_cache!","category":"page"},{"location":"formats.html#Daf.Formats.CacheType","page":"Formats","title":"Daf.Formats.CacheType","text":"Types of cached data inside Daf.\n\nMappedData - memory-mapped disk data. This is the cheapest data, as it doesn't put pressure on the garbage collector. It requires some OS resources to maintain the mapping, and physical memory for the subset of the data that is actually being accessed. That is, one can memory map larger data than the physical memory, and performance will be good, as long as the subset of the data that is actually accessed is small enough to fit in memory. If it isn't, the performance will drop (a lot!) because the OS will be continuously reading data pages from disk - but it will not crash due to an out of memory error. It is very important not to re-map the same data twice because that causes all sort of inefficiencies and edge cases in the hardware and low-level software.\nMemoryData - disk data copied to application memory, or alternative layout of data matrices. This does pressure the garbage collector and can cause out of memory errors. However, re-fetching the data from disk is very slow, so caching this data is crucial for performance.\nQueryData - data that is computed by queries based on stored data (e.g., masked data, or results of a reduction or an element-wise operation). This again takes up application memory and may cause out of memory errors, but it is very useful to cache the results when the same query is executed multiple times (e.g., when using views). 
Manually executing queries therefore allows explicitly disabling the caching of the query results, since some queries will not be repeated.\n\nIf too much data has been cached, call empty_cache! to release it.\n\n\n\n\n\n","category":"type"},{"location":"formats.html#Daf.Formats.empty_cache!","page":"Formats","title":"Daf.Formats.empty_cache!","text":"empty_cache!(\n daf::FormatReader;\n [clear::Maybe{CacheType} = nothing,\n keep::Maybe{CacheType} = nothing]\n)::Nothing\n\nClear some cached data. By default, completely empties the caches. You can specify either clear, to only forget a specific CacheType (e.g., for clearing only QueryData), or keep, to forget everything except a specific CacheType (e.g., for keeping only MappedData). You can't specify both clear and keep.\n\n\n\n\n\n","category":"function"},{"location":"formats.html#Description","page":"Formats","title":"Description","text":"","category":"section"},{"location":"formats.html","page":"Formats","title":"Formats","text":"Daf.Formats.format_description_header\nDaf.Formats.format_description_footer","category":"page"},{"location":"formats.html#Daf.Formats.format_description_header","page":"Formats","title":"Daf.Formats.format_description_header","text":"format_description_header(format::FormatReader, lines::Vector{String})::Nothing\n\nAllow a format to emit additional description header lines.\n\nThis trusts that we have a read lock on the data set.\n\n\n\n\n\n","category":"function"},{"location":"formats.html#Daf.Formats.format_description_footer","page":"Formats","title":"Daf.Formats.format_description_footer","text":"format_description_footer(format::FormatReader, lines::Vector{String})::Nothing\n\nAllow a format to emit additional description footer lines. 
If deep, this also emits the description of any data sets nested in this one, if any.\n\nThis trusts that we have a read lock on the data set.\n\n\n\n\n\n","category":"function"},{"location":"formats.html#Scalar-properties","page":"Formats","title":"Scalar properties","text":"","category":"section"},{"location":"formats.html","page":"Formats","title":"Formats","text":"Daf.Formats.format_has_scalar\nDaf.Formats.format_scalar_names\nDaf.Formats.format_get_scalar","category":"page"},{"location":"formats.html#Daf.Formats.format_has_scalar","page":"Formats","title":"Daf.Formats.format_has_scalar","text":"format_has_scalar(format::FormatReader, name::AbstractString)::Bool\n\nCheck whether a scalar property with some name exists in format.\n\nThis trusts that we have a read lock on the data set.\n\n\n\n\n\n","category":"function"},{"location":"formats.html#Daf.Formats.format_scalar_names","page":"Formats","title":"Daf.Formats.format_scalar_names","text":"format_scalar_names(format::FormatReader)::AbstractStringSet\n\nThe names of the scalar properties in format.\n\nThis trusts that we have a read lock on the data set.\n\n\n\n\n\n","category":"function"},{"location":"formats.html#Daf.Formats.format_get_scalar","page":"Formats","title":"Daf.Formats.format_get_scalar","text":"format_get_scalar(format::FormatReader, name::AbstractString)::StorageScalar\n\nImplement fetching the value of a scalar property with some name in format.\n\nThis trusts that we have a read lock on the data set, and that the name scalar property exists in format.\n\n\n\n\n\n","category":"function"},{"location":"formats.html#Data-axes","page":"Formats","title":"Data 
axes","text":"","category":"section"},{"location":"formats.html","page":"Formats","title":"Formats","text":"Daf.Formats.format_has_axis\nDaf.Formats.format_axis_names\nDaf.Formats.format_get_axis\nDaf.Formats.format_axis_length","category":"page"},{"location":"formats.html#Daf.Formats.format_has_axis","page":"Formats","title":"Daf.Formats.format_has_axis","text":"format_has_axis(format::FormatReader, axis::AbstractString; for_change::Bool)::Bool\n\nCheck whether some axis exists in format. If for_change, this is done just prior to adding or deleting the axis.\n\nThis trusts that we have a read lock on the data set.\n\n\n\n\n\n","category":"function"},{"location":"formats.html#Daf.Formats.format_axis_names","page":"Formats","title":"Daf.Formats.format_axis_names","text":"format_axis_names(format::FormatReader)::AbstractStringSet\n\nThe names of the axes of format.\n\nThis trusts that we have a read lock on the data set.\n\n\n\n\n\n","category":"function"},{"location":"formats.html#Daf.Formats.format_get_axis","page":"Formats","title":"Daf.Formats.format_get_axis","text":"format_get_axis(format::FormatReader, axis::AbstractString)::AbstractStringVector\n\nImplement fetching the unique names of the entries of some axis of format.\n\nThis trusts that we have a read lock on the data set, and that the axis exists in format.\n\n\n\n\n\n","category":"function"},{"location":"formats.html#Daf.Formats.format_axis_length","page":"Formats","title":"Daf.Formats.format_axis_length","text":"format_axis_length(format::FormatReader, axis::AbstractString)::Int64\n\nImplement fetching the number of entries along the axis.\n\nThis trusts that we have a read lock on the data set, and that the axis exists in format.\n\n\n\n\n\n","category":"function"},{"location":"formats.html#Vector-properties","page":"Formats","title":"Vector 
properties","text":"","category":"section"},{"location":"formats.html","page":"Formats","title":"Formats","text":"Daf.Formats.format_has_vector\nDaf.Formats.format_vector_names\nDaf.Formats.format_get_vector","category":"page"},{"location":"formats.html#Daf.Formats.format_has_vector","page":"Formats","title":"Daf.Formats.format_has_vector","text":"format_has_vector(format::FormatReader, axis::AbstractString, name::AbstractString)::Bool\n\nImplement checking whether a vector property with some name exists for the axis in format.\n\nThis trusts that we have a read lock on the data set, that the axis exists in format and that the property name isn't name.\n\n\n\n\n\n","category":"function"},{"location":"formats.html#Daf.Formats.format_vector_names","page":"Formats","title":"Daf.Formats.format_vector_names","text":"format_vector_names(format::FormatReader, axis::AbstractString)::AbstractStringSet\n\nImplement fetching the names of the vectors for the axis in format, not including the special name property.\n\nThis trusts that we have a read lock on the data set, and that the axis exists in format.\n\n\n\n\n\n","category":"function"},{"location":"formats.html#Daf.Formats.format_get_vector","page":"Formats","title":"Daf.Formats.format_get_vector","text":"format_get_vector(format::FormatReader, axis::AbstractString, name::AbstractString)::StorageVector\n\nImplement fetching the vector property with some name for some axis in format.\n\nThis trusts that we have a read lock on the data set, that the axis exists in format, and the name vector property exists for the axis.\n\n\n\n\n\n","category":"function"},{"location":"formats.html#Matrix-properties","page":"Formats","title":"Matrix 
properties","text":"","category":"section"},{"location":"formats.html","page":"Formats","title":"Formats","text":"Daf.Formats.format_has_matrix\nDaf.Formats.format_matrix_names\nDaf.Formats.format_get_matrix","category":"page"},{"location":"formats.html#Daf.Formats.format_has_matrix","page":"Formats","title":"Daf.Formats.format_has_matrix","text":"format_has_matrix(\n format::FormatReader,\n rows_axis::AbstractString,\n columns_axis::AbstractString,\n name::AbstractString;\n [for_relayout::Bool = false]\n)::Bool\n\nImplement checking whether a matrix property with some name exists for the rows_axis and the columns_axis in format.\n\nThis trusts that we have a read lock on the data set, and that the rows_axis and the columns_axis exist in format.\n\n\n\n\n\n","category":"function"},{"location":"formats.html#Daf.Formats.format_matrix_names","page":"Formats","title":"Daf.Formats.format_matrix_names","text":"format_matrix_names(\n format::FormatReader,\n rows_axis::AbstractString,\n columns_axis::AbstractString,\n)::AbstractStringSet\n\nImplement fetching the names of the matrix properties for the rows_axis and columns_axis in format.\n\nThis trusts that we have a read lock on the data set, and that the rows_axis and columns_axis exist in format.\n\n\n\n\n\n","category":"function"},{"location":"formats.html#Daf.Formats.format_get_matrix","page":"Formats","title":"Daf.Formats.format_get_matrix","text":"format_get_matrix(\n format::FormatReader,\n rows_axis::AbstractString,\n columns_axis::AbstractString,\n name::AbstractString\n)::StorageMatrix\n\nImplement fetching the matrix property with some name for some rows_axis and columns_axis in format.\n\nThis trusts that we have a read lock on the data set, and that the rows_axis and columns_axis exist in format, and the name matrix property exists for them.\n\n\n\n\n\n","category":"function"},{"location":"formats.html#Write-API","page":"Formats","title":"Write 
API","text":"","category":"section"},{"location":"formats.html","page":"Formats","title":"Formats","text":"Daf.Formats.DafWriter\nDaf.Formats.FormatWriter","category":"page"},{"location":"formats.html#Daf.Formats.DafWriter","page":"Formats","title":"Daf.Formats.DafWriter","text":"A high-level abstract interface for write access to Daf data.\n\nAll the functions for this type are provided based on the functions required for FormatWriter. See the Daf.Writers module for their description.\n\n\n\n\n\n","category":"type"},{"location":"formats.html#Daf.Formats.FormatWriter","page":"Formats","title":"Daf.Formats.FormatWriter","text":"An abstract interface for writing into Daf storage formats.\n\nEach storage format must implement the functions listed below for writing into the storage.\n\n\n\n\n\n","category":"type"},{"location":"formats.html#Scalar-properties-2","page":"Formats","title":"Scalar properties","text":"","category":"section"},{"location":"formats.html","page":"Formats","title":"Formats","text":"Daf.Formats.format_set_scalar!\nDaf.Formats.format_delete_scalar!","category":"page"},{"location":"formats.html#Daf.Formats.format_set_scalar!","page":"Formats","title":"Daf.Formats.format_set_scalar!","text":"format_set_scalar!(\n format::FormatWriter,\n name::AbstractString,\n value::StorageScalar,\n)::Nothing\n\nImplement setting the value of a scalar property with some name in format.\n\nThis trusts that we have a write lock on the data set, and that the name scalar property does not exist in format.\n\n\n\n\n\n","category":"function"},{"location":"formats.html#Daf.Formats.format_delete_scalar!","page":"Formats","title":"Daf.Formats.format_delete_scalar!","text":"format_delete_scalar!(\n format::FormatWriter,\n name::AbstractString;\n for_set::Bool\n)::Nothing\n\nImplement deleting a scalar property with some name from format. 
If for_set, this is done just prior to setting the scalar with a different value.\n\nThis trusts that we have a write lock on the data set, and that the name scalar property exists in format.\n\n\n\n\n\n","category":"function"},{"location":"formats.html#Data-axes-2","page":"Formats","title":"Data axes","text":"","category":"section"},{"location":"formats.html","page":"Formats","title":"Formats","text":"Daf.Formats.format_add_axis!\nDaf.Formats.format_delete_axis!","category":"page"},{"location":"formats.html#Daf.Formats.format_add_axis!","page":"Formats","title":"Daf.Formats.format_add_axis!","text":"format_add_axis!(\n format::FormatWriter,\n axis::AbstractString,\n entries::AbstractStringVector\n)::Nothing\n\nImplement adding a new axis to format.\n\nThis trusts we have a write lock on the data set, that the axis does not already exist in format, and that the names of the entries are unique.\n\n\n\n\n\n","category":"function"},{"location":"formats.html#Daf.Formats.format_delete_axis!","page":"Formats","title":"Daf.Formats.format_delete_axis!","text":"format_delete_axis!(format::FormatWriter, axis::AbstractString)::Nothing\n\nImplement deleting some axis from format.\n\nThis trusts we have a write lock on the data set, that the axis exists in format, and that all properties that are based on this axis have already been deleted.\n\n\n\n\n\n","category":"function"},{"location":"formats.html#Vector-properties-2","page":"Formats","title":"Vector properties","text":"","category":"section"},{"location":"formats.html","page":"Formats","title":"Formats","text":"Daf.Formats.format_set_vector!\nDaf.Formats.format_delete_vector!","category":"page"},{"location":"formats.html#Daf.Formats.format_set_vector!","page":"Formats","title":"Daf.Formats.format_set_vector!","text":"format_set_vector!(\n format::FormatWriter,\n axis::AbstractString,\n name::AbstractString,\n vector::Union{StorageScalar, StorageVector},\n)::Nothing\n\nImplement setting a vector property with 
some name for some axis in format.\n\nIf the vector specified is actually a StorageScalar, the stored vector is filled with this value.\n\nThis trusts we have a write lock on the data set, that the axis exists in format, that the vector property name isn't \"name\", that it does not exist for the axis, and that the vector has the appropriate length for it.\n\n\n\n\n\n","category":"function"},{"location":"formats.html#Daf.Formats.format_delete_vector!","page":"Formats","title":"Daf.Formats.format_delete_vector!","text":"format_delete_vector!(\n format::FormatWriter,\n axis::AbstractString,\n name::AbstractString;\n for_set::Bool\n)::Nothing\n\nImplement deleting a vector property with some name for some axis from format. If for_set, this is done just prior to setting the vector with a different value.\n\nThis trusts we have a write lock on the data set, that the axis exists in format, that the vector property name isn't name, and that the name vector exists for the axis.\n\n\n\n\n\n","category":"function"},{"location":"formats.html#Matrix-properties-2","page":"Formats","title":"Matrix properties","text":"","category":"section"},{"location":"formats.html","page":"Formats","title":"Formats","text":"Daf.Formats.format_set_matrix!\nDaf.Formats.format_relayout_matrix!\nDaf.Formats.format_delete_matrix!","category":"page"},{"location":"formats.html#Daf.Formats.format_set_matrix!","page":"Formats","title":"Daf.Formats.format_set_matrix!","text":"format_set_matrix!(\n format::FormatWriter,\n rows_axis::AbstractString,\n columns_axis::AbstractString,\n name::AbstractString,\n matrix::StorageMatrix,\n)::Nothing\n\nImplement setting the matrix property with some name for some rows_axis and columns_axis in format.\n\nIf the matrix specified is actually a StorageScalar, the stored matrix is filled with this value.\n\nThis trusts we have a write lock on the data set, that the rows_axis and columns_axis exist in format, that the name matrix property does not exist for them, and 
that the matrix is column-major of the appropriate size for it.\n\n\n\n\n\n","category":"function"},{"location":"formats.html#Daf.Formats.format_relayout_matrix!","page":"Formats","title":"Daf.Formats.format_relayout_matrix!","text":"format_relayout_matrix!(\n format::FormatWriter,\n rows_axis::AbstractString,\n columns_axis::AbstractString,\n name::AbstractString\n)::Nothing\n\nrelayout! the existing name column-major matrix property for the rows_axis and the columns_axis and store the results as a row-major matrix property (that is, with flipped axes).\n\nThis trusts we have a write lock on the data set, that the rows_axis and columns_axis are different from each other, exist in format, that the name matrix property exists for them, and that it does not exist for the flipped axes.\n\n\n\n\n\n","category":"function"},{"location":"formats.html#Daf.Formats.format_delete_matrix!","page":"Formats","title":"Daf.Formats.format_delete_matrix!","text":"format_delete_matrix!(\n format::FormatWriter,\n rows_axis::AbstractString,\n columns_axis::AbstractString,\n name::AbstractString;\n for_set::Bool\n)::Nothing\n\nImplement deleting a matrix property with some name for some rows_axis and columns_axis from format. 
If for_set, this is done just prior to setting the matrix with a different value.\n\nThis trusts we have a write lock on the data set, that the rows_axis and columns_axis exist in format, and that the name matrix property exists for them.\n\n\n\n\n\n","category":"function"},{"location":"formats.html#Creating-properties","page":"Formats","title":"Creating properties","text":"","category":"section"},{"location":"formats.html","page":"Formats","title":"Formats","text":"Daf.Formats.format_empty_dense_vector!\nDaf.Formats.format_empty_sparse_vector!\nDaf.Formats.format_filled_sparse_vector!\nDaf.Formats.format_empty_dense_matrix!\nDaf.Formats.format_empty_sparse_matrix!\nDaf.Formats.format_filled_sparse_matrix!","category":"page"},{"location":"formats.html#Daf.Formats.format_empty_dense_vector!","page":"Formats","title":"Daf.Formats.format_empty_dense_vector!","text":"format_empty_dense_vector!(\n format::FormatWriter,\n axis::AbstractString,\n name::AbstractString,\n eltype::Type{T},\n)::VectorVector where {T <: StorageNumber}\n\nImplement creating an empty dense vector property with some name for some axis in format.\n\nThis trusts we have a write lock on the data set, that the axis exists in format and that the vector property name isn't \"name\", and that it does not exist for the axis.\n\nnote: Note\nThe return type of this function is always a functionally dense vector, that is, it will have strides of (1,), so that elements are consecutive in memory. 
However, it need not be an actual DenseVector because of Julia's type system's limitations.\n\n\n\n\n\n","category":"function"},{"location":"formats.html#Daf.Formats.format_empty_sparse_vector!","page":"Formats","title":"Daf.Formats.format_empty_sparse_vector!","text":"format_empty_sparse_vector!(\n format::FormatWriter,\n axis::AbstractString,\n name::AbstractString,\n eltype::Type{T},\n nnz::StorageInteger,\n indtype::Type{I},\n)::Tuple{AbstractVector{I}, AbstractVector{T}, Any}\nwhere {T <: StorageNumber, I <: StorageInteger}\n\nImplement creating an empty sparse vector property with some name for some axis in format. The final tuple element is passed to format_filled_sparse_vector!.\n\nThis trusts we have a write lock on the data set, that the axis exists in format and that the vector property name isn't \"name\", and that it does not exist for the axis.\n\n\n\n\n\n","category":"function"},{"location":"formats.html#Daf.Formats.format_filled_sparse_vector!","page":"Formats","title":"Daf.Formats.format_filled_sparse_vector!","text":"format_filled_sparse_vector!(\n format::FormatWriter,\n axis::AbstractString,\n name::AbstractString,\n extra::Any,\n filled::SparseVector{T, I},\n)::Nothing where {T <: StorageNumber, I <: StorageInteger}\n\nAllow the format to perform caching once the empty sparse vector has been filled. 
By default this does nothing.\n\n\n\n\n\n","category":"function"},{"location":"formats.html#Daf.Formats.format_empty_dense_matrix!","page":"Formats","title":"Daf.Formats.format_empty_dense_matrix!","text":"format_empty_dense_matrix!(\n format::FormatWriter,\n rows_axis::AbstractString,\n columns_axis::AbstractString,\n name::AbstractString,\n eltype::Type{T},\n)::AbstractMatrix{T} where {T <: StorageNumber}\n\nImplement creating an empty dense matrix property with some name for some rows_axis and columns_axis in format.\n\nThis trusts we have a write lock on the data set, that the rows_axis and columns_axis exist in format and that the name matrix property does not exist for them.\n\nnote: Note\nThe return type of this function is always a functionally dense matrix, that is, it will have strides of (1,nrows), so that elements are consecutive in memory. However, it need not be an actual DenseMatrix because of Julia's type system's limitations.\n\n\n\n\n\n","category":"function"},{"location":"formats.html#Daf.Formats.format_empty_sparse_matrix!","page":"Formats","title":"Daf.Formats.format_empty_sparse_matrix!","text":"format_empty_sparse_matrix!(\n format::FormatWriter,\n rows_axis::AbstractString,\n columns_axis::AbstractString,\n name::AbstractString,\n eltype::Type{T},\n indtype::Type{I},\n nnz::StorageInteger,\n)::Tuple{AbstractVector{I}, AbstractVector{I}, AbstractVector{T}, Any}\nwhere {T <: StorageNumber, I <: StorageInteger}\n\nImplement creating an empty sparse matrix property with some name for some rows_axis and columns_axis in format. 
The final tuple element is passed to format_filled_sparse_matrix!.\n\nThis trusts we have a write lock on the data set, that the rows_axis and columns_axis exist in format and that the name matrix property does not exist for them.\n\n\n\n\n\n","category":"function"},{"location":"formats.html#Daf.Formats.format_filled_sparse_matrix!","page":"Formats","title":"Daf.Formats.format_filled_sparse_matrix!","text":"format_filled_sparse_matrix!(\n format::FormatWriter,\n rows_axis::AbstractString,\n columns_axis::AbstractString,\n name::AbstractString,\n extra::Any,\n filled::SparseMatrixCSC{T, I},\n)::Nothing where {T <: StorageNumber, I <: StorageInteger}\n\nAllow the format to perform caching once the empty sparse matrix has been filled. By default this does nothing.\n\n\n\n\n\n","category":"function"},{"location":"formats.html#Index","page":"Formats","title":"Index","text":"","category":"section"},{"location":"formats.html","page":"Formats","title":"Formats","text":"Pages = [\"formats.md\"]","category":"page"},{"location":"example_data.html#Example-data","page":"Example data","title":"Example data","text":"","category":"section"},{"location":"example_data.html","page":"Example data","title":"Example data","text":"Daf.ExampleData","category":"page"},{"location":"example_data.html#Daf.ExampleData","page":"Example data","title":"Daf.ExampleData","text":"Example data for doctest tests.\n\n\n\n\n\n","category":"module"},{"location":"example_data.html#Example-Storage","page":"Example data","title":"Example Storage","text":"","category":"section"},{"location":"example_data.html","page":"Example data","title":"Example data","text":"Daf.ExampleData.example_daf","category":"page"},{"location":"example_data.html#Daf.ExampleData.example_daf","page":"Example data","title":"Daf.ExampleData.example_daf","text":"Create an example MemoryDaf to use for doctest tests.\n\n\n\n\n\n","category":"function"},{"location":"example_data.html#Index","page":"Example 
data","title":"Index","text":"","category":"section"},{"location":"example_data.html","page":"Example data","title":"Example data","text":"Pages = [\"example_data.md\"]","category":"page"},{"location":"read_only.html#Read-only","page":"Read-only","title":"Read-only","text":"","category":"section"},{"location":"read_only.html","page":"Read-only","title":"Read-only","text":"Daf.ReadOnly\nDaf.ReadOnly.DafReadOnly\nDaf.ReadOnly.read_only\nDaf.ReadOnly.DafReadOnlyWrapper","category":"page"},{"location":"read_only.html#Daf.ReadOnly","page":"Read-only","title":"Daf.ReadOnly","text":"Read-only Daf storage format.\n\n\n\n\n\n","category":"module"},{"location":"read_only.html#Daf.ReadOnly.DafReadOnly","page":"Read-only","title":"Daf.ReadOnly.DafReadOnly","text":"A common base type for a read-only DafReader, which doesn't allow any modification of the data.\n\n\n\n\n\n","category":"type"},{"location":"read_only.html#Daf.ReadOnly.read_only","page":"Read-only","title":"Daf.ReadOnly.read_only","text":"read_only(daf::DafReader[; name::Maybe{AbstractString} = nothing])::DafReadOnlyWrapper\n\nWrap daf with a DafReadOnlyWrapper to protect it against accidental modification. If not specified, the name of the daf is reused. If name is not specified and daf isa DafReadOnly, return it as-is.\n\n\n\n\n\n","category":"function"},{"location":"read_only.html#Daf.ReadOnly.DafReadOnlyWrapper","page":"Read-only","title":"Daf.ReadOnly.DafReadOnlyWrapper","text":"struct DafReadOnlyWrapper <: DafReader ... end\n\nA wrapper for any DafWriter data, protecting it against accidental modification. 
This isn't exported and isn't created manually; instead call read_only.\n\n\n\n\n\n","category":"type"},{"location":"read_only.html#Index","page":"Read-only","title":"Index","text":"","category":"section"},{"location":"read_only.html","page":"Read-only","title":"Read-only","text":"Pages = [\"read_only.md\"]","category":"page"},{"location":"concat.html#Concat","page":"Concat","title":"Concat","text":"","category":"section"},{"location":"concat.html","page":"Concat","title":"Concat","text":"Daf.Concat\nDaf.Concat.concatenate\nDaf.Concat.MergeData\nDaf.Concat.MergeAction","category":"page"},{"location":"concat.html#Daf.Concat","page":"Concat","title":"Daf.Concat","text":"Concatenate multiple Daf data sets along some axis. This copies the data from the concatenated data sets into some target data set.\n\nThe exact behavior of concatenation is surprisingly complex when accounting for sparse vs. dense matrices, different matrix layouts, and properties which are not along the concatenation axis. 
The implementation is further complicated by minimizing the allocation of intermediate memory buffers for the data; that is, in principle, concatenating from and into memory-mapped data sets should not allocate \"any\" memory buffers - the data should be copied directly from one memory-mapped region to another.\n\n\n\n\n\n","category":"module"},{"location":"concat.html#Daf.Concat.concatenate","page":"Concat","title":"Daf.Concat.concatenate","text":"concatenate(\n destination::DafWriter,\n axis::Union{AbstractString, AbstractStringVector},\n sources::AbstractVector{<:DafReader};\n [names::Maybe{AbstractStringVector} = nothing,\n dataset_axis::Maybe{AbstractString} = \"dataset\",\n dataset_property::Bool = true,\n prefix::Union{Bool, AbstractVector{Bool}} = false,\n prefixed::Maybe{Union{AbstractStringSet, AbstractVector{<:AbstractStringSet}}} = nothing,\n empty::Maybe{EmptyData} = nothing,\n sparse_if_saves_storage_fraction = 0.25,\n merge::Maybe{MergeData} = nothing,\n overwrite::Bool = false]\n)::Nothing\n\nConcatenate data from a sources sequence of Daf data sets into a single destination data set along one or more concatenation axis. You can also concatenate along multiple axes by specifying an array of axis names.\n\nWe need a unique name for each of the concatenated data sets. By default, we use the DafReader.name. You can override this by specifying an explicit names vector with one name per data set.\n\nBy default, a new axis named by dataset_axis is created with one entry per concatenated data set, using these unique names. You can disable this by setting dataset_axis to nothing.\n\nIf an axis is created, and dataset_property is set (the default), a property with the same name is created for the concatenated axis, containing the name of the data set each entry was collected from.\n\nThe entries of each concatenated axis must be unique. By default, we require that no entry name is used in more than one data set. 
If this isn't the case, then set prefix to specify adding the unique data set name (and a . separator) to its entries (either once for all the axes, or using a vector with a setting per axis).\n\nnote: Note\nIf a prefix is added to the axis entry names, then it must also be added to all the vector properties whose values are entries of the axis. By default, we assume that any property name that is identical to the axis name is such a property (e.g., given a cluster axis, a cluster property of each cell is assumed to contain the names of clusters from that axis). We also allow for property names to just start with the axis name, followed by . and some suffix (e.g., cluster.manual will also be assumed to contain the names of clusters). We'll automatically add the unique prefix to all such properties. If, however, this heuristic fails, you can specify a vector of properties to be prefixed (or a vector of such vectors, one per concatenated axis). In this case only the listed properties will be prefixed with the unique data set names.\n\nVector and matrix properties for the axis will be concatenated. If some of the concatenated data sets do not contain some property, then an empty value must be specified for it, and will be used for the missing data.\n\nConcatenated matrices are always stored in column-major layout where the concatenation axis is the column axis. There should not exist any matrices whose both axes are concatenated (e.g., square matrices of the concatenated axis).\n\nThe concatenated properties will be sparse if the storage for the sparse data is smaller than naive dense storage by at least sparse_if_saves_storage_fraction (by default, if using sparse storage saves at least 25% of the space, that is, takes at most 75% of the dense storage space). 
When estimating this fraction, we assume dense data is 100% non-zero; we only take into account data already stored as sparse, as well as any missing data whose empty value is zero.\n\nBy default, properties that do not apply to any of the concatenation axes will be ignored. If merge is specified, then such properties will be processed according to it. Using CollectAxis for a property requires that the dataset_axis will not be nothing.\n\nBy default, concatenation will fail rather than overwrite existing properties in the target.\n\n\n\n\n\n","category":"function"},{"location":"concat.html#Daf.Concat.MergeData","page":"Concat","title":"Daf.Concat.MergeData","text":"A vector of pairs where the key is a DataKey and the value is MergeAction. Similarly to ViewData, the order of the entries matters (last one wins), and a key containing \"*\" is expanded to all the relevant properties. For matrices, merge is done separately for each layout. That is, the order of the key (rows_axis, columns_axis, matrix_name) key does matter in the MergeData, which is different from how ViewData works.\n\nnote: Note\nDue to Julia's type system limitations, there's just no way for the system to enforce the type of the pairs in this vector. That is, what we'd like to say is: MergeData = AbstractVector{Pair{DataKey, MergeAction}} But what we are forced to say is: MergeData = AbstractVector{<:Pair} Glory to anyone who figures out an incantation that would force the system to perform more meaningful type inference here.\n\n\n\n\n\n","category":"type"},{"location":"concat.html#Daf.Concat.MergeAction","page":"Concat","title":"Daf.Concat.MergeAction","text":"The action for merging the values of a property from the concatenated data sets into the result data set. This is used for properties that do not apply to the concatenation axis (that is, scalar properties, and vector and matrix properties of other axes). Valid values are:\n\nSkipProperty - do not create the property in the result. 
This is the default.\nLastValue - use the value from the last concatenated data set (that has a value for the property). This is useful for properties that have the same value for all concatenated data sets.\nCollectAxis - collect the values from all the data sets, adding a dimension to the data (that is, convert a scalar property to a vector, and a vector property to a matrix). This can't be applied to matrix properties, because we can't directly store 3D data inside Daf. In addition, this requires that a dataset axis is created in the target, and that an empty value is specified for the property if it is missing from any of the concatenated data sets.\n\n\n\n\n\n","category":"type"},{"location":"concat.html#Index","page":"Concat","title":"Index","text":"","category":"section"},{"location":"concat.html","page":"Concat","title":"Concat","text":"Pages = [\"concat.md\"]","category":"page"},{"location":"queries.html#Queries","page":"Queries","title":"Queries","text":"","category":"section"},{"location":"queries.html","page":"Queries","title":"Queries","text":"Daf.Queries","category":"page"},{"location":"queries.html#Daf.Queries","page":"Queries","title":"Daf.Queries","text":"Extract data from a DafReader.\n\n\n\n\n\n","category":"module"},{"location":"queries.html#Construction","page":"Queries","title":"Construction","text":"","category":"section"},{"location":"queries.html","page":"Queries","title":"Queries","text":"Daf.Queries.Query\nDaf.Queries.@q_str","category":"page"},{"location":"queries.html#Daf.Queries.Query","page":"Queries","title":"Daf.Queries.Query","text":"Query(query_string::AbstractString) <: QueryOperation\n\nA query is a description of a (sub-)process for extracting some data from a DafReader. A full query is a sequence of QueryOperation, that when applied one at a time on some DafReader, result in a scalar, vector or matrix result. 
A single Lookup or a single Axis are also valid complete queries.\n\nTo apply a query, invoke get_query to apply a query to some DafReader data (you can also use the shorthand dafquery instead of get_query(daf query)). By default, query operations will cache their results in memory as QueryData, to speed up repeated queries. This may lock up large amounts of memory; you can empty_cache! to release it.\n\nQueries can be constructed in two ways. In code, a query can be built by chaining query operations (e.g., the expression Axis(\"gene\") |> Lookup(\"is_marker\") looks up the is_marker vector property of the gene axis).\n\nAlternatively, a query can be parsed from a string, which needs to be parsed into a Query object (e.g., the above can be written as Query(\"/gene:is_marker\")). See the QUERY_OPERATORS for a table of supported operators. Spaces (and comments) around the operators are optional; see tokenize for details. You can also convert a Query to a string (or print it, etc.) to see its representation. This is used for error messages and as a key when caching query results.\n\nSince query strings use \\ as an escape character, it is easier to use raw string literals for queries (e.g., Query(raw\"cell = ATGC\\:B1 : age\") vs. Query(\"cell = ATGC\\\\:B1 : age\")). To make this even easier we provide the q macro (e.g., q\"cell = ATGC\\:B1 : batch\") which works similarly to Julia's standard r macro for literal Regex strings.\n\nBeing able to represent queries as strings allows for reading them from configuration files and letting the user input them in an application UI (e.g., allowing the user to specify the X, Y and/or colors of a scatter plot using queries). At the same time, being able to incrementally build queries using code allows for convenient reuse (e.g., reusing axis sub-queries in Daf views), without having to go through the string representation.\n\nDaf provides a comprehensive set of QueryOperations that can be used to construct queries. 
The QUERY_OPERATORS listed below provide the basic functionality (e.g., specifying an Axis or a property Lookup). In addition, Daf provides computation operations (EltwiseOperation and ReductionOperation), allowing for additional operations to be provided by external packages.\n\nObviously not all possible combinations of operations make sense (e.g., Lookup(\"is_marker\") |> Axis(\"cell\") will not work). For the full list of valid combinations, see NAMES_QUERY, SCALAR_QUERY, VECTOR_QUERY and MATRIX_QUERY below.\n\n\n\n\n\n","category":"type"},{"location":"queries.html#Daf.Queries.@q_str","page":"Queries","title":"Daf.Queries.@q_str","text":"q\"...\"\n\nShorthand for parsing a literal string as a Query. This is equivalent to Query(raw\"...\"), that is, a \\ can be placed in the string without escaping it (except for before a \"). This is very convenient for literal queries (e.g., q\"/ cell = ATGC\\:B1 : batch\" == Query(raw\"/ cell = ATGC\\:B1 : batch\") == Query(\"/ cell = ATGC\\\\:B1 : batch\") == `Axis(\"cell\") |> IsEqual(\"ATGC:B1\") |> Lookup(\"batch\")).\n\n\n\n\n\n","category":"macro"},{"location":"queries.html#Functions","page":"Queries","title":"Functions","text":"","category":"section"},{"location":"queries.html","page":"Queries","title":"Queries","text":"Daf.Queries.get_query\nDaf.Queries.get_frame\nDaf.Queries.QueryColumns\nDaf.Queries.query_result_dimensions\nDaf.Queries.is_axis_query","category":"page"},{"location":"queries.html#Daf.Queries.get_query","page":"Queries","title":"Daf.Queries.get_query","text":"get_query(\n daf::DafReader,\n query::Union{Query, AbstractString};\n [cache::Bool = true]\n)::Union{StorageScalar, NamedVector, NamedMatrix}\n\nApply the full query to the Daf data and return the result. By default, this will cache results, so repeated queries will be accelerated. This may consume a large amount of memory. 
You can disable it by specifying cache = false, or release the cached data using empty_cache!.\n\nAs a shorthand syntax you can also invoke this using getindex, that is, using the [] operator (e.g., daf[q\"/ cell\"] is equivalent to get_query(daf, q\"/ cell\")).\n\n\n\n\n\n","category":"function"},{"location":"queries.html#Daf.Queries.get_frame","page":"Queries","title":"Daf.Queries.get_frame","text":"get_frame(\n daf::DafReader,\n axis::Union{Query, AbstractString},\n [columns::Maybe{Union{AbstractStringVector, QueryColumns}} = nothing;\n cache::Bool = true]\n)::DataFrame end\n\nReturn a DataFrame containing multiple vectors of the same axis.\n\nThe axis can be either just the name of an axis (e.g., \"cell\"), or a query for the axis (e.g., q\"/ cell\"), possibly using a mask (e.g., q\"/ cell & age > 1\"). The result of the query must be a vector of unique axis entry names.\n\nIf columns is not specified, the data frame will contain all the vector properties of the axis, in alphabetical order (since DataFrame has no concept of named rows, the 1st column will contain the name of the axis entry). Otherwise, columns may be a vector of names of vector properties (e.g., [\"batch\", \"age\"]), or a vector of pairs mapping a column name to a query suffix (e.g., [\"color\" => q\": type => color\"]). This suffix is applied to the axis query (e.g., if the axis is masked as above, the full query for the color column would be q\"/ cell & age > 1 : type => color). The result of the full query must be a vector.\n\nBy default, this will cache results of all queries. This may consume a large amount of memory. You can disable it by specifying cache = false, or release the cached data using empty_cache!.\n\n\n\n\n\n","category":"function"},{"location":"queries.html#Daf.Queries.QueryColumns","page":"Queries","title":"Daf.Queries.QueryColumns","text":"Specify columns for a data frame. 
This is a vector of pairs, where the key is the column name, and the value is a query that computes the data of the column.\n\nnote: Note\nDue to Julia's type system limitations, there's just no way for the system to enforce the type of the pairs in this vector. That is, what we'd like to say is:QueryColumns = AbstractVector{Pair{AbstractString, Union{AbstractString, Query}}}But what we are forced to say is:QueryColumns = AbstractVector{<:Pair}Glory to anyone who figures out an incantation that would force the system to perform more meaningful type inference here.\n\n\n\n\n\n","category":"type"},{"location":"queries.html#Daf.Queries.query_result_dimensions","page":"Queries","title":"Daf.Queries.query_result_dimensions","text":"query_result_dimensions(query::Union{Query, AbstractString})::Int\n\nReturn the number of dimensions (-1 - names, 0 - scalar, 1 - vector, 2 - matrix) of the results of a query. This also verifies the query is syntactically valid, though it may still fail if applied to specific data due to invalid data values or types.\n\n\n\n\n\n","category":"function"},{"location":"queries.html#Daf.Queries.is_axis_query","page":"Queries","title":"Daf.Queries.is_axis_query","text":"is_axis_query(query::Union{Query, AbstractString})::Bool\n\nReturns whether the query specifies a (possibly masked) axis. 
This also verifies the query is syntactically valid, though it may still fail if applied to specific data due to invalid data values or types.\n\n\n\n\n\n","category":"function"},{"location":"queries.html#Syntax","page":"Queries","title":"Syntax","text":"","category":"section"},{"location":"queries.html","page":"Queries","title":"Queries","text":"Daf.Queries.QUERY_OPERATORS\nDaf.Queries.NAMES_QUERY\nDaf.Queries.SCALAR_QUERY\nDaf.Queries.LOOKUP_PROPERTY\nDaf.Queries.VECTOR_ENTRY\nDaf.Queries.MATRIX_ENTRY\nDaf.Queries.REDUCE_VECTOR\nDaf.Queries.VECTOR_QUERY\nDaf.Queries.VECTOR_PROPERTY\nDaf.Queries.VECTOR_LOOKUP\nDaf.Queries.MATRIX_ROW\nDaf.Queries.MATRIX_COLUMN\nDaf.Queries.REDUCE_MATRIX\nDaf.Queries.MATRIX_QUERY\nDaf.Queries.MATRIX_LOOKUP\nDaf.Queries.COUNTS_MATRIX\nDaf.Queries.POST_PROCESS\nDaf.Queries.GROUP_BY\nDaf.Queries.AXIS_MASK\nDaf.Queries.MASK_OPERATION\nDaf.Queries.VECTOR_FETCH\nDaf.Queries.ComparisonOperation\nDaf.Queries.guess_typed_value","category":"page"},{"location":"queries.html#Daf.Queries.QUERY_OPERATORS","page":"Queries","title":"Daf.Queries.QUERY_OPERATORS","text":"Operators used to represent a Query as a string.\n\nOperator Implementation Description\n/ Axis Specify a vector or matrix axis (e.g., / cell : batch or / cell / gene : UMIs).\n? Names 1. Names of scalars or axes (? axes, ? scalars).\n 2. Names of vectors of axis (e.g., / cell ?).\n 3. Names of matrices of axes (e.g., / cell / gene ?).\n: Lookup Lookup a property (e.g., @ version, / cell : batch or / cell / gene : UMIs).\n=> Fetch Fetch a property from another axis (e.g., / cell : batch => age).\n! AsAxis 1. Specify axis name when fetching a property (e.g., / cell : manual ! type => color).\n 2. Force all axis values when counting (e.g., / cell : batch ! * manual ! type).\n 3. Force all axis values when grouping (e.g., / cell : age @ batch ! %> Mean).\n?? IfNot 1. Mask excluding false-ish values (e.g., / cell : batch ?? => age).\n 2. 
Default for false-ish lookup values (e.g., / cell : type ?? Outlier).\n 3. Default for false-ish fetched values (e.g., / cell : batch ?? 1 => age).\n││ IfMissing 1. Value for missing lookup properties (e.g., / gene : is_marker ││ false).\n 2. Value for missing fetched properties (e.g., `/ cell : type\n 3. Value for empty reduced vectors (e.g., `/ cell : type = LMPP => age %> Max\n% EltwiseOperation Apply an element-wise operation (e.g., / cell / gene : UMIs % Log base 2 eps 1).\n%> ReductionOperation Apply a reduction operation (e.g., / cell / gene : UMIs %> Sum).\n* CountBy Compute counts matrix (e.g., / cell : age * type).\n@ GroupBy 1. Aggregate vector entries by a group (e.g., / cell : age @ type %> Mean).\n 2. Aggregate matrix row entries by a group (e.g.,/ cell / gene : UMIs @ type %> Max).\n& And Restrict axis entries (e.g., / gene & is_marker).\n&! AndNot Restrict axis entries (e.g., / gene &! is_marker).\n│ Or Expand axis entries (e.g., / gene & is_marker │ is_noisy).\n│! OrNot Expand axis entries (e.g., / gene & is_marker │! is_noisy).\n^ Xor Flip axis entries (e.g., / gene & is_marker ^ is_noisy).\n^! XorNot Flip axis entries (e.g., / gene & is_marker ^! is_noisy).\n= IsEqual 1. Select an entry from an axis (e.g., / cell / gene = FOX1 : UMIs).\n 2. Compare equal (e.g., / cell & age = 1).\n!= IsNotEqual Compare not equal (e.g., / cell & age != 1).\n< IsLess Compare less than (e.g., / cell & age < 1).\n<= IsLessEqual Compare less or equal (e.g., / cell & age <= 1).\n> IsGreater Compare greater than (e.g., / cell & age > 1).\n>= IsGreaterEqual Compare greater or equal (e.g., / cell & age >= 1).\n~ IsMatch Compare match (e.g., / gene & name ~ RP\\[SL\\]).\n!~ IsNotMatch Compare not match (e.g., / gene & name !~ RP\\[SL\\]).\n\nnote: Note\nDue to Julia's Documenter limitations, the ASCII | character (|) is replaced by the Unicode │ character (│) in the above table. 
Sigh.\n\n\n\n\n\n","category":"constant"},{"location":"queries.html#Daf.Queries.NAMES_QUERY","page":"Queries","title":"Daf.Queries.NAMES_QUERY","text":"NAMES_QUERY := ( Names scalars | Names axes | Axis Names | Axis Axis Names )\n\nA query returning a set of names:\n\nLooking up the set of names of the scalar properties (? scalars).\nLooking up the set of names of the axes (? axes).\nLooking up the set of names of the vector properties of an axis (e.g., / cell ?).\nLooking up the set of names of the matrix properties of a pair of axes (e.g., / cell / gene ?).\n\n\n\n\n\n","category":"constant"},{"location":"queries.html#Daf.Queries.SCALAR_QUERY","page":"Queries","title":"Daf.Queries.SCALAR_QUERY","text":"SCALAR_QUERY := ( LOOKUP_PROPERTY | VECTOR_ENTRY | MATRIX_ENTRY | REDUCE_VECTOR ) EltwiseOperation*\n\nA query returning a scalar can be one of:\n\nLooking up the value of a scalar property (e.g., : version will return the value of the version scalar property).\nPicking a single entry of a vector property (e.g., / gene = FOX1 : is_marker will return whether the gene named FOX1 is a marker gene).\nPicking a single entry of a matrix property (e.g., / gene = FOX1 / cell = ATGC : UMIs will return the number of UMIs of the FOX1 gene of the ATGC cell).\nReducing some vector into a single value (e.g., / donor : age %> Mean will compute the mean age of all the donors).\n\nEither way, this can be followed by a series of EltwiseOperation to modify the scalar result (e.g., / donor : age %> Mean % Log base 2 % Abs will compute the absolute value of the log base 2 of the mean age of all the donors).\n\n\n\n\n\n","category":"constant"},{"location":"queries.html#Daf.Queries.LOOKUP_PROPERTY","page":"Queries","title":"Daf.Queries.LOOKUP_PROPERTY","text":"LOOKUP_PROPERTY := Lookup IfMissing?\n\nLookup the value of a scalar or matrix property. 
This is used on its own to access a scalar property (e.g., : version) or combined with two axes to access a matrix property (e.g., / cell / gene : UMIs).\n\nBy default, it is an error if the property does not exist. However, if an IfMissing is provided, then this value is used instead (e.g., : version || Unknown will return Unknown if there is no version scalar property, and / cell / gene : UMIs || 0 will return an all-zero matrix if there is no UMIs matrix property).\n\nAccessing a VECTOR_PROPERTY allows for more complex operations.\n\n\n\n\n\n","category":"constant"},{"location":"queries.html#Daf.Queries.VECTOR_ENTRY","page":"Queries","title":"Daf.Queries.VECTOR_ENTRY","text":"VECTOR_ENTRY := Axis IsEqual VECTOR_LOOKUP\n\nLookup the scalar value of some entry of a vector property of some axis (e.g., / gene = FOX1 : is_marker will return whether the FOX1 gene is a marker gene).\n\n\n\n\n\n","category":"constant"},{"location":"queries.html#Daf.Queries.MATRIX_ENTRY","page":"Queries","title":"Daf.Queries.MATRIX_ENTRY","text":"MATRIX_ENTRY := Axis IsEqual Axis IsEqual LOOKUP_PROPERTY\n\nLookup the scalar value of the named entry of a matrix property (e.g., / gene = FOX1 / cell = ATGC : UMIs will return the number of UMIs of the FOX1 gene of the ATGC cell).\n\n\n\n\n\n","category":"constant"},{"location":"queries.html#Daf.Queries.REDUCE_VECTOR","page":"Queries","title":"Daf.Queries.REDUCE_VECTOR","text":"REDUCE_VECTOR := VECTOR_QUERY ReductionOperation IfMissing?\n\nPerform an arbitrary vector query, and reduce the result into a single scalar value (e.g., / donor : age %> Mean will compute the mean of the ages of the donors).\n\nBy default, it is an error if the vector query results in an empty vector. 
However, if an IfMissing suffix is provided, then this value is used instead (e.g., / cell & type = LMPP : age %> Mean || 0 will return zero if there are no cells whose type is LMPP).\n\n\n\n\n\n","category":"constant"},{"location":"queries.html#Daf.Queries.VECTOR_QUERY","page":"Queries","title":"Daf.Queries.VECTOR_QUERY","text":"VECTOR_QUERY := ( VECTOR_PROPERTY | MATRIX_ROW | MATRIX_COLUMN | REDUCE_MATRIX ) POST_PROCESS*\n\nA query returning a vector can be one of:\n\nLooking up the value of a vector property (e.g., / gene : is_marker will return a mask of the marker genes).\nPicking a single row or column of a matrix property (e.g., / gene = FOX1 / cell : UMIs will return a vector of the UMIs of the FOX1 gene of all the cells).\nReducing each column of some matrix into a scalar, resulting in a vector (e.g., / gene / cell : UMIs %> Sum will compute the sum of the UMIs of all the genes in each cell).\n\nEither way, this can be followed by further processing of the vector (e.g., / gene / cell : UMIs % Log base 2 eps 1 will compute the log base 2 of one plus the UMIs of each gene in each cell).\n\n\n\n\n\n","category":"constant"},{"location":"queries.html#Daf.Queries.VECTOR_PROPERTY","page":"Queries","title":"Daf.Queries.VECTOR_PROPERTY","text":"VECTOR_PROPERTY := Axis AXIS_MASK* [VECTOR_LOOKUP] VECTOR_FETCH*\n\nLookup the values of some vector property (e.g., / gene : is_marker will return a mask of the marker genes). This can be restricted to a subset of the vector using masks (e.g., / gene & is_marker : is_noisy will return a mask of the noisy genes out of the marker genes), and/or fetch the property value from indirect axes (e.g., / cell : batch => donor => age will return the age of the donor of the batch of each cell).\n\n\n\n\n\n","category":"constant"},{"location":"queries.html#Daf.Queries.VECTOR_LOOKUP","page":"Queries","title":"Daf.Queries.VECTOR_LOOKUP","text":"VECTOR_LOOKUP := Lookup IfMissing? 
( IfNot | AsAxis )?\n\nA [Lookup] of a vector property (e.g., / cell : type will return the type of each cell).\n\nBy default, it is an error if the property does not exist. However, if an IfMissing is provided, then this value is used instead (e.g., / cell : type || Unknown will return a vector of Unknown types if there is no type property for the cell axis).\n\nIf the IfNot suffix is provided, it controls how to modify \"false-ish\" (empty string, zero numeric value, or false Boolean value) entries (e.g., / cell : type ? will return a vector of the type of each cell that has a non-empty type, while / cell : type ? Outlier will return a vector of the type of each cell, where cells with an empty type are given the type Outlier).\n\nOnly when the vector property is used for CountBy or for GroupBy, providing the AsAxis suffix indicates that the property is associated with an axis (similar to an indirect axis in Fetch), and the set of groups is forced to be the values of that axis; in this case, empty string values are always ignored (e.g., / cell : age @ type ! %> Mean || 0 will return a vector of the mean age of the cells of each type, with a value of zero for types which have no cells, and ignoring cells which have an empty type; similarly, / cell : batch => donor ! * type ! 
will return a matrix whose rows are donors and columns are types, counting the number of cells of each type that were sampled from each donor, ignoring cells which have an empty type or whose batch has an empty donor).\n\n\n\n\n\n","category":"constant"},{"location":"queries.html#Daf.Queries.MATRIX_ROW","page":"Queries","title":"Daf.Queries.MATRIX_ROW","text":"MATRIX_ROW := Axis IsEqual Axis AXIS_MASK* Lookup\n\nLookup the values of a single row of a matrix property, eliminating the rows axis (e.g., / gene = FOX1 / cell : UMIs will evaluate to a vector of the UMIs of the FOX1 gene of all the cells).\n\n\n\n\n\n","category":"constant"},{"location":"queries.html#Daf.Queries.MATRIX_COLUMN","page":"Queries","title":"Daf.Queries.MATRIX_COLUMN","text":"MATRIX_COLUMN := Axis AXIS_MASK* Axis IsEqual Lookup\n\nLookup the values of a single column of a matrix property, eliminating the columns axis (e.g., / gene / cell = ATGC : UMIs will evaluate to a vector of the UMIs of all the genes of the ATGC cell).\n\n\n\n\n\n","category":"constant"},{"location":"queries.html#Daf.Queries.REDUCE_MATRIX","page":"Queries","title":"Daf.Queries.REDUCE_MATRIX","text":"REDUCE_MATRIX := MATRIX_QUERY ReductionOperation\n\nPerform an arbitrary matrix query, and reduce the result into a vector by converting each column into a single value, eliminating the rows axis (e.g., / gene / cell : UMIs %> Sum will evaluate to a vector of the total UMIs of each cell).\n\n\n\n\n\n","category":"constant"},{"location":"queries.html#Daf.Queries.MATRIX_QUERY","page":"Queries","title":"Daf.Queries.MATRIX_QUERY","text":"MATRIX_QUERY := ( MATRIX_LOOKUP | COUNTS_MATRIX ) POST_PROCESS*\n\nA query returning a matrix can be one of:\n\nLooking up the value of a matrix property (e.g., / gene / cell : UMIs will return the matrix of UMIs for each gene and cell).\nCounting the number of times each combination of two vector properties occurs in the data (e.g., / cell : batch => donor => age * type will return a matrix whose 
rows are ages and columns are types, where each entry contains the number of cells which have the specific type and age).\n\nEither way, this can be followed by a series of EltwiseOperation to modify the results (e.g., / gene / cell : UMIs % Log base 2 eps 1 will compute the log base 2 of 1 plus the UMIs of each gene in each cell).\n\n\n\n\n\n","category":"constant"},{"location":"queries.html#Daf.Queries.MATRIX_LOOKUP","page":"Queries","title":"Daf.Queries.MATRIX_LOOKUP","text":"MATRIX_LOOKUP := Axis AXIS_MASK* Axis AXIS_MASK* Lookup\n\nLookup the values of some matrix property (e.g., / gene / cell : UMIs will return the matrix of UMIs of each gene in each cell). This can be restricted to a subset of the vector using masks (e.g., / gene & is_marker / cell & type = LMPP : UMIs will return a matrix of the UMIs of each marker gene in cells whose type is LMPP).\n\n\n\n\n\n","category":"constant"},{"location":"queries.html#Daf.Queries.COUNTS_MATRIX","page":"Queries","title":"Daf.Queries.COUNTS_MATRIX","text":"COUNTS_MATRIX := VECTOR_QUERY CountBy VECTOR_FETCH*\n\nCompute a matrix of counts of each combination of values given two vectors (e.g., / cell : batch => donor => age * batch => donor => sex will return a matrix whose rows are ages and columns are sexes, where each entry contains the number of cells which have the specific age and sex).\n\n\n\n\n\n","category":"constant"},{"location":"queries.html#Daf.Queries.POST_PROCESS","page":"Queries","title":"Daf.Queries.POST_PROCESS","text":"POST_PROCESS := EltwiseOperation | GROUP_BY\n\nA vector or a matrix result may be processed by one of:\n\nApplying an EltwiseOperation operation to each value (e.g., / donor : age % Log base 2 will compute the log base 2 of the ages of all donors, and / gene / cell : UMIs % Log base 2 eps 1 will compute the log base 2 of 1 plus the UMIs count of each gene in each cell).\nReducing each group of vector entries or matrix rows into a single value (e.g., / cell : batch => donor => age @ type 
%> Mean will compute a vector of the mean age of the cells of each type, and / cell / gene : UMIs @ type %> Mean will compute a matrix of the mean UMIs of each gene for the cells of each type).\n\n\n\n\n\n","category":"constant"},{"location":"queries.html#Daf.Queries.GROUP_BY","page":"Queries","title":"Daf.Queries.GROUP_BY","text":"GROUP_BY := GroupBy VECTOR_FETCH* ReductionOperation IfMissing\n\nThe entries of a vector or the rows of a matrix result may be grouped, where all the values that have the same group value are reduced to a single value using a ReductionOperation (e.g., / cell : batch => donor => age @ type %> Mean will compute the mean age of all the cells of each type, and / cell / gene : UMIs @ type %> Mean will compute a matrix of the mean UMIs of each gene for the cells of each type).\n\nIf the group property is suffixed by AsAxis, then the result will have a value for each entry of the axis (e.g., / cell : age @ type ! %> Mean will compute the mean age of the cells of each type). In this case, some groups may have no values at all, which by default, is an error. Providing an IfMissing suffix will use the specified value for such empty groups instead (e.g., / cell : age @ type ! %> Mean || 0 will compute the mean age for the cells of each type, with a zero value for types for which there are no cells).\n\n\n\n\n\n","category":"constant"},{"location":"queries.html#Daf.Queries.AXIS_MASK","page":"Queries","title":"Daf.Queries.AXIS_MASK","text":"AXIS_MASK := MASK_OPERATION ( VECTOR_FETCH )* ( ComparisonOperation )?\n\nRestrict the set of entries of an axis to lookup results for (e.g., / gene & is_marker). 
If the mask is based on a non-Bool property, it is converted to a Boolean by comparing with the empty string or a zero value (depending on its data type); alternatively, you can explicitly compare it with a value (e.g., / cell & batch => donor => age > 1).\n\n\n\n\n\n","category":"constant"},{"location":"queries.html#Daf.Queries.MASK_OPERATION","page":"Queries","title":"Daf.Queries.MASK_OPERATION","text":"MASK_OPERATION := And | AndNot | Or | OrNot | Xor | XorNot\n\nA query operation for restricting the set of entries of an Axis. The mask operations are applied to the current mask, so if several operations are applied, they are applied in order from left to right (e.g., / gene & is_marker | is_noisy &! is_lateral will first restrict the set of genes to marker genes, then expand it to include noisy genes as well, then remove all the lateral genes; this would be different from / gene & is_marker &! is_lateral | is_noisy, which will include all noisy genes even if they are lateral).\n\n\n\n\n\n","category":"constant"},{"location":"queries.html#Daf.Queries.VECTOR_FETCH","page":"Queries","title":"Daf.Queries.VECTOR_FETCH","text":"VECTOR_FETCH := AsAxis? Fetch IfMissing? ( IfNot | AsAxis )?\n\nFetch the value of a property of an indirect axis. That is, there is a common pattern where one axis (e.g., cell) has a property (e.g., type) which has the same name as an axis, and whose values are (string) entry names of that axis. In this case, we often want to lookup a property of the other axis (e.g., / cell : type => color will evaluate to a vector of the color of the type of each cell). Sometimes one walks a chain of such properties (e.g., / cell : batch => donor => age).\n\nSometimes it is needed to store several alternate properties that refer to the same indirect axis. In this case, the name of the property can begin with the axis name, followed by . 
and a suffix (e.g., / cell : type.manual => color will fetch the color of the manual type of each cell, still using the type axis).\n\nIf the property does not follow this convention, it is possible to manually specify the name of the axis using an AsAxis prefix (e.g., / cell : manual ! type => color will assume the value of the manual property is a vector of names of entries of the type axis).\n\nAs usual, if the property does not exist, this is an error, unless an IfMissing suffix is provided (e.g., / cell : type || red => color will assign all cells the color red if the type property does not exist).\n\nIf the value of the property is the empty string for some vector entries, by default this is again an error (as the empty string is not one of the values of the indirect axis). If an IfNot suffix is provided, such entries can be removed from the result (e.g., / cell : type ? => color will return a vector of the colors of the cells which have a non-empty type), or can be given a specific value (e.g., / cell : type ? red => color will return a vector of a color for each cell, giving the red color to cells with an empty type).\n\nWhen using IfMissing and/or IfNot, the default value provided is always of the final value (e.g., / cell : batch || -1 ? -2 => donor || -3 ? -4 => age || -5 ? -6 will compute a vector of age per cell; if there's no batch property, all cells will get the age -1). If there is such a property, then cells with an empty batch will get the age -2. For cells with a non-empty batch, if there's no donor property, they will get the value -3. If there is such a property, cells with an empty donor will get the value -4. Finally, for cells with a batch and donor, if there is no age property, they will be given an age of -5. 
Otherwise, if their age is zero, it will be changed to -6.\n\n\n\n\n\n","category":"constant"},{"location":"queries.html#Daf.Queries.ComparisonOperation","page":"Queries","title":"Daf.Queries.ComparisonOperation","text":"ComparisonOperation := ( IsLess | IsLessEqual | IsEqual | IsNotEqual | IsGreater | IsGreaterEqual | IsMatch | IsNotMatch )\n\nA query operation computing a mask by comparing the values of a vector with some constant (e.g., / cell & age > 0). In addition, the IsEqual operation can be used to slice an entry from a vector (e.g., / gene = FOX1 : is_marker) or a matrix (e.g., / cell / gene = FOX1 : UMIs, / cell = ATGC / gene = FOX1 : UMIs).\n\n\n\n\n\n","category":"type"},{"location":"queries.html#Daf.Queries.guess_typed_value","page":"Queries","title":"Daf.Queries.guess_typed_value","text":"guess_typed_value(value::AbstractString)::StorageScalar\n\nGiven a string value, guess the typed value it represents:\n\ntrue and false are assumed to be Bool.\nIntegers are assumed to be Int64.\nFloating point numbers are assumed to be Float64, as are e and pi.\nAnything else is assumed to be a string.\n\nThis doesn't have to be 100% accurate; it is intended to allow omitting the data type in most cases when specifying an IfMissing value. 
If it guesses wrong, just specify an explicit type (e.g., @ version || 1.0 String).\n\n\n\n\n\n","category":"function"},{"location":"queries.html#Query-Operators","page":"Queries","title":"Query Operators","text":"","category":"section"},{"location":"queries.html","page":"Queries","title":"Queries","text":"Daf.Queries.QuerySequence\n\nDaf.Queries.And\nDaf.Queries.AndNot\nDaf.Queries.AsAxis\nDaf.Queries.Axis\nDaf.Queries.CountBy\nDaf.Queries.Fetch\nDaf.Queries.GroupBy\nDaf.Queries.IfMissing\nDaf.Queries.IfNot\nDaf.Queries.IsEqual\nDaf.Queries.IsGreater\nDaf.Queries.IsGreaterEqual\nDaf.Queries.IsLess\nDaf.Queries.IsLessEqual\nDaf.Queries.IsMatch\nDaf.Queries.IsNotEqual\nDaf.Queries.IsNotMatch\nDaf.Queries.Lookup\nDaf.Queries.Names\nDaf.Queries.Or\nDaf.Queries.OrNot\nDaf.Queries.Xor\nDaf.Queries.XorNot","category":"page"},{"location":"queries.html#Daf.Queries.QuerySequence","page":"Queries","title":"Daf.Queries.QuerySequence","text":"struct QuerySequence{N} <: Query where {N}\n\nA sequence of N QueryOperations.\n\n\n\n\n\n","category":"type"},{"location":"queries.html#Daf.Queries.And","page":"Queries","title":"Daf.Queries.And","text":"And(property::AbstractString) <: QueryOperation\n\nA query operation for restricting the set of entries of an Axis. In a string Query, this is specified using the & operator, followed by the name of an axis property to look up to compute the mask.\n\nThe mask may be just the fetched property (e.g., / gene & is_marker will restrict the result vector to only marker genes). If the value of the property is not Boolean, it is automatically compared to 0 or the empty string, depending on its type (e.g., / cell & type will restrict the result vector to only cells which were given a non-empty-string type annotation). 
It is also possible to fetch properties from other axes, and use an explicit ComparisonOperation to compute the Boolean mask (e.g., / cell & batch => age > 1 will restrict the result vector to cells whose batch has an age larger than 1).\n\n\n\n\n\n","category":"type"},{"location":"queries.html#Daf.Queries.AndNot","page":"Queries","title":"Daf.Queries.AndNot","text":"AndNot(property::AbstractString) <: QueryOperation\n\nSame as And but use the inverse of the mask. In a string Query, this is specified using the &! operator, followed by the name of an axis property to look up to compute the mask.\n\n\n\n\n\n","category":"type"},{"location":"queries.html#Daf.Queries.AsAxis","page":"Queries","title":"Daf.Queries.AsAxis","text":"AsAxis([axis::AbstractString = nothing]) <: QueryOperation\n\nThere are three cases where we may want to take a vector property and consider each value to be the name of an entry of some axis: Fetch, CountBy and GroupBy. In a string Query, this is indicated by the ! operators, optionally followed by the name of the axis to use.\n\nWhen using Fetch, we always lookup in some axis, so AsAxis is implied (e.g., / cell : type => color is identical to / cell : type ! => color). In contrast, when using CountBy and GroupBy, one has to explicitly specify AsAxis to force using all the entries of the axis for the counting or grouping (e.g., / cell : age @ type %> Mean will return a vector of the mean age of every type that has cells associated with it, while / cell : age @ type ! %> Mean will return a vector of the mean age of each and every value of the type axis; similarly, / cell : type * age will generate a counts matrix whose rows are types that have cells associated with them, while / cell : type ! 
* age will generate a counts matrix whose rows are exactly the entries of the type axis).\n\nSince the set of values is fixed by the axis matching the vector property, it is possible that, when using this for GroupBy, some groups would have no values, causing an error. This can be avoided by providing an IfMissing suffix to the reduction (e.g., / cell : age @ type ! %> Mean will fail if some type has no cells associated with it, while / cell : age @ type ! %> Mean || 0 will give such types a zero mean age).\n\nTypically, the name of the base property is identical to the name of the axis. In this case, there is no need to specify the name of the axis (as in the examples above). Sometimes it is useful to be able to store several vector properties which all map to the same axis. To support this, we support a naming convention where the property name begins with the axis name followed by a .suffix. (e.g., both / cell : type => color and / cell : type.manual => color will look up the color of the type of some property of the cell axis - either \"the\" type of each cell, or the alternate type.manual of each cell).\n\nIf the property name does not follow the above conventions, then it is possible to explicitly specify the name of the axis (e.g., / cell : manual ! type => color will consider each value of the manual property as the name of an entry of the type axis and look up the matching color property value of this axis).\n\n\n\n\n\n","category":"type"},{"location":"queries.html#Daf.Queries.Axis","page":"Queries","title":"Daf.Queries.Axis","text":"Axis(axis::AbstractString) <: QueryOperation\n\nA query operation for specifying a result axis. In a string Query, this is specified using the / operator followed by the axis name.\n\nThis needs to be specified at least once for a vector query (e.g., / cell : batch), and twice for a matrix (e.g., / cell / gene : UMIs). 
Axes can be filtered using Boolean masks using And, AndNot, Or, OrNot, Xor and XorNot (e.g., / gene & is_marker : is_noisy). Alternatively, a single entry can be selected from the axis using IsEqual (e.g., / gene = FOX1 : is_noisy, / cell / gene = FOX1 : UMIs, / cell = C1 / gene = FOX1 : UMIs). Finally, a matrix can be reduced into a vector, and a vector to a scalar, using ReductionOperation (e.g., / gene / cell : UMIs %> Sum %> Mean).\n\nnote: Note\nThis, Names and Lookup are the only QueryOperations that also works as a complete Query.\n\n\n\n\n\n","category":"type"},{"location":"queries.html#Daf.Queries.CountBy","page":"Queries","title":"Daf.Queries.CountBy","text":"CountBy(property::AbstractString) <: QueryOperation\n\nA query operation that generates a matrix of counts of combinations of pairs of values for the same entries of an axis. That is, it follows fetching some vector property, and is followed by fetching a second vector property of the same axis. The result is a matrix whose rows are the values of the 1st property and the columns are the values of the 2nd property, and the values are the number of times the combination of values appears. In a string Query, this is specified using the * operator, followed by the property name to look up (e.g., / cell : type * batch will generate a matrix whose rows correspond to cell types, whose columns correspond to cell batches, and whose values are the number of cells of each combination of batch and type).\n\nBy default, the rows and/or columns only contain actually seen values and are ordered alphabetically. However, it is common that one or both of the properties correspond to an axis. In this case, you can use an AsAxis suffix to force the rows and/or columns of the matrix to be exactly the entries of the specific axis (e.g., / cell : type ! * batch will generate a matrix whose rows are exactly the entries of the type axis, even if there is a type without any cells). 
This is especially useful when both properties are axes, as the result can be stored as a matrix property (e.g., / cell : type ! * batch ! will generate a matrix whose rows are the entries of the type axis, and whose columns are the entries of the batch axis, so it can be given to set_matrix!(daf, \"type\", \"batch\", ...)).\n\nThe raw counts matrix can be post-processed like any other matrix (using ReductionOperation or an EltwiseOperation). This allows computing useful aggregate properties (e.g., / cell : type * batch % Fractions will generate a matrix whose columns correspond to batches and whose rows are the fraction of the cells from each type within each batch).\n\n\n\n\n\n","category":"type"},{"location":"queries.html#Daf.Queries.Fetch","page":"Queries","title":"Daf.Queries.Fetch","text":"Fetch(property::AbstractString) <: QueryOperation\n\nA query operation for fetching the value of a property from another axis, based on a vector property whose values are entry names of the axis. In a string Query, this is specified using the => operator, followed by the name to look up.\n\nThat is, if you query for the values of a vector property (e.g., batch for each cell), and the name of this property is identical to some axis name, then we assume each value is the name of an entry of this axis. We use this to fetch the value of some other property (e.g., age) of that axis (e.g., / cell : batch => age).\n\nIt is useful to be able to store several vector properties which all map to the same axis. To support this, we support a naming convention where the property name begins with the axis name followed by a .suffix. 
(e.g., both / cell : type => color and / cell : type.manual => color will look up the color of the type of some property of the cell axis - either \"the\" type of each cell, or the alternate type.manual of each cell).\n\nFetching can be chained (e.g., / cell : batch => donor => age will fetch the age of the donor of the batch of each cell).\n\nIf the property does not exist, this is an error, unless this is followed by IfMissing (e.g., / cell : type => color || red). If the property contains an empty value, this is also an error, unless it is followed by an IfNot (e.g., / cell : type ? => color will compute a vector of the colors of the type of the cells that have a non-empty type, and / cell : batch ? 0 => donor => age will assign a zero age for cells which have an empty batch).\n\n\n\n\n\n","category":"type"},{"location":"queries.html#Daf.Queries.GroupBy","page":"Queries","title":"Daf.Queries.GroupBy","text":"GroupBy(property::AbstractString) <: QueryOperation\n\nA query operation that uses a (following) ReductionOperation to aggregate the values of each group of values. Will fetch the specified property_name (possibly followed by additional Fetch operations) and use the resulting vector for the name of the group of each value.\n\nIf applied to a vector, the result is a vector with one entry per group (e.g., / cell : age @ type %> Mean will generate a vector with an entry per cell type and whose values are the mean age of the cells of each type). If applied to a matrix, the result is a matrix with one row per group (e.g., / cell / gene : UMIs @ type %> Max will generate a matrix with one row per type and one column per gene, whose values are the maximal UMIs count of the gene in the cells of each type).\n\nBy default, the result uses only group values we actually observe, in sorted order. 
However, if the operation is followed by an AsAxis suffix, then the fetched property must correspond to an existing axis (similar to when using Fetch), and the result will use the entries of the axis, even if we do not observe them in the data (and will ignore vector entries with an empty value). In this case, the reduction operation will fail if there are no values for some group, unless it is followed by an IfMissing suffix (e.g., / cell : age @ type ! %> Mean will generate a vector whose entries are all the entries of the type axis, and will ignore cells with an empty type; this will fail if there are types which are not associated with any cell. In contrast, / cell : age @ type ! %> Mean || 0 will succeed, assigning a value of zero for types which have no cells associated with them).\n\n\n\n\n\n","category":"type"},{"location":"queries.html#Daf.Queries.IfMissing","page":"Queries","title":"Daf.Queries.IfMissing","text":"IfMissing(value::StorageScalar; dtype::Maybe{Type} = nothing) <: QueryOperation\n\nA query operation providing a value to use if the data is missing some property. In a string Query, this is specified using the || operator, followed by the value to use, and optionally followed by the data type of the value (e.g., : score || 1 Float32).\n\nIf the data type is not specified, and the value isa AbstractString, then the data type is deduced using guess_typed_value of the value.\n\n\n\n\n\n","category":"type"},{"location":"queries.html#Daf.Queries.IfNot","page":"Queries","title":"Daf.Queries.IfNot","text":"IfNot(value::Maybe{StorageScalar} = nothing) <: QueryOperation\n\nA query operation providing a value to use for \"false-ish\" values in a vector (empty strings, zero numeric values, or false Boolean values). In a string Query, this is indicated using the ?? operator, optionally followed by a value to use.\n\nIf the value is nothing (the default), then these entries are dropped (masked out) of the result (e.g., / cell : type ? 
behaves the same as / cell & type : type, that is, returns the type of the cells which have a non-empty type). Otherwise, this value is used instead of the \"false-ish\" value (e.g., / cell : type ? Outlier will return a vector of the type of each cell, with the value Outlier for cells with an empty type). When fetching properties, this is the final value (e.g., / cell : type ? red => color will return a vector of the color of the type of each cell, with a red color for the cells with an empty type).\n\nIf the value isa AbstractString, then it is automatically converted to the data type of the elements of the results vector.\n\n\n\n\n\n","category":"type"},{"location":"queries.html#Daf.Queries.IsEqual","page":"Queries","title":"Daf.Queries.IsEqual","text":"IsEqual(value::StorageScalar) <: QueryOperation\n\nEquality is used for two purposes:\n\nAs a comparison operator, similar to IsLess except that uses = instead of < for the comparison.\nTo select a single entry from a vector. This allows a query to select a single scalar from a vector (e.g., / gene = FOX1 : is_marker) or from a matrix (e.g., / cell = ATGC / gene = FOX1 : UMIs); or to slice a single vector from a matrix (e.g., / cell = ATGC / gene : UMIs or / cell / gene = FOX1 : UMIs).\n\n\n\n\n\n","category":"type"},{"location":"queries.html#Daf.Queries.IsGreater","page":"Queries","title":"Daf.Queries.IsGreater","text":"IsGreater(value::StorageScalar) <: QueryOperation\n\nSimilar to IsLess except that uses > instead of < for the comparison.\n\n\n\n\n\n","category":"type"},{"location":"queries.html#Daf.Queries.IsGreaterEqual","page":"Queries","title":"Daf.Queries.IsGreaterEqual","text":"IsGreaterEqual(value::StorageScalar) <: QueryOperation\n\nSimilar to IsLess except that uses >= instead of < for the comparison.\n\n\n\n\n\n","category":"type"},{"location":"queries.html#Daf.Queries.IsLess","page":"Queries","title":"Daf.Queries.IsLess","text":"IsLess(value::StorageScalar) <: QueryOperation\n\nA query operation for 
converting a vector value to a Boolean mask by comparing it some value. In a string Query, this is specified using the < operator, followed by the value to compare with.\n\nA string value is automatically converted into the same type as the vector values (e.g., / cell & probability < 0.5 will restrict the result vector only to cells whose probability is less than half).\n\n\n\n\n\n","category":"type"},{"location":"queries.html#Daf.Queries.IsLessEqual","page":"Queries","title":"Daf.Queries.IsLessEqual","text":"IsLessEqual(value::StorageScalar) <: QueryOperation\n\nSimilar to IsLess except that uses <= instead of < for the comparison.\n\n\n\n\n\n","category":"type"},{"location":"queries.html#Daf.Queries.IsMatch","page":"Queries","title":"Daf.Queries.IsMatch","text":"IsMatch(value::Union{AbstractString, Regex}) <: QueryOperation\n\nSimilar to IsLess except that the compared values must be strings, and the mask is of the values that match the given regular expression.\n\n\n\n\n\n","category":"type"},{"location":"queries.html#Daf.Queries.IsNotEqual","page":"Queries","title":"Daf.Queries.IsNotEqual","text":"IsNotEqual(value::StorageScalar) <: QueryOperation\n\nSimilar to IsLess except that uses != instead of < for the comparison.\n\n\n\n\n\n","category":"type"},{"location":"queries.html#Daf.Queries.IsNotMatch","page":"Queries","title":"Daf.Queries.IsNotMatch","text":"IsNotMatch(value::Union{AbstractString, Regex}) <: QueryOperation\n\nSimilar to IsMatch except that looks for entries that do not match the pattern.\n\n\n\n\n\n","category":"type"},{"location":"queries.html#Daf.Queries.Lookup","page":"Queries","title":"Daf.Queries.Lookup","text":"Lookup(property::AbstractString) <: Query\n\nA query operation for looking up the value of a property with some name. 
In a string Query, this is specified using the : operator, followed by the property name to look up.\n\nIf the query state is empty, this looks up the value of a scalar property (e.g., : version).\nIf the query state contains a single axis, this looks up the value of a vector property (e.g., / cell : batch).\nIf the query state contains two axes, this looks up the value of a matrix property (e.g., / cell / gene : UMIs).\n\nIf the property does not exist, this is an error, unless this is followed by IfMissing (e.g., : version || 1.0).\n\nIf any of the axes has a single entry selected using [IsEqual]@(ref), this will reduce the dimension of the result (e.g., / cell / gene = FOX1 : UMIs is a vector, and both / cell = C1 / gene = FOX1 : UMI and / gene = FOX1 : is_marker are scalars).\n\nnote: Note\nThis, Names and Axis are the only QueryOperations that also works as a complete Query.\n\n\n\n\n\n","category":"type"},{"location":"queries.html#Daf.Queries.Names","page":"Queries","title":"Daf.Queries.Names","text":"Names(kind::Maybe{AbstractString} = nothing) <: Query\n\nA query operation for looking up a set of names. In a string Query, this is specified using the ? operator, optionally followed by the kind of objects to name.\n\nIf the query state is empty, a kind must be specified, one of scalars or axes, and the result is the set of their names (? scalars, ? 
axes).\nIf the query state contains a single axis (without any masks), the kind must not be specified, and the result is the set of names of vector properties of the axis (e.g., / cell ?).\nIf the query state contains two axes (without any masks), the kind must not be specified, and the result is the set of names of matrix properties of the axes (e.g., / cell / gene ?).\n\nnote: Note\nThis, Lookup and Axis are the only QueryOperations that also works as a complete Query.\n\n\n\n\n\n","category":"type"},{"location":"queries.html#Daf.Queries.Or","page":"Queries","title":"Daf.Queries.Or","text":"Or(property::AbstractString) <: QueryOperation\n\nA query operation for expanding the set of entries of an Axis. In a string Query, this is specified using the | operator, followed by the name of an axis property to look up to compute the mask.\n\nThis works similarly to And, except that it adds to the mask (e.g., / gene & is_marker | is_noisy will restrict the result vector to either marker or noisy genes).\n\n\n\n\n\n","category":"type"},{"location":"queries.html#Daf.Queries.OrNot","page":"Queries","title":"Daf.Queries.OrNot","text":"OrNot(property::AbstractString) <: QueryOperation\n\nSame as Or but use the inverse of the mask. In a string Query, this is specified using the |! operator, followed by the name of an axis property to look up to compute the mask.\n\n\n\n\n\n","category":"type"},{"location":"queries.html#Daf.Queries.Xor","page":"Queries","title":"Daf.Queries.Xor","text":"Xor(property::AbstractString) <: QueryOperation\n\nA query operation for flipping the set of entries of an Axis. 
In a string Query, this is specified using the ^ operator, followed by the name of an axis property to look up to compute the mask.\n\nThis works similarly to Or, except that it flips entries in the mask (e.g., / gene & is_marker ^ is_noisy will restrict the result vector to either marker or noisy genes, but not both).\n\n\n\n\n\n","category":"type"},{"location":"queries.html#Daf.Queries.XorNot","page":"Queries","title":"Daf.Queries.XorNot","text":"XorNot(property::AbstractString) <: QueryOperation\n\nSame as Xor but use the inverse of the mask. In a string Query, this is specified using the ^! operator, followed by the name of an axis property to look up to compute the mask.\n\n\n\n\n\n","category":"type"},{"location":"queries.html#Index","page":"Queries","title":"Index","text":"","category":"section"},{"location":"queries.html","page":"Queries","title":"Queries","text":"Pages = [\"queries.md\"]","category":"page"},{"location":"index.html#Daf","page":"Daf","title":"Daf","text":"","category":"section"},{"location":"index.html","page":"Daf","title":"Daf","text":"Daf.Daf","category":"page"},{"location":"index.html#Daf.Daf","page":"Daf","title":"Daf.Daf","text":"The Daf.jl package provides a uniform generic interface for accessing 1D and 2D data arranged along some set of axes. This is a much-needed generalization of the AnnData functionality. 
The key features are:\n\nThe data model StorageTypes include (1) some axes with named entries, (2) vector data indexed by a single axis, (3) matrix data indexed by a pair of axes, and also (4) scalar data (anything not tied to some axis).\nExplicit control over 2D data MatrixLayouts (row or column major), with support for both dense and sparse matrices, both of which are crucial for performance.\nOut of the box, allow storing the data in memory (using MemoryDaf), directly inside HDF5 files (using H5df), or as a collection of simple files in a directory (using FilesDaf), which works nicely with tools like make for automating computation pipelines.\nImport and export to/from AnnDataFormat for interoperability with non-Daf tools.\nImplementation with a focus on memory-mapping to allow for efficient processing of large data sets (in theory, larger than the system's memory). In particular, merely opening a data set is a fast operation (almost) regardless of its size.\nWell-defined interfaces for implementing additional storage Formats.\nCreating Chains of data sets, allowing zero-copy reuse of common data between multiple computation pipelines.\nConcat multiple data sets into a single data set along one or more axes.\nA Query language for accessing the data, providing features such as slicing, aggregation and filtering, and making Views and Copies based on these queries.\nSelf documenting Computations with an explicit Contracts describing and enforcing the inputs and outputs, and Adapters for applying the computation to data of a different format.\n\nThe top-level Daf module re-exports all(most) everything from the sub-modules, so you can directly access any exported symbol by using Daf (or import Daf: MemoryDaf), instead of having to import or use qualified names (such as Daf.MemoryFormat.MemoryDaf).\n\nThe Daf data sets type hierarchy looks like this:\n\nDafReader (abstract type)\n├─ DafReadOnly (abstract type)\n│ ├─ DafReadOnlyWrapper (created by read_only)\n│ ├─ 
DafView (created by viewer)\n│ └─ DafChainReader (created by chain_reader)\n└─ DafWriter (abstract type)\n ├─ DafChainWriter (created by chain_writer)\n ├─ MemoryDaf\n ├─ FilesDaf\n └─ H5df\n\n\n\n\n\n","category":"module"},{"location":"index.html#Index","page":"Daf","title":"Index","text":"","category":"section"},{"location":"index.html","page":"Daf","title":"Daf","text":"","category":"page"},{"location":"adapters.html#Adapters","page":"Adapters","title":"Adapters","text":"","category":"section"},{"location":"adapters.html","page":"Adapters","title":"Adapters","text":"Daf.Adapters\nDaf.Adapters.adapter","category":"page"},{"location":"adapters.html#Daf.Adapters","page":"Adapters","title":"Daf.Adapters","text":"Adapt Daf data to a @computation.\n\n\n\n\n\n","category":"module"},{"location":"adapters.html#Daf.Adapters.adapter","page":"Adapters","title":"Daf.Adapters.adapter","text":"adapter(\n computation::Function,\n view::Union{DafWriter, DafReadOnly},\n [name::Maybe{AbstractString} = nothing,\n capture=MemoryDaf,\n axes::Maybe{ViewAxes} = nothing,\n data::Maybe{ViewData} = nothing,\n empty::Maybe{EmptyData} = nothing,\n relayout::Bool = true,\n overwrite::Bool = false]\n)::Any\n\nInvoke a computation on a view data set and return the result; copy a viewer of the updated data set into the base Daf data of the view. If specified, the name is used as a prefix for all the names; otherwise, the view name is used as the prefix.\n\nIf you have some Daf data you wish to run a computation on, you need to deal with name mismatches. That is, the names of the input and output data properties of the computation may be different from these used in your data. 
In addition, you might be interested only in a subset of the computed data properties, to avoiding polluting your data set with irrelevant properties.\n\nTo address these issues, the common idiom for applying computations to Daf data is to use the adapter as follows:\n\nCreate a (read-only) view of your data which presents the data properties under the names expected by the computation, using viewer. If the computation was annotated by @computation, then its Contract will be explicitly documented so you will know exactly what to provide.\nPass this view to adapter, which will invoke the computation with a (writable) adapted version of the data (created using chain_writer and a new DafWriter to capture the output; by default, this will be a [MemoryDaf]@(ref)), but it can be any function that takes a name (named) parameter and returns a DafWriter.\nOnce the computation is done, create a new view of the output, which presents the subset of the output data properties you are interested in, with the names you would like to store them as. Again, if the computation was annotated by @computation, then its Contract will be explicitly documented so you will know exactly what to expect.\nCopy this output view data into the base Daf data of the view (using copy_all!, empty, relayout (default: true) and overwrite (default: false).\n\nnote: Note\nIf the names of the properties in the input already match the contract of the computation, you can pass the data set directly as the view. The call to adapter may still be needed to filter or rename the computation's output. If the outputs can also be used as-is, then there's no need to invoke adapter; directly apply the computation to the data and be done.\n\nTypically the code would look something like this:\n\ndaf = ... 
# Some input `Daf` data we wish to compute on.\n\n# Here `daf` contains the inputs for the computation, but possibly\n# under a different name.\n\nresult = adapter(\n viewer(daf; ...), # How to view the input in the way expected by the computation.\n name = \"example\", # A name to use to generate the temporary `Daf` data names.\n axes = ..., data = ..., # How and what to view from the output for copying back into `daf`.\n empty = ..., # If the input view specifies a subset of some axes.\n) do adapted # The writable adapted data we can pass to the computation.\n computation(adapted, ...) # Actually do the computation.\n return ... # An additional result outside `daf`.\nend\n\n# Here `daf` will contain the specific renamed outputs specified in `adapter`,\n# and you can also access the additional non-`daf` data `result`.\n\nThis idiom allows @computation functions to use clear generic names for their inputs and outputs, and still apply them to arbitrary data sets using more specific names. One can even invoke the same computation with different parameter values, and store the different results in the same data set under different names.\n\n\n\n\n\n","category":"function"},{"location":"adapters.html#Index","page":"Adapters","title":"Index","text":"","category":"section"},{"location":"adapters.html","page":"Adapters","title":"Adapters","text":"Pages = [\"adapter.md\"]","category":"page"},{"location":"h5df_format.html#H5DF-Format","page":"H5DF Format","title":"H5DF Format","text":"","category":"section"},{"location":"h5df_format.html","page":"H5DF Format","title":"H5DF Format","text":"Daf.H5dfFormat\nDaf.H5dfFormat.MAJOR_VERSION\nDaf.H5dfFormat.MINOR_VERSION\nDaf.H5dfFormat.H5df","category":"page"},{"location":"h5df_format.html#Daf.H5dfFormat","page":"H5DF Format","title":"Daf.H5dfFormat","text":"A Daf storage format in an HDF5 disk file. 
This is the \"native\" way to store Daf data in HDF5 files, which can be used to contain \"anything\", as HDF5 is essentially \"a filesystem inside a file\", with \"groups\" instead of directories and \"datasets\" instead of files. Therefore HDF5 is very generic, and there are various specific formats which use specific internal structure to hold some data in it - for example, h5ad files have a specific internal structure for representing AnnData objects. To represent Daf data in HDF5 storage, we use the following internal structure (which is not compatible with h5ad):\n\nThe HDF5 file may contain Daf data directly in the root group, in which case, it is restricted to holding just a single Daf data set. When using such a file, you automatically access the single Daf data set contained in it. By convention such files are given a .h5df suffix.\nAlternatively, the HDF5 file may contain Daf data inside some arbitrary group, in which case, there's no restriction on the content of other groups in the file. Such groups may contain other Daf data (allowing for multiple Daf data sets in a single file), and/or non-Daf data. When using such a file, you need to specify the name of the group that contains the Daf data set you are interested it. By convention, at least if such files contain \"mostly\" (or only) Daf data sets, they are given a .h5dfs suffix, and are accompanied by some documentation describing the top-level groups in the file.\nUnder the Daf data group, there are 4 sub-groups: scalars, axes, vectors and matrices and a daf dataset.\nTo future-proof the format, the daf dataset will contain a vector of two integers, the first acting as the major version number and the second as the minor version number, using semantic versioning. This makes it easy to test whether some group in an HDF5 file does/n't contain Daf data, and which version of the internal structure it is using. 
Currently the only defined version is [1,0].\nThe scalars group contains scalar properties, each as its own \"dataset\". The only supported scalar data types are these included in StorageScalar. If you really need something else, serialize it to JSON and store the result as a string scalar. This should be extremely rare.\nThe axes group contains a \"dataset\" per axis, which contains a vector of strings (the names of the axis entries).\nThe vectors group contains a sub-group for each axis. Each such sub-group contains vector properties. If the vector is dense, it is stored directly as a \"dataset\". Otherwise, it is stored as a group containing two vector \"datasets\": nzind is containing the indices of the non-zero values, and nzval containing the actual values. See Julia's SparseVector implementation for details. The only supported vector element types are these included in StorageScalar, same as StorageVector.\nThe matrices group contains a sub-group for each rows axis, which contains a sub-group for each columns axis. Each such sub-sub group contains matrix properties. If the matrix is dense, it is stored directly as a \"dataset\" (in column-major layout). Otherwise, it is stored as a group containing three vector \"datasets\": colptr containing the indices of the rows of each column in rowval, rowval containing the indices of the non-zero rows of the columns, and nzval containing the non-zero matrix entry values. See Julia's SparseMatrixCSC implementation for details. The only supported matrix element types are these included in StorageNumber - this explicitly excludes matrices of strings, same as StorageMatrix.\nAll vectors and matrices are stored in a contiguous way in the file, which allows us to efficiently memory-map them.\n\nThat's all there is to it. 
Due to the above restrictions on types and layout, the metadata provided by HDF5 for each \"dataset\" is sufficient to fully describe the data, and one should be able to directly access it using any HDF5 API in any programming language, if needed. Typically, however, it is easiest to simply use the Julia Daf package to access the data.\n\nExample HDF5 structure:\n\nexample-daf-dataset-root-group/\n├─ daf\n├─ scalars/\n│ └─ version\n├─ axes/\n│ ├─ cell\n│ └─ gene\n├─ vectors/\n│ ├─ cell/\n│ │ └─ batch\n│ └─ gene/\n│ └─ is_marker\n└─ matrices/\n ├─ cell/\n │ ├─ cell/\n │ └─ gene/\n │ └─ UMIs/\n │ ├─ colptr\n │ ├─ rowval\n │ └─ nzval\n └─ gene/\n ├─ cell/\n └─ gene/\n\nnote: Note\nWhen creating an HDF5 file to contain Daf data, you should specify ;fapl=HDF5.FileAccessProperties(;alignment=(1,8)). This ensures all the memory buffers are properly aligned for efficient access. Otherwise, memory mapping will be much less efficient. A warning is therefore generated whenever you try to access Daf data stored in an HDF5 file which does not enforce proper alignment.\n\nnote: Note\nDeleting data from an HDF5 file does not reuse the abandoned storage. In general if you want to reclaim that storage, you will need to repack the file, which will invalidate any memory-mapped buffers created for it. Therefore, if you delete data (e.g. using delete_vector!), you should eventually abandon the H5df object, repack the HDF5 file, then create a new H5df object to access the repacked data.\n\nnote: Note\nThe code here assumes the HDF5 data obeys all the above conventions and restrictions (that said, code will be able to access vectors and matrices stored in unaligned, chunked and/or compressed formats, but this will be much less efficient). As long as you only create and access Daf data in HDF5 files using H5df, then the code will work as expected (assuming no bugs). 
However, if you do this in some other way (e.g., directly using some HDF5 API in some arbitrary programming language), and the result is invalid, then the code here may fails with \"less than friendly\" error messages.\n\n\n\n\n\n","category":"module"},{"location":"h5df_format.html#Daf.H5dfFormat.MAJOR_VERSION","page":"H5DF Format","title":"Daf.H5dfFormat.MAJOR_VERSION","text":"The specific major version of the H5df format that is supported by this code (1). The code will refuse to access data that is stored in a different major format.\n\n\n\n\n\n","category":"constant"},{"location":"h5df_format.html#Daf.H5dfFormat.MINOR_VERSION","page":"H5DF Format","title":"Daf.H5dfFormat.MINOR_VERSION","text":"The maximal minor version of the H5df format that is supported by this code (0). The code will refuse to access data that is stored with the expected major version (1), but that uses a higher minor version.\n\nnote: Note\nModifying data that is stored with a lower minor version number may increase its minor version number.\n\n\n\n\n\n","category":"constant"},{"location":"h5df_format.html#Daf.H5dfFormat.H5df","page":"H5DF Format","title":"Daf.H5dfFormat.H5df","text":"H5df(\n root::Union{AbstractString, HDF5.File, HDF5.Group},\n mode::AbstractString = \"r\";\n [name::Maybe{AbstractString} = nothing]\n)\n\nStorage in a HDF5 file.\n\nThe root can be the path of an HDF5 file, which will be opened with the specified mode, or an opened HDF5 file, in which cases the Daf data set will be stored directly in the root of the file (by convention, using a .h5df file name suffix). Alternatively, the root can be a group inside an HDF5 file, which allows to store multiple Daf data sets inside the same HDF5 file (by convention, using a .h5dfs file name suffix).\n\nWhen opening an existing data set, if name is not specified, and there exists a \"name\" scalar property, it is used as the name. 
Otherwise, the path of the HDF5 file will be used as the name, followed by the internal path of the group (if any).\n\nThe valid mode values are as follows (the default mode is r):\n\nMode Allow modifications? Create if does not exist? Truncate if exists? Returned type\nr No No No DafReadOnly\nr+ Yes No No H5df\nw+ Yes Yes No H5df\nw Yes Yes Yes H5df\n\nnote: Note\nIf specifying a path (string) root, when calling h5open, the file alignment of created files is set to (1, 8) to maximize efficiency of mapped vectors and matrices, and the w+ mode is converted to cw.\n\n\n\n\n\n","category":"type"},{"location":"h5df_format.html#Index","page":"H5DF Format","title":"Index","text":"","category":"section"},{"location":"h5df_format.html","page":"H5DF Format","title":"H5DF Format","text":"Pages = [\"h5df_format.md\"]","category":"page"}] +[{"location":"storage_types.html#Storage-types","page":"Storage types","title":"Storage types","text":"","category":"section"},{"location":"storage_types.html","page":"Storage types","title":"Storage types","text":"Daf.StorageTypes","category":"page"},{"location":"storage_types.html#Daf.StorageTypes","page":"Storage types","title":"Daf.StorageTypes","text":"Only a restricted set of scalar, matrix and vector types is stored by Daf.\n\nThe set of scalar types is restricted because we need to be able to store them in disk files. This rules out compound types such as Dict. This isn't an issue for vector and matrix elements but is sometimes bothersome for \"scalar\" data (not associated with any axis). If you find yourself needed to store such data, you'll have to serialize it to a string. By convention, we use JSON blobs for such data to maximize portability between different systems.\n\nJulia supports a potentially infinite variety of ways to represent matrices and vectors. Daf is intentionally restricted to specific representations. 
This has several advantages:\n\nDaf storage formats need only implement storing these restricted representations, which lend themselves to simple storage in consecutive bytes (in memory and/or on disk). These representations also allow for memory-mapping the data from disk files, which allows Daf to deal with data sets larger than the available memory.\nClient code need only worry about dealing with these restricted representations, which limits the amount of code paths required for efficient algorithm implementations. However, you (mostly) need not worry about this when invoking library functions, which have code paths covering all common matrix types. You do need to consider the layout of the data, though (see below).\n\nThis has the downside that Daf doesn't support efficient storage of specialized matrices (to pick a random example, upper triangular matrices). This isn't a great loss, since Daf targets storing arbitrary scientific data (especially biological data), which in general is not of any such special shape. The upside is that all matrices stored and returned by Daf have a clear MatrixLayouts (regardless of whether they are dense or sparse). 
This allows user code to ensure it is working \"with the grain\" of the data, which is much more efficient.\n\n\n\n\n\n","category":"module"},{"location":"storage_types.html#Storable-types","page":"Storage types","title":"Storable types","text":"","category":"section"},{"location":"storage_types.html","page":"Storage types","title":"Storage types","text":"Daf.StorageTypes.StorageInteger\nDaf.StorageTypes.StorageFloat\nDaf.StorageTypes.StorageNumber\nDaf.StorageTypes.StorageScalar\nDaf.StorageTypes.StorageScalarBase\nDaf.StorageTypes.StorageVector\nDaf.StorageTypes.StorageMatrix","category":"page"},{"location":"storage_types.html#Daf.StorageTypes.StorageInteger","page":"Storage types","title":"Daf.StorageTypes.StorageInteger","text":"StorageInteger = Union{Int8, UInt8, Int16, UInt16, Int32, UInt32, Int64, UInt64}\n\nInteger number types that can be used as scalars, or elements in stored matrices or vectors.\n\n\n\n\n\n","category":"type"},{"location":"storage_types.html#Daf.StorageTypes.StorageFloat","page":"Storage types","title":"Daf.StorageTypes.StorageFloat","text":"StorageFloat = Union{Float32, Float64}\n\nFloating point number types that can be used as scalars, or elements in stored matrices or vectors.\n\n\n\n\n\n","category":"type"},{"location":"storage_types.html#Daf.StorageTypes.StorageNumber","page":"Storage types","title":"Daf.StorageTypes.StorageNumber","text":"StorageNumber = Union{Bool, StorageInteger, StorageFloat}\n\nNumber types that can be used as scalars, or elements in stored matrices or vectors.\n\n\n\n\n\n","category":"type"},{"location":"storage_types.html#Daf.StorageTypes.StorageScalar","page":"Storage types","title":"Daf.StorageTypes.StorageScalar","text":"StorageScalar = Union{StorageNumber, S} where {S <: AbstractString}\n\nTypes that can be used as scalars, or elements in stored matrices or vectors.\n\nThis is restricted to StorageNumber (including Booleans) and strings. 
It is arguably too restrictive, as in principle we could support any arbitrary isbitstype. However, in practice this would cause much trouble when accessing the data from other systems (specifically Python and R). Since Daf targets storing scientific data (especially biological data), as opposed to \"anything at all\", this restriction seems reasonable.\n\n\n\n\n\n","category":"type"},{"location":"storage_types.html#Daf.StorageTypes.StorageScalarBase","page":"Storage types","title":"Daf.StorageTypes.StorageScalarBase","text":"StorageScalarBase = Union{StorageNumber, AbstractString}\n\nFor using in where clauses when a type needs to be a StorageScalar. That is, write where {T <: StorageScalarBase} instead of where {T <: StorageScalar}, because of the limitations of Julia's type system.\n\n\n\n\n\n","category":"type"},{"location":"storage_types.html#Daf.StorageTypes.StorageVector","page":"Storage types","title":"Daf.StorageTypes.StorageVector","text":"StorageVector{T} = AbstractVector{T} where {T <: StorageScalar}\n\nVectors that can be directly stored (and fetched) from Daf storage.\n\nThe element type must be a StorageScalar, to allow storing the data in disk files. Vectors of strings are supported but will be less efficient.\n\n\n\n\n\n","category":"type"},{"location":"storage_types.html#Daf.StorageTypes.StorageMatrix","page":"Storage types","title":"Daf.StorageTypes.StorageMatrix","text":"StorageMatrix{T} = AbstractMatrix{T} where {T <: StorageNumber}\n\nMatrices that can be directly stored (and fetched) from Daf storage.\n\nThe element type must be a StorageNumber, to allow efficient storage of the data in disk files. 
That is, matrices of strings are not supported.\n\nnote: Note\nAll matrices we store must have a clear MatrixLayouts, that is, must be in either row-major or column-major format.\n\n\n\n\n\n","category":"type"},{"location":"storage_types.html#Constructors","page":"Storage types","title":"Constructors","text":"","category":"section"},{"location":"storage_types.html#Storable-types-2","page":"Storage types","title":"Storable types","text":"","category":"section"},{"location":"storage_types.html","page":"Storage types","title":"Storage types","text":"Daf.StorageTypes.sparse_vector\nDaf.StorageTypes.sparse_matrix_csc","category":"page"},{"location":"storage_types.html#Daf.StorageTypes.sparse_vector","page":"Storage types","title":"Daf.StorageTypes.sparse_vector","text":"sparse_vector(dense::StorageMatrix)::SparseVector\n\nCreate a sparse vector using the smallest unsigned integer type needed for this size of matrix.\n\n\n\n\n\n","category":"function"},{"location":"storage_types.html#Daf.StorageTypes.sparse_matrix_csc","page":"Storage types","title":"Daf.StorageTypes.sparse_matrix_csc","text":"sparse_matrix_csc(dense::StorageMatrix)::SparseMatrixCSC\n\nCreate a sparse matrix using the smallest unsigned integer type needed for this size of matrix.\n\n\n\n\n\n","category":"function"},{"location":"storage_types.html#Index","page":"Storage types","title":"Index","text":"","category":"section"},{"location":"storage_types.html","page":"Storage types","title":"Storage types","text":"Pages = [\"storage_types.md\"]","category":"page"},{"location":"generic_functions.html#Generic-Functions","page":"Generic Functions","title":"Generic Functions","text":"","category":"section"},{"location":"generic_functions.html","page":"Generic Functions","title":"Generic 
Functions","text":"Daf.GenericFunctions\nDaf.GenericFunctions.dedent\nDaf.GenericFunctions.AbnormalHandler\nDaf.GenericFunctions.handle_abnormal","category":"page"},{"location":"generic_functions.html#Daf.GenericFunctions","page":"Generic Functions","title":"Daf.GenericFunctions","text":"Functions that arguably should belong in a more general-purpose package.\n\nWe do not re-export the functions and supporting types defined here from the top-level Daf namespace. That is, even if using Daf, you will not have these generic names polluting your namespace. If you do want to reuse them in your code, explicitly write using Daf.GenericFunctions.\n\n\n\n\n\n","category":"module"},{"location":"generic_functions.html#Daf.GenericFunctions.dedent","page":"Generic Functions","title":"Daf.GenericFunctions.dedent","text":"dedent(string::AbstractString; indent::AbstractString = \"\")::String\n\nGiven a possibly multi-line string with a common indentation in each line, strip this indentation from all lines, and replace it with indent. Will also strip any initial and/or final line breaks.\n\n\n\n\n\n","category":"function"},{"location":"generic_functions.html#Daf.GenericFunctions.AbnormalHandler","page":"Generic Functions","title":"Daf.GenericFunctions.AbnormalHandler","text":"The action to take when encountering an \"abnormal\" (but recoverable) operation.\n\nValid values are:\n\nIgnoreHandler - ignore the issue and perform the recovery operation.\n\nWarnHandler - emit a warning using @warn.\n\nErrorHandler - abort the program with an error message.\n\n\n\n\n\n","category":"type"},{"location":"generic_functions.html#Daf.GenericFunctions.handle_abnormal","page":"Generic Functions","title":"Daf.GenericFunctions.handle_abnormal","text":"handle_abnormal(message::Function, handler::AbnormalHandler)::Nothing\n\nCall this when encountering some abnormal, but recoverable, condition. Follow it by the recovery code.\n\nThis will error if the handler is ErrorHandler, and abort the program. 
If it is WarnHandler, it will just @warn and return. If it is IgnoreHandler it will just return.\n\nThe message is a function that should return an AbstractString to use. For efficiency, it is not invoked if ignoring the condition.\n\n\n\n\n\n","category":"function"},{"location":"generic_functions.html#Index","page":"Generic Functions","title":"Index","text":"","category":"section"},{"location":"generic_functions.html","page":"Generic Functions","title":"Generic Functions","text":"Pages = [\"generic_functions.md\"]","category":"page"},{"location":"tokens.html#Tokens","page":"Tokens","title":"Tokens","text":"","category":"section"},{"location":"tokens.html","page":"Tokens","title":"Tokens","text":"Daf.Tokens","category":"page"},{"location":"tokens.html#Daf.Tokens","page":"Tokens","title":"Daf.Tokens","text":"The only exported functions from this module are escape_value and unescape_value which are useful when embedding values into query strings. The rest of the module is documented to give insight into how a query string is broken into Tokens.\n\nIdeally Daf should have used some established parser generator module for parsing queries, making all this unnecessary. However, as of writing this code, Julia doesn't seem to have such a parser generator solution. 
Therefore, this module provides a simple tokenize function with rudimentary pattern matching which is all we need to parse queries (whose structure is \"trivial\").\n\n\n\n\n\n","category":"module"},{"location":"tokens.html#Escaping","page":"Tokens","title":"Escaping","text":"","category":"section"},{"location":"tokens.html","page":"Tokens","title":"Tokens","text":"Daf.Tokens.escape_value\nDaf.Tokens.unescape_value\nDaf.Tokens.is_value_char\nDaf.Tokens.VALUE_REGEX","category":"page"},{"location":"tokens.html#Daf.Tokens.escape_value","page":"Tokens","title":"Daf.Tokens.escape_value","text":"escape_value(value::AbstractString)::String\n\nGiven some raw value (name of an axis, axis entry or property, or a parameter value), which may contain special characters, return an escaped version to be used as a single value Token.\n\nWe need to consider the following kinds of characters:\n\nSafe (is_value_char) characters include a - z, A - Z, 0 - 9, _, +, -, and ., as well as any non-ASCII (that is, Unicode) characters. Any sequence of these characters will be considered a single value Token. These cover all the common cases (including signed integer and floating point values).\nAll other ASCII characters are (at least potentially) special, that is, may be used to describe an operation.\nPrefixing any character with a \\ allows using it inside a value Token. This is useful if some name or value contains a special character. For example, if you have a cell whose name is ACTG:Plate1, and you want to access the name of the batch of this specific cell, you will have to write / cell = ACTG\\:Plate1 : batch.\n\nnote: Note\nThe \\ character is also used by Julia inside \"...\" string literals, to escape writing non-printable characters. For example, \"\\n\" is a single-character string containing a line break, and therefore \"\\\\\" is used to write a single \\. Thus the above example would have to be written as \"cell = ACTG\\\\:Plate1 : batch\". 
This isn't nice. Luckily, Julia also has raw\"...\" string literals that work similarly to Python's r\"...\" strings (in Julia, r\"...\" is a regular expression, not a string). Inside raw string literals, a \\ is a \\ (unless it precedes a \"). Therefore the above example could also be written as raw\"/ cell = ACTG\\:Plate1 : batch\", which is more readable.\n\nBack to escape_value - it will prefix any special character with a \\. It is useful if you want to programmatically inject a value. Often this happens when using $(...) to embed values into a query string, e.g., do not write a query / $(axis) @ $(property) as it is unsafe, as any of the embedded variables may contain unsafe characters. You should instead write something like / $(escape_value(axis)) @ $(escape_value(property)).\n\n\n\n\n\n","category":"function"},{"location":"tokens.html#Daf.Tokens.unescape_value","page":"Tokens","title":"Daf.Tokens.unescape_value","text":"unescape_value(escaped::AbstractString)::String\n\nUndo escape_value, that is, given an escaped value with \\ characters escaping special characters, drop the \\ to get back the original string value.\n\n\n\n\n\n","category":"function"},{"location":"tokens.html#Daf.Tokens.is_value_char","page":"Tokens","title":"Daf.Tokens.is_value_char","text":"is_value_char(character::Char)::Bool\n\nReturn whether a character is safe to use inside a value Token (name of an axis, axis entry or property, or a parameter value).\n\nThe safe characters are a - z, A - Z, 0 - 9, _, +, -, and ., as well as any non-ASCII (that is, Unicode) characters.\n\n\n\n\n\n","category":"function"},{"location":"tokens.html#Daf.Tokens.VALUE_REGEX","page":"Tokens","title":"Daf.Tokens.VALUE_REGEX","text":"VALUE_REGEX = r\"^(?:[0-9a-zA-Z_.+-]|[^\\x00-\\xFF])+\"\n\nA sequence of is_value_char is considered to be a single value Token. This set of characters was chosen to allow expressing numbers, Booleans and simple names. 
Any other (ASCII, non-space) character may in principle be used as an operator (possibly in a future version of the code). Therefore, use escape_value to protect any value you embed into the expression.\n\n\n\n\n\n","category":"constant"},{"location":"tokens.html#Encoding","page":"Tokens","title":"Encoding","text":"","category":"section"},{"location":"tokens.html","page":"Tokens","title":"Tokens","text":"Daf.Tokens.encode_expression\nDaf.Tokens.decode_expression","category":"page"},{"location":"tokens.html#Daf.Tokens.encode_expression","page":"Tokens","title":"Daf.Tokens.encode_expression","text":"encode_expression(expr_string::AbstractString)::String\n\nGiven an expression string to parse, encode any non-ASCII (that is, Unicode) character, as well as any character escaped by a \\, such that the result will only use is_value_char characters. Every encoded character is replaced by _XX using URI encoding, but replacing the % with a _ so we can deal with unescaped % as an operator, so we also need to encode _ as _5F, so we need to encode \\_ as _5C_5F. 
Isn't encoding fun?\n\n\n\n\n\n","category":"function"},{"location":"tokens.html#Daf.Tokens.decode_expression","page":"Tokens","title":"Daf.Tokens.decode_expression","text":"decode_expression(encoded_string::AbstractString)::String\n\nGiven the results of encode_expression, decode it back to its original form.\n\n\n\n\n\n","category":"function"},{"location":"tokens.html#Tokenization","page":"Tokens","title":"Tokenization","text":"","category":"section"},{"location":"tokens.html","page":"Tokens","title":"Tokens","text":"Daf.Tokens.Token\nDaf.Tokens.tokenize\nDaf.Tokens.SPACE_REGEX","category":"page"},{"location":"tokens.html#Daf.Tokens.Token","page":"Tokens","title":"Daf.Tokens.Token","text":"struct Token\n is_operator::Bool\n value::AbstractString\n token_index::Int\n first_index::Int\n last_index::Int\n encoded_string::AbstractString\nend\n\nA parsed token of an expression.\n\nWe distinguish between \"value\" tokens and \"operator\" tokens using is_operator. A value token holds the name of an axis, axis entry or property, or a parameter value, while an operator token is used to identify a query operation to perform. In both cases, the value contains the token string. This goes through both decode_expression and unescape_value so it can be directly used as-is for value tokens.\n\nWe also keep the location (first_index .. last_index) and the (encoded) expression string, to enable generating friendly error messages. There are no line numbers in locations because in Daf we squash our queries to a single-line, under the assumption they are \"relatively simple\". This allows us to simplify the code.\n\n\n\n\n\n","category":"type"},{"location":"tokens.html#Daf.Tokens.tokenize","page":"Tokens","title":"Daf.Tokens.tokenize","text":"tokenize(string::AbstractString, operators::Regex)::Vector{Token}\n\nGiven an expression string, convert it into a vector of Token.\n\nWe first convert everything that matches the SPACE_REGEX into a single space. 
This squashes the expression into a single line (discarding line breaks and comments), and the squashed expression is used for reporting errors. This is reasonable for dealing with Daf queries which are expected to be \"relatively simple\".\n\nWhen tokenizing, we discard the spaces. Anything that matches the VALUE_REGEX is considered to be a value Token. Anything that matches the operators is considered to be an operator Token. As a special case, '' is converted to an empty string, which is otherwise impossible to represent (write \\'\\' to prevent this). Anything else is reported as an invalid character.\n\nnote: Note\nThe operators regex should only match the start of the string (that is, must start with ^). Also, when using |, you need to list the longer operators first (e.g., ^(?:\\+\\+|\\+) as opposed to ^(?:\\+|\\+\\+)).\n\n\n\n\n\n","category":"function"},{"location":"tokens.html#Daf.Tokens.SPACE_REGEX","page":"Tokens","title":"Daf.Tokens.SPACE_REGEX","text":"SPACE_REGEX = r\"(?:[\\s\\n\\r]|#[^\\n\\r]*(?:[\\r\\n]|$))+\"sm\n\nOptional white space can separate Token. It is required when there are two consecutive value tokens, but is typically optional around operators. White space includes spaces, tabs, line breaks, and a # ... comment suffix of a line.\n\n\n\n\n\n","category":"constant"},{"location":"tokens.html#Index","page":"Tokens","title":"Index","text":"","category":"section"},{"location":"tokens.html","page":"Tokens","title":"Tokens","text":"Pages = [\"tokens.md\"]","category":"page"},{"location":"chains.html#Chains","page":"Chains","title":"Chains","text":"","category":"section"},{"location":"chains.html","page":"Chains","title":"Chains","text":"Daf.Chains\nDaf.Chains.chain_reader\nDaf.Chains.chain_writer\nDaf.Chains.ReadOnlyChain\nDaf.Chains.WriteChain","category":"page"},{"location":"chains.html#Daf.Chains","page":"Chains","title":"Daf.Chains","text":"View a chain of Daf data as a single data set. 
This allows creating a small Daf data set that contains extra (or overriding) data on top of a larger read-only data set. In particular this allows creating several such incompatible extra data sets (e.g., different groupings of cells to metacells), without having to duplicate the common (read only) data.\n\n\n\n\n\n","category":"module"},{"location":"chains.html#Daf.Chains.chain_reader","page":"Chains","title":"Daf.Chains.chain_reader","text":"chain_reader(dafs::AbstractVector{F}; name::Maybe{AbstractString} = nothing)::DafReader where {F <: DafReader}\n\nCreate a read-only chain wrapper of DafReaders, presenting them as a single DafReader. When accessing the content, the exposed value is that provided by the last data set that contains the data, that is, later data sets can override earlier data sets. However, if an axis exists in more than one data set in the chain, then its entries must be identical. This isn't typically created manually; instead call chain_reader.\n\nnote: Note\nWhile this verifies the axes are consistent at the time of creating the chain, it's no defense against modifying the chained data after the fact, creating inconsistent axes. Don't do that.\n\n\n\n\n\n","category":"function"},{"location":"chains.html#Daf.Chains.chain_writer","page":"Chains","title":"Daf.Chains.chain_writer","text":"chain_writer(dafs::AbstractVector{F}; name::Maybe{AbstractString} = nothing)::DafWriter where {F <: DafReader}\n\nCreate a chain wrapper for a chain of DafReader data, presenting them as a single DafWriter. This acts similarly to chain_reader, but requires the final entry to be a DafWriter. Any modifications or additions to the chain are directed at this final writer.\n\nnote: Note\nDeletions are only allowed for data that exists only in the final writer. 
That is, it is impossible to delete from a chain something that exists in any of the readers; it is only possible to override it.\n\n\n\n\n\n","category":"function"},{"location":"chains.html#Daf.Chains.ReadOnlyChain","page":"Chains","title":"Daf.Chains.ReadOnlyChain","text":"struct ReadOnlyChain <: DafReadOnly ... end\n\nA wrapper for a chain of DafReader data, presenting them as a single DafReadOnly. When accessing the content, the exposed value is that provided by the last data set that contains the data, that is, later data sets can override earlier data sets. However, if an axis exists in more than one data set in the chain, then its entries must be identical. This isn't typically created manually; instead call chain_reader.\n\n\n\n\n\n","category":"type"},{"location":"chains.html#Daf.Chains.WriteChain","page":"Chains","title":"Daf.Chains.WriteChain","text":"struct WriteChain <: DafWriter ... end\n\nA wrapper for a chain of DafReader data, with a final DafWriter, presenting them as a single DafWriter. When accessing the content, the exposed value is that provided by the last data set that contains the data, that is, later data sets can override earlier data sets (where the writer has the final word). However, if an axis exists in more than one data set in the chain, then its entries must be identical. This isn't typically created manually; instead call chain_writer.\n\nAny modifications or additions to the chain are directed at the final writer. Deletions are only allowed for data that exists only in this writer. 
That is, it is impossible to delete from a chain something that exists in any of the readers; it is only possible to override it.\n\n\n\n\n\n","category":"type"},{"location":"chains.html#Index","page":"Chains","title":"Index","text":"","category":"section"},{"location":"chains.html","page":"Chains","title":"Chains","text":"Pages = [\"chains.md\"]","category":"page"},{"location":"registry.html#Operations-registry","page":"Operations registry","title":"Operations registry","text":"","category":"section"},{"location":"registry.html","page":"Operations registry","title":"Operations registry","text":"Daf.Registry","category":"page"},{"location":"registry.html#Daf.Registry","page":"Operations registry","title":"Daf.Registry","text":"Registering element-wise and reduction operations is required, to allow them to be used in a query.\n\nnote: Note\nWe do not re-export everything from here to the main Daf namespace, as it is only of interest for implementers of new query operations. Most users of Daf just stick with the (fairly comprehensive) list of built-in query operations so there's no need to pollute their namespace with these details.\n\n\n\n\n\n","category":"module"},{"location":"registry.html#Element-wise-operations","page":"Operations registry","title":"Element-wise operations","text":"","category":"section"},{"location":"registry.html","page":"Operations registry","title":"Operations registry","text":"Daf.Registry.EltwiseOperation\nDaf.Registry.compute_eltwise","category":"page"},{"location":"registry.html#Daf.Registry.EltwiseOperation","page":"Operations registry","title":"Daf.Registry.EltwiseOperation","text":"Abstract type for all element-wise operations.\n\nIn a string query, this is specified using the % operator (e.g., % Abs, % Log base 2):\n\nEltwiseOperation := % operation ( parameter value )*\n\nSince each EltwiseOperation isa QueryOperation, you can directly apply it to a query (e.g., Axis(\"cell\") |> Lookup(\"age\") |> Abs()). 
For this there should be other constructor(s) tailored for this usage.\n\nAn element-wise operation may be applied to scalar, vector or matrix data. It will preserve the shape of the data, but changes the value(s), and possibly the data type of the elements. For example, Abs will compute the absolute value of each value.\n\nTo implement a new such operation, the type is expected to be of the form:\n\nstruct MyOperation <: EltwiseOperation\n ... optional parameters ...\nend\n@query_operation MyOperation\n\nMyOperation(operation_name::Token, parameter_values::Dict{String, Token})::MyOperation\n\nThe constructor should use parse_parameter for each of the parameters (for example, using parse_number_assignment). In addition you will need to invoke @query_operation to register the operation so it can be used in a query, and implement the functions listed below. See the query operations module for details and examples.\n\n\n\n\n\n","category":"type"},{"location":"registry.html#Daf.Registry.compute_eltwise","page":"Operations registry","title":"Daf.Registry.compute_eltwise","text":"compute_eltwise(operation::EltwiseOperation, input::StorageMatrix)::StorageMatrix\ncompute_eltwise(operation::EltwiseOperation, input::StorageVector)::StorageVector\ncompute_eltwise(operation::EltwiseOperation, input_value::Number)::StorageNumber\n\nCompute an EltwiseOperation operation.\n\n\n\n\n\n","category":"function"},{"location":"registry.html#Reduction-operations","page":"Operations registry","title":"Reduction operations","text":"","category":"section"},{"location":"registry.html","page":"Operations registry","title":"Operations registry","text":"Daf.Registry.ReductionOperation\nDaf.Registry.compute_reduction\nDaf.Registry.reduction_result_type","category":"page"},{"location":"registry.html#Daf.Registry.ReductionOperation","page":"Operations registry","title":"Daf.Registry.ReductionOperation","text":"Abstract type for all reduction operations.\n\nIn a string query, this is specified 
using the %> operator (e.g., %> Sum, %> Quantile fraction 0.05):\n\nReductionOperation := %> operation ( parameter value )*\n\nSince each ReductionOperation isa QueryOperation, you can directly apply it to a query (e.g., Axis(\"cell\") |> Axis(\"gene\") |> Lookup(\"UMIs\") |> Quantile(0.05)). For this there should be other constructor(s) tailored for this usage.\n\nA reduction operation may be applied to matrix or vector data. It will reduce (eliminate) one dimension of the data, and possibly the result will have a different data type than the input. When applied to a vector, the operation will return a scalar. When applied to a matrix, it assumes the matrix is in column-major layout, and will return a vector with one entry per column, containing the result of reducing the column to a scalar.\n\nTo implement a new such operation, the type is expected to be of the form:\n\nstruct MyOperation <: ReductionOperation\n ... optional parameters ...\nend\n\nMyOperation(operation_name::Token, parameter_values::Dict{String, Token})::MyOperation\n\nThe constructor should use parse_parameter for each of the parameters (for example, using typically parse_number_assignment). In addition you will need to invoke @query_operation to register the operation so it can be used in a query, and implement the functions listed below. See the query operations module for details and examples.\n\n\n\n\n\n","category":"type"},{"location":"registry.html#Daf.Registry.compute_reduction","page":"Operations registry","title":"Daf.Registry.compute_reduction","text":"compute_reduction(operation::ReductionOperation, input::StorageMatrix)::StorageVector\ncompute_reduction(operation::ReductionOperation, input::StorageVector)::StorageNumber\n\nSince each ReductionOperation isa QueryOperation, you can directly apply it to a query (e.g., Axis(\"cell\") |> Axis(\"gene\") |> Lookup(\"UMIs\") |> Sum()). 
For this there should be other constructor(s) tailored for this usage.\n\n\n\n\n\n","category":"function"},{"location":"registry.html#Daf.Registry.reduction_result_type","page":"Operations registry","title":"Daf.Registry.reduction_result_type","text":"reduction_result_type(operation::ReductionOperation, eltype::Type)::Type\n\nReturn the data type of the result of the reduction operation if applied to a vector of the specified eltype.\n\n\n\n\n\n","category":"function"},{"location":"registry.html#Registering-operations","page":"Operations registry","title":"Registering operations","text":"","category":"section"},{"location":"registry.html","page":"Operations registry","title":"Operations registry","text":"Daf.Registry.QueryOperation\nDaf.Registry.@query_operation\nDaf.Registry.register_query_operation","category":"page"},{"location":"registry.html#Daf.Registry.QueryOperation","page":"Operations registry","title":"Daf.Registry.QueryOperation","text":"Abstract interface for all query operations. An actual query is a series of these operations which, when applied to Daf data, compute some result.\n\n\n\n\n\n","category":"type"},{"location":"registry.html#Daf.Registry.@query_operation","page":"Operations registry","title":"Daf.Registry.@query_operation","text":"struct MyOperation <: EltwiseOperation # Or <: ReductionOperation\n ...\nend\n@query_operation MyOperation\n\nAutomatically call register_query_operation for MyOperation.\n\nNote this will import Daf.Registry.register_query_operation, so it may only be called from the top level scope of a module.\n\n\n\n\n\n","category":"macro"},{"location":"registry.html#Daf.Registry.register_query_operation","page":"Operations registry","title":"Daf.Registry.register_query_operation","text":"register_query_operation(\n type::Type{T},\n source_file::AbstractString,\n source_line::Integer,\n)::Nothing where {T <: Union{EltwiseOperation, ReductionOperation}}\n\nRegister a specific operation so it would be available inside queries. 
This is required to be able to parse the operation. This is idempotent (safe to invoke multiple times).\n\nThis isn't usually called directly. Instead, it is typically invoked by using the @query_operation macro.\n\n\n\n\n\n","category":"function"},{"location":"registry.html#Index","page":"Operations registry","title":"Index","text":"","category":"section"},{"location":"registry.html","page":"Operations registry","title":"Operations registry","text":"Pages = [\"registry.md\"]","category":"page"},{"location":"computations.html#Computations","page":"Computations","title":"Computations","text":"","category":"section"},{"location":"computations.html","page":"Computations","title":"Computations","text":"Daf.Computations\nDaf.Computations.@computation\nDaf.Computations.DEFAULT\nDaf.Computations.CONTRACT\nDaf.Computations.CONTRACT1\nDaf.Computations.CONTRACT2","category":"page"},{"location":"computations.html#Daf.Computations","page":"Computations","title":"Daf.Computations","text":"Support writing \"well-behaved\" computations. Such computations declare a Contract describing their inputs and outputs. This is enforced, so that the implementation need not worry about missing inputs, and the caller can rely on the results. It is also self-documenting, so the generated documentation always contains a clear up-to-date description of the contract.\n\n\n\n\n\n","category":"module"},{"location":"computations.html#Daf.Computations.@computation","page":"Computations","title":"Daf.Computations.@computation","text":"@computation function something(...)\n return ...\nend\n\n@computation Contract(...) function something(daf::DafWriter, ...)\n return ...\nend\n\n@computation Contract(...) Contract(...) function something(\n first::DafReader/DafWriter, second::DafReader/DafWriter, ...\n)\n return ...\nend\n\nMark a function as a Daf computation. 
This has the following effects:\n\nIt verifies that the Daf data satisfies the Contract, when the computation is invoked and when it is complete (using verify_input and verify_output).\nIt stashes the contract(s) (if any) in a global variable. This allows expanding CONTRACT in the documentation string (for a single contract case), or CONTRACT1 and CONTRACT2 (for the dual contract case).\nIt stashes the default value of named arguments. This allows expanding DEFAULT in the documentation string, which is especially useful if these defaults are computed, read from global constants, etc.\nIt logs the invocation of the function (using @debug), including the actual values of the named arguments (using depict).\n\nnote: Note\nFor each Contract parameter (if any), there needs to be a DafReader or DafWriter, which the contract(s) will be applied to. These parameters should be the initial positional parameters of the function.\n\n\n\n\n\n","category":"macro"},{"location":"computations.html#Daf.Computations.DEFAULT","page":"Computations","title":"Daf.Computations.DEFAULT","text":"When using @computation:\n\n'''\n something(daf::DafWriter, x::Int = $(DEFAULT.x); y::Bool = $(DEFAULT.y))\n\n...\nIf `x` (default: $(DEFAULT.x)) is even, ...\n...\nIf `y` (default: $(DEFAULT.y)) is set, ...\n...\n'''\n@computation Contract(...)\nfunction something(daf::DafWriter, x::Int = 0; y::Bool = false)\n return ...\nend\n\nThen $(DEFAULT.x) will be expanded with the default value of the parameter x. It is good practice to contain a description of the effects of each parameter somewhere in the documentation, and it is polite to also provide its default value. This can be done in either the signature line or in the text, or both. 
Using DEFAULT ensures that the correct value is used in the documentation.\n\n\n\n\n\n","category":"constant"},{"location":"computations.html#Daf.Computations.CONTRACT","page":"Computations","title":"Daf.Computations.CONTRACT","text":"When using @computation:\n\n'''\n...\n# Contract\n...\n$(CONTRACT)\n...\n'''\n@computation Contract(...)\nfunction something(daf::DafWriter, ...)\n return ...\nend\n\nThen $(CONTRACT) will be expanded with a description of the Contract. This is based on DocStringExtensions.\n\nnote: Note\nThe first argument of the function must be a DafWriter, which the contract will be applied to.\n\n\n\n\n\n","category":"constant"},{"location":"computations.html#Daf.Computations.CONTRACT1","page":"Computations","title":"Daf.Computations.CONTRACT1","text":"Same as CONTRACT, but reference the contract for the 1st Daf argument for a @computation with two such arguments.\n\n\n\n\n\n","category":"constant"},{"location":"computations.html#Daf.Computations.CONTRACT2","page":"Computations","title":"Daf.Computations.CONTRACT2","text":"Same as CONTRACT, but reference the contract for the 2nd Daf argument for a @computation with two such arguments.\n\n\n\n\n\n","category":"constant"},{"location":"computations.html#Index","page":"Computations","title":"Index","text":"","category":"section"},{"location":"computations.html","page":"Computations","title":"Computations","text":"Pages = [\"computations.md\"]","category":"page"},{"location":"reconstruction.html#Reconstruction","page":"Reconstruction","title":"Reconstruction","text":"","category":"section"},{"location":"reconstruction.html","page":"Reconstruction","title":"Reconstruction","text":"Daf.Reconstruction\nDaf.Reconstruction.reconstruct_axis!","category":"page"},{"location":"reconstruction.html#Daf.Reconstruction","page":"Reconstruction","title":"Daf.Reconstruction","text":"Reconstruct implicit axes. 
Due to AnnData two-axes limitations, other axes are often represented by storing their expanded data (e.g., a type for each cell, and a color for each cell, where the color is actually per type). When converting such data to Daf, it is useful to reconstruct such axes (e.g., create a type axis, assign a color for each type, and delete the per-cell color property).\n\n\n\n\n\n","category":"module"},{"location":"reconstruction.html#Daf.Reconstruction.reconstruct_axis!","page":"Reconstruction","title":"Daf.Reconstruction.reconstruct_axis!","text":"reconstruct_axis!(\n daf::DafWriter;\n existing_axis::AbstractString,\n implicit_axis::AbstractString,\n [rename_axis::Maybe{AbstractString} = nothing,\n empty_implicit::Maybe{StorageScalar} = nothing,\n implicit_properties::Maybe{AbstractStringSet} = nothing,\n properties_defaults::Maybe{AbstractDict} = nothing]\n)::AbstractDict{<:AbstractString, Maybe{StorageScalar}}\n\nGiven an existing_axis in daf, which has a property implicit_axis, create a new axis with the same name (or, if specified, call it rename_axis). If empty_implicit is specified, this value of the property is replaced by the empty string (indicate there is no value associated with the existing_axis entry). For each of the implicit_properties, we collect the mapping between the implicit_axis and the property values, and store it as a property of the newly created axis.\n\nIf the implicit_axis already exists, we verify that all the values provided for it by the existing_axis do, in fact, exist as names of entries in the implicit_axis. This allows manually creating the implicit_axis with additional entries that are not currently in use.\n\nnote: Note\nIf the implicit_axis already exists and contains entries that aren't currently in use, you must specify properties_defaults for the values of these entries of the reconstructed properties.Due to Julia's type system limitations, there's just no way for the system to enforce the type of the pairs in this vector. 
That is, what we'd like to say is:properties_defaults::Maybe{AbstractDict{<:AbstractString, <:StorageScalar}} = nothingBut what we are forced to say is:properties_defaults::Maybe{Dict} = nothingGlory to anyone who figures out an incantation that would force the system to perform more meaningful type inference here.\n\nIf implicit_properties are explicitly specified, then we require the mapping from implicit_axis to be consistent. Otherwise, we look at all the properties of the existing_axis, and check for each one whether the mapping is consistent; if it is, we migrate the property to the new axis. For example, when importing AnnData containing per-cell data, it isn't always clear which property is actually per-batch (e.g., cell age) and which is actually per cell (e.g., doublet score). Not specifying the implicit_properties allows the function to figure it out on its own.\n\nnote: Note\nFor each converted property, the value associated with existing_axis entries which have no implicit_axis value (that is, have an empty string or empty_implicit value) is lost. For example, if each cell type has a color, but some cells do not have a type, then the color of \"cells with no type\" is lost. We still require this value to be consistent, and return a mapping between each migrated property name and the value of such entries (if any exist). When reconstructing the original property, specify this value using IfNot (e.g., / cell : type => color ?? 
magenta).\n\n\n\n\n\n","category":"function"},{"location":"reconstruction.html#Index","page":"Reconstruction","title":"Index","text":"","category":"section"},{"location":"reconstruction.html","page":"Reconstruction","title":"Reconstruction","text":"Pages = [\"reconstruction.md\"]","category":"page"},{"location":"contracts.html#Contracts","page":"Contracts","title":"Contracts","text":"","category":"section"},{"location":"contracts.html","page":"Contracts","title":"Contracts","text":"Daf.Contracts\nDaf.Contracts.Contract\nDaf.Contracts.ContractAxes\nDaf.Contracts.ContractData\nDaf.Contracts.ContractExpectation\nDaf.Contracts.verify_input\nDaf.Contracts.verify_output","category":"page"},{"location":"contracts.html#Daf.Contracts","page":"Contracts","title":"Daf.Contracts","text":"Enforce input and output contracts of computations using Daf data.\n\n\n\n\n\n","category":"module"},{"location":"contracts.html#Daf.Contracts.Contract","page":"Contracts","title":"Daf.Contracts.Contract","text":"Contract(;\n [axes::Maybe{ContractAxes} = nothing,\n data::Maybe{ContractData} = nothing]\n)::Contract\n\nThe contract of a computational tool, specifying the ContractAxes and ContractData.\n\n\n\n\n\n","category":"type"},{"location":"contracts.html#Daf.Contracts.ContractAxes","page":"Contracts","title":"Daf.Contracts.ContractAxes","text":"A vector of pairs where the key is the axis name and the value is a tuple of the ContractExpectation and a description of the axis (for documentation). Axes are listed mainly for documentation; axes of required or guaranteed vectors or matrices are automatically required or guaranteed to match. However it is considered polite to explicitly list the axes with their descriptions so the documentation of the contract will be complete.\n\nnote: Note\nDue to Julia's type system limitations, there's just no way for the system to enforce the type of the pairs in this vector. 
That is, what we'd like to say is:ContractAxes = AbstractVector{Pair{AbstractString, Tuple{ContractExpectation, AbstractString}}}But what we are forced to say is:ContractAxes = AbstractVector{<:Pair}Glory to anyone who figures out an incantation that would force the system to perform more meaningful type inference here.\n\n\n\n\n\n","category":"type"},{"location":"contracts.html#Daf.Contracts.ContractData","page":"Contracts","title":"Daf.Contracts.ContractData","text":"A vector of pairs where the key is a DataKey identifying some data property, and the value is a tuple of the ContractExpectation, the expected data type, and a description (for documentation).\n\nnote: Note\nDue to Julia's type system limitations, there's just no way for the system to enforce the type of the pairs in this vector. That is, what we'd like to say is:ContractData = AbstractVector{Pair{DataKey, Tuple{ContractExpectation, Type, AbstractString}}}But what we are forced to say is:ContractData = AbstractVector{<:Pair}Glory to anyone who figures out an incantation that would force the system to perform more meaningful type inference here.\n\n\n\n\n\n","category":"type"},{"location":"contracts.html#Daf.Contracts.ContractExpectation","page":"Contracts","title":"Daf.Contracts.ContractExpectation","text":"The expectation from a specific property for a computation on Daf data.\n\nInput data:\n\nRequiredInput - data that must exist in the data when invoking the computation, will be used as input.\n\nOptionalInput - data that, if existing in the data when invoking the computation, will be used as an input.\n\nOutput data:\n\nGuaranteedOutput - data that is guaranteed to exist when the computation is done.\n\nOptionalOutput - data that may exist when the computation is done, depending on some condition, which may include the existence of optional input and/or the value of parameters to the computation, and/or the content of the 
data.\n\n\n\n\n\n","category":"type"},{"location":"contracts.html#Daf.Contracts.verify_input","page":"Contracts","title":"Daf.Contracts.verify_input","text":"verify_input(daf::DafReader, contract::Contract, computation::AbstractString)::Nothing\n\nVerify the daf data when a computation is invoked. This verifies that all the required data exists and is of the appropriate type, and that if any of the optional data exists, it has the appropriate type.\n\n\n\n\n\n","category":"function"},{"location":"contracts.html#Daf.Contracts.verify_output","page":"Contracts","title":"Daf.Contracts.verify_output","text":"verify_output(daf::DafReader, contract::Contract, computation::AbstractString)::Nothing\n\nVerify the daf data when a computation is complete. This verifies that all the guaranteed output data exists and is of the appropriate type, and that if any of the optional output data exists, it has the appropriate type.\n\n\n\n\n\n","category":"function"},{"location":"contracts.html#Index","page":"Contracts","title":"Index","text":"","category":"section"},{"location":"contracts.html","page":"Contracts","title":"Contracts","text":"Pages = [\"contracts.md\"]","category":"page"},{"location":"messages.html#Messages","page":"Messages","title":"Messages","text":"","category":"section"},{"location":"messages.html","page":"Messages","title":"Messages","text":"Daf.Messages","category":"page"},{"location":"messages.html#Daf.Messages","page":"Messages","title":"Daf.Messages","text":"Functions for improving the quality of error and log messages.\n\n\n\n\n\n","category":"module"},{"location":"messages.html","page":"Messages","title":"Messages","text":"Daf.Messages.unique_name\nDaf.Messages.depict\nDaf.Messages.depict_percent","category":"page"},{"location":"messages.html#Daf.Messages.unique_name","page":"Messages","title":"Daf.Messages.unique_name","text":"unique_name(prefix::AbstractString)::AbstractString\n\nUsing short, human-readable unique names for things is a great help when 
debugging. Normally one has to choose between using a human-provided short non-unique name, and an opaque object identifier, or a combination thereof. This function replaces the opaque object identifier with a short counter, which gives names that are both unique and short.\n\nThat is, this will return a unique name starting with the prefix and followed by #, the process index (if using multiple processes), and an index (how many times this name was used in the process). For example, unique_name(\"foo\") will return foo for the first usage, foo#2 for the 2nd, etc. If using multiple processes, it will return foo, foo#1.2, etc.\n\nThat is, for code where the names are unique (e.g., a simple script or Jupyter notebook), this doesn't mess up the names. It only appends a suffix to the names if it is needed to disambiguate between multiple uses of the same name.\n\nTo help with tests, if the prefix ends with !, we return it as-is, accepting it may not be unique.\n\n\n\n\n\n","category":"function"},{"location":"messages.html#Daf.MatrixLayouts.depict","page":"Messages","title":"Daf.MatrixLayouts.depict","text":"depict(value::Any)::String\n\nDepict a value in an error message or a log entry. 
Unlike \"$(value)\", this focuses on producing a human-readable indication of the type of the value, so it double-quotes strings, prefixes symbols with :, and reports the type and sizes of arrays rather than showing their content, as well as having specializations for the various Daf data types.\n\n\n\n\n\n","category":"function"},{"location":"messages.html#Daf.Messages.depict_percent","page":"Messages","title":"Daf.Messages.depict_percent","text":"depict_percent(used::Integer, out_of::Integer)::String\n\nDescribe a fraction of used amount out_of some total as a percentage.\n\n\n\n\n\n","category":"function"},{"location":"messages.html#Index","page":"Messages","title":"Index","text":"","category":"section"},{"location":"messages.html","page":"Messages","title":"Messages","text":"Pages = [\"messages.md\"]","category":"page"},{"location":"matrix_layouts.html#Matrix-layouts","page":"Matrix layouts","title":"Matrix layouts","text":"","category":"section"},{"location":"matrix_layouts.html","page":"Matrix layouts","title":"Matrix layouts","text":"Daf.MatrixLayouts","category":"page"},{"location":"matrix_layouts.html#Daf.MatrixLayouts","page":"Matrix layouts","title":"Daf.MatrixLayouts","text":"All stored Daf matrix data has a clear matrix layout, that is, a major_axis, regardless of whether it is dense or sparse.\n\nThat is, for Columns-major data, the values of each column are laid out consecutively in memory (each column is a single contiguous vector), so any operation that works on whole columns will be fast (e.g., summing the value of each column). In contrast, the values of each row are stored far apart from each other, so any operation that works on whole rows will be very slow in comparison (e.g., summing the value of each row).\n\nFor Rows-major data, the values of each row are laid out consecutively in memory (each row is a single contiguous vector). In contrast, the values of each column are stored far apart from each other. 
In this case, summing columns would be slow, and summing rows would be fast.\n\nThis is much simpler than the ArrayLayouts module which attempts to fully describe the layout of N-dimensional arrays, a much more ambitious goal which is an overkill for our needs.\n\nnote: Note\nThe \"default\" layout in Julia is column-major, which inherits this from matlab, which inherits this from FORTRAN, allegedly because this is more efficient for some linear algebra operations. In contrast, Python numpy uses row-major layout by default. In either case, this is just an arbitrary convention, and all systems work just fine with data of either memory layout; the key consideration is to keep track of the layout, and to apply operations \"with the grain\" rather than \"against the grain\" of the data.\n\n\n\n\n\n","category":"module"},{"location":"matrix_layouts.html#Symbolic-names-for-axes","page":"Matrix layouts","title":"Symbolic names for axes","text":"","category":"section"},{"location":"matrix_layouts.html","page":"Matrix layouts","title":"Matrix layouts","text":"Daf.MatrixLayouts.Rows\nDaf.MatrixLayouts.Columns\nDaf.MatrixLayouts.axis_name","category":"page"},{"location":"matrix_layouts.html#Daf.MatrixLayouts.Rows","page":"Matrix layouts","title":"Daf.MatrixLayouts.Rows","text":"A symbolic name for the rows axis. It is much more readable to write, say, size(matrix, Rows), instead of size(matrix, 1).\n\n\n\n\n\n","category":"constant"},{"location":"matrix_layouts.html#Daf.MatrixLayouts.Columns","page":"Matrix layouts","title":"Daf.MatrixLayouts.Columns","text":"A symbolic name for the columns axis. 
It is much more readable to write, say, size(matrix, Columns), instead of size(matrix, 2).\n\n\n\n\n\n","category":"constant"},{"location":"matrix_layouts.html#Daf.MatrixLayouts.axis_name","page":"Matrix layouts","title":"Daf.MatrixLayouts.axis_name","text":"axis_name(axis::Maybe{Integer})::String\n\nReturn the name of the axis (for messages).\n\n\n\n\n\n","category":"function"},{"location":"matrix_layouts.html#Checking-layout","page":"Matrix layouts","title":"Checking layout","text":"","category":"section"},{"location":"matrix_layouts.html","page":"Matrix layouts","title":"Matrix layouts","text":"Daf.MatrixLayouts.major_axis\nDaf.MatrixLayouts.require_major_axis\nDaf.MatrixLayouts.minor_axis\nDaf.MatrixLayouts.require_minor_axis\nDaf.MatrixLayouts.other_axis","category":"page"},{"location":"matrix_layouts.html#Daf.MatrixLayouts.major_axis","page":"Matrix layouts","title":"Daf.MatrixLayouts.major_axis","text":"major_axis(matrix::AbstractMatrix)::Maybe{Int8}\n\nReturn the index of the major axis of a matrix, that is, the axis one should keep fixed for an efficient inner loop accessing the matrix elements. If the matrix doesn't support any efficient access axis, returns nothing.\n\n\n\n\n\n","category":"function"},{"location":"matrix_layouts.html#Daf.MatrixLayouts.require_major_axis","page":"Matrix layouts","title":"Daf.MatrixLayouts.require_major_axis","text":"require_major_axis(matrix::AbstractMatrix)::Int8\n\nSimilar to major_axis but will error if the matrix isn't in either row-major or column-major layout.\n\n\n\n\n\n","category":"function"},{"location":"matrix_layouts.html#Daf.MatrixLayouts.minor_axis","page":"Matrix layouts","title":"Daf.MatrixLayouts.minor_axis","text":"minor_axis(matrix::AbstractMatrix)::Maybe{Int8}\n\nReturn the index of the minor axis of a matrix, that is, the axis one should vary for an efficient inner loop accessing the matrix elements. 
If the matrix doesn't support any efficient access axis, returns nothing.\n\n\n\n\n\n","category":"function"},{"location":"matrix_layouts.html#Daf.MatrixLayouts.require_minor_axis","page":"Matrix layouts","title":"Daf.MatrixLayouts.require_minor_axis","text":"require_minor_axis(matrix::AbstractMatrix)::Int8\n\nSimilar to minor_axis but will error if the matrix isn't in either row-major or column-major layout.\n\n\n\n\n\n","category":"function"},{"location":"matrix_layouts.html#Daf.MatrixLayouts.other_axis","page":"Matrix layouts","title":"Daf.MatrixLayouts.other_axis","text":"other_axis(axis::Maybe{Integer})::Maybe{Int8}\n\nReturn the other matrix axis (that is, convert between Rows and Columns). If given nothing returns nothing.\n\n\n\n\n\n","category":"function"},{"location":"matrix_layouts.html#Changing-layout","page":"Matrix layouts","title":"Changing layout","text":"","category":"section"},{"location":"matrix_layouts.html","page":"Matrix layouts","title":"Matrix layouts","text":"Daf.MatrixLayouts.relayout!","category":"page"},{"location":"matrix_layouts.html#Daf.MatrixLayouts.relayout!","page":"Matrix layouts","title":"Daf.MatrixLayouts.relayout!","text":"relayout!(matrix::AbstractMatrix)::AbstractMatrix\nrelayout!(matrix::NamedMatrix)::NamedMatrix\nrelayout!(destination::AbstractMatrix, source::AbstractMatrix)::AbstractMatrix\nrelayout!(destination::AbstractMatrix, source::NamedMatrix)::NamedMatrix\n\nReturn the same matrix data, but in the other memory layout.\n\nSuppose you have a column-major UMIs matrix, whose rows are cells, and columns are genes. Therefore, summing the UMIs of a gene will be fast, but summing the UMIs of a cell will be slow. A transpose (no !) of a matrix is fast; it creates a zero-copy wrapper of the matrix with flipped axes, so its rows will be genes and columns will be cells, but in row-major layout. Therefore, still, summing the UMIs of a gene is fast, and summing the UMIs of a cell is slow.\n\nIn contrast, transpose! (with a !) 
is slow; it creates a rearranged copy of the data, also returning a matrix whose rows are genes and columns are cells, but this time, in column-major layout. Therefore, in this case summing the UMIs of a gene will be slow, and summing the UMIs of a cell will be fast.\n\nnote: Note\nIt is almost always worthwhile to relayout! a matrix and then perform operations \"with the grain\" of the data, instead of skipping it and performing operations \"against the grain\" of the data. This is because (in Julia at least) the implementation of transpose! is optimized for the task, while the other operations typically don't provide any specific optimizations for working \"against the grain\" of the data. The benefits of a relayout! become even more significant when performing a series of operations (e.g., summing the gene UMIs in each cell, converting gene UMIs to fractions out of these totals, then computing the log base 2 of this fraction).\n\nIf you transpose (no !) the result of transpose! (with a !), you end up with a matrix that appears to be the same as the original (rows are cells and columns are genes), but behaves differently - summing the UMIs of a gene will be slow, and summing the UMIs of a cell is fast. This transpose of transpose! is a common idiom and is basically what relayout! does for you. In addition, relayout! will work for both sparse and dense matrices, and if destination is not specified, a similar matrix is allocated automatically for it.\n\nnote: Note\nThe caller is responsible for providing a sensible destination matrix (sparse for a sparse source, dense for a non-sparse source). This can be a transposed matrix. If source is a NamedMatrix, then the result will be a NamedMatrix with the same axes. 
If destination is also a NamedMatrix, then its axes must match source.\n\n\n\n\n\n","category":"function"},{"location":"matrix_layouts.html#Ensuring-code-efficiency","page":"Matrix layouts","title":"Ensuring code efficiency","text":"","category":"section"},{"location":"matrix_layouts.html","page":"Matrix layouts","title":"Matrix layouts","text":"Daf.MatrixLayouts.check_efficient_action\nDaf.MatrixLayouts.inefficient_action_handler","category":"page"},{"location":"matrix_layouts.html#Daf.MatrixLayouts.check_efficient_action","page":"Matrix layouts","title":"Daf.MatrixLayouts.check_efficient_action","text":"check_efficient_action(\n action::AbstractString,\n axis::Integer,\n operand::AbstractString,\n matrix::AbstractMatrix,\n)::Nothing\n\nThis will check whether the action about to be executed for an operand which is matrix works \"with the grain\" of the data, which requires the matrix to be in axis-major layout. If it isn't, then apply the inefficient_action_handler.\n\nIn general, you really want operations to go \"with the grain\" of the data. Unfortunately, Julia (and Python, and R, and matlab) will silently run operations \"against the grain\", which would be painfully slow. A liberal application of this function in your code will help in detecting such slowdowns, without having to resort to profiling the code to isolate the problem.\n\nnote: Note\nThis will not prevent the code from performing \"against the grain\" operations such as selectdim(matrix, Rows, 1) for a column-major matrix, but if you add this check before performing any (series of) operations on a matrix, then you will have a clear indication of whether (and where) such operations occur. You can then consider whether to invoke relayout! 
on the data, or (for data fetched from Daf), simply query for the other memory layout.\n\n\n\n\n\n","category":"function"},{"location":"matrix_layouts.html#Daf.MatrixLayouts.inefficient_action_handler","page":"Matrix layouts","title":"Daf.MatrixLayouts.inefficient_action_handler","text":"inefficient_action_handler(handler::AbnormalHandler)::AbnormalHandler\n\nSpecify the AbnormalHandler to use when accessing a matrix in an inefficient way (\"against the grain\"). Returns the previous handler. The default handler is WarnHandler.\n\n\n\n\n\n","category":"function"},{"location":"matrix_layouts.html#Index","page":"Matrix layouts","title":"Index","text":"","category":"section"},{"location":"matrix_layouts.html","page":"Matrix layouts","title":"Matrix layouts","text":"Pages = [\"matrix_layouts.md\"]","category":"page"},{"location":"readers.html#Readers","page":"Readers","title":"Readers","text":"","category":"section"},{"location":"readers.html","page":"Readers","title":"Readers","text":"Daf.Readers\nDaf.Readers.description","category":"page"},{"location":"readers.html#Daf.Readers","page":"Readers","title":"Daf.Readers","text":"The DafReader interface specifies a high-level API for reading Daf data. This API is implemented here, on top of the low-level FormatReader API. The high-level API provides thread safety so the low-level API can (mostly) ignore this issue.\n\nEach data set is given a name to use in error messages etc. You can explicitly set this name when creating a Daf object. Otherwise, when opening an existing data set, if it contains a scalar \"name\" property, it is used. Otherwise some reasonable default is used. In all cases, object names are passed through unique_name to avoid ambiguity.\n\nData properties are identified by a unique name given the axes they are based on. 
That is, there is a separate namespace for scalar properties, vector properties for each specific axis, and matrix properties for each unordered pair of axes.\n\nFor matrices, we keep careful track of their MatrixLayouts. Returned matrices are always in column-major layout, using relayout! if necessary. As this is an expensive operation, we'll cache the result in memory. Similarly, we cache the results of applying a query to the data. We allow clearing the cache to reduce memory usage, if necessary.\n\nThe data API is the high-level API intended to be used from outside the package, and is therefore re-exported from the top-level Daf namespace. It provides additional functionality on top of the low-level FormatReader implementation, accepting more general data types, automatically dealing with relayout! when needed. In particular, it enforces single-writer multiple-readers for each data set, so the format code can ignore multi-threading and still be thread-safe.\n\nnote: Note\nIn the APIs below, when getting a value, specifying a default of undef means that it is an error for the value not to exist. In contrast, specifying a default of nothing means it is OK for the value not to exist, returning nothing. Specifying an actual value for default means it is OK for the value not to exist, returning the default instead. This is in spirit with, but not identical to, undef being used as a flag for array construction saying \"there is no initializer\". If you feel this is an abuse of the undef value, take some comfort in that it is the default value for the default, so you almost never have to write it explicitly in your code.\n\n\n\n\n\n","category":"module"},{"location":"readers.html#Daf.Readers.description","page":"Readers","title":"Daf.Readers.description","text":"description(daf::DafReader[; deep::Bool = false])::AbstractString\n\nReturn a (multi-line) description of the contents of daf. This tries to hit a sweet spot between usefulness and terseness. 
If cache, also describes the content of the cache. If deep, also describes any data set nested inside this one (if any).\n\n\n\n\n\n","category":"function"},{"location":"readers.html#Scalar-properties","page":"Readers","title":"Scalar properties","text":"","category":"section"},{"location":"readers.html","page":"Readers","title":"Readers","text":"Daf.Readers.has_scalar\nDaf.Readers.get_scalar","category":"page"},{"location":"readers.html#Daf.Readers.has_scalar","page":"Readers","title":"Daf.Readers.has_scalar","text":"has_scalar(daf::DafReader, name::AbstractString)::Bool\n\nCheck whether a scalar property with some name exists in daf.\n\n\n\n\n\n","category":"function"},{"location":"readers.html#Daf.Readers.get_scalar","page":"Readers","title":"Daf.Readers.get_scalar","text":"get_scalar(\n daf::DafReader,\n name::AbstractString;\n [default::Union{StorageScalar, Nothing, UndefInitializer} = undef]\n)::Maybe{StorageScalar}\n\nGet the value of a scalar property with some name in daf.\n\nIf default is undef (the default), this first verifies the name scalar property exists in daf. 
Otherwise default will be returned if the property does not exist.\n\n\n\n\n\n","category":"function"},{"location":"readers.html#Readers-axes","page":"Readers","title":"Readers axes","text":"","category":"section"},{"location":"readers.html","page":"Readers","title":"Readers","text":"Daf.Readers.has_axis\nDaf.Readers.axis_names\nDaf.Readers.get_axis\nDaf.Readers.axis_length","category":"page"},{"location":"readers.html#Daf.Readers.has_axis","page":"Readers","title":"Daf.Readers.has_axis","text":"has_axis(daf::DafReader, axis::AbstractString)::Bool\n\nCheck whether some axis exists in daf.\n\n\n\n\n\n","category":"function"},{"location":"readers.html#Daf.Readers.axis_names","page":"Readers","title":"Daf.Readers.axis_names","text":"axis_names(daf::DafReader)::AbstractStringSet\n\nThe names of the axes of daf.\n\n\n\n\n\n","category":"function"},{"location":"readers.html#Daf.Readers.get_axis","page":"Readers","title":"Daf.Readers.get_axis","text":"get_axis(\n daf::DafReader,\n axis::AbstractString;\n [default::Union{Nothing, UndefInitializer} = undef]\n)::Maybe{AbstractStringVector}\n\nThe unique names of the entries of some axis of daf. This is similar to doing get_vector for the special name property, except that it returns a simple vector of strings instead of a NamedVector.\n\nIf default is undef (the default), this verifies the axis exists in daf. 
Otherwise, the default is nothing, which is returned if the axis does not exist.\n\n\n\n\n\n","category":"function"},{"location":"readers.html#Daf.Readers.axis_length","page":"Readers","title":"Daf.Readers.axis_length","text":"axis_length(daf::DafReader, axis::AbstractString)::Int64\n\nThe number of entries along the axis in daf.\n\nThis first verifies the axis exists in daf.\n\n\n\n\n\n","category":"function"},{"location":"readers.html#Vector-properties","page":"Readers","title":"Vector properties","text":"","category":"section"},{"location":"readers.html","page":"Readers","title":"Readers","text":"Daf.Readers.has_vector\nDaf.Readers.vector_names\nDaf.Readers.get_vector","category":"page"},{"location":"readers.html#Daf.Readers.has_vector","page":"Readers","title":"Daf.Readers.has_vector","text":"has_vector(daf::DafReader, axis::AbstractString, name::AbstractString)::Bool\n\nCheck whether a vector property with some name exists for the axis in daf. This is always true for the special name property.\n\nThis first verifies the axis exists in daf.\n\n\n\n\n\n","category":"function"},{"location":"readers.html#Daf.Readers.vector_names","page":"Readers","title":"Daf.Readers.vector_names","text":"vector_names(daf::DafReader, axis::AbstractString)::AbstractStringSet\n\nThe names of the vector properties for the axis in daf, not including the special name property.\n\nThis first verifies the axis exists in daf.\n\n\n\n\n\n","category":"function"},{"location":"readers.html#Daf.Readers.get_vector","page":"Readers","title":"Daf.Readers.get_vector","text":"get_vector(\n daf::DafReader,\n axis::AbstractString,\n name::AbstractString;\n [default::Union{StorageScalar, StorageVector, Nothing, UndefInitializer} = undef]\n)::Maybe{NamedVector}\n\nGet the vector property with some name for some axis in daf. The names of the result are the names of the vector entries (same as returned by get_axis). 
The special property name returns an array whose values are also the (read-only) names of the entries of the axis.\n\nThis first verifies the axis exists in daf. If default is undef (the default), this first verifies the name vector exists in daf. Otherwise, if default is nothing, it will be returned. If it is a StorageVector, it has to be of the same size as the axis, and is returned. Otherwise, if it is a StorageScalar, a new Vector is created of the correct size containing the default, and is returned.\n\n\n\n\n\n","category":"function"},{"location":"readers.html#Matrix-properties","page":"Readers","title":"Matrix properties","text":"","category":"section"},{"location":"readers.html","page":"Readers","title":"Readers","text":"Daf.Readers.has_matrix\nDaf.Readers.matrix_names\nDaf.Readers.get_matrix","category":"page"},{"location":"readers.html#Daf.Readers.has_matrix","page":"Readers","title":"Daf.Readers.has_matrix","text":"has_matrix(\n daf::DafReader,\n rows_axis::AbstractString,\n columns_axis::AbstractString,\n name::AbstractString;\n [relayout::Bool = true]\n)::Bool\n\nCheck whether a matrix property with some name exists for the rows_axis and the columns_axis in daf. Since this is Julia, this means a column-major matrix. 
A daf may contain two copies of the same data, in which case it would report the matrix under both axis orders.\n\nIf relayout (the default), this will also check whether the data exists in the other layout (that is, with flipped axes).\n\nThis first verifies the rows_axis and columns_axis exists in daf.\n\n\n\n\n\n","category":"function"},{"location":"readers.html#Daf.Readers.matrix_names","page":"Readers","title":"Daf.Readers.matrix_names","text":"matrix_names(\n daf::DafReader,\n rows_axis::AbstractString,\n columns_axis::AbstractString;\n [relayout::Bool = true]\n)::AbstractStringSet\n\nThe names of the matrix properties for the rows_axis and columns_axis in daf.\n\nIf relayout (default), then this will include the names of matrices that exist in the other layout (that is, with flipped axes).\n\nThis first verifies the rows_axis and columns_axis exist in daf.\n\n\n\n\n\n","category":"function"},{"location":"readers.html#Daf.Readers.get_matrix","page":"Readers","title":"Daf.Readers.get_matrix","text":"get_matrix(\n daf::DafReader,\n rows_axis::AbstractString,\n columns_axis::AbstractString,\n name::AbstractString;\n [default::Union{StorageNumber, StorageMatrix, Nothing, UndefInitializer} = undef,\n relayout::Bool = true]\n)::Maybe{NamedMatrix}\n\nGet the column-major matrix property with some name for some rows_axis and columns_axis in daf. The names of the result axes are the names of the relevant axes entries (same as returned by get_axis).\n\nIf relayout (the default), then if the matrix is only stored in the other memory layout (that is, with flipped axes), then automatically call relayout! to compute the result. If daf isa DafWriter, then store the result for future use; otherwise, just cache it as MemoryData. This may lock up very large amounts of memory; you can call empty_cache! to release it.\n\nThis first verifies the rows_axis and columns_axis exist in daf. If default is undef (the default), this first verifies the name matrix exists in daf. 
Otherwise, if default is nothing, it is returned. If default is a StorageMatrix, it has to be of the same size as the rows_axis and columns_axis, and is returned. Otherwise, a new Matrix is created of the correct size containing the default, and is returned.\n\n\n\n\n\n","category":"function"},{"location":"readers.html#Utilities","page":"Readers","title":"Utilities","text":"","category":"section"},{"location":"readers.html","page":"Readers","title":"Readers","text":"Daf.Readers.axis_version_counter\nDaf.Readers.vector_version_counter\nDaf.Readers.matrix_version_counter","category":"page"},{"location":"readers.html#Daf.Readers.axis_version_counter","page":"Readers","title":"Daf.Readers.axis_version_counter","text":"axis_version_counter(daf::DafReader, axis::AbstractString)::UInt32\n\nReturn the version number of the axis. This is incremented every time delete_axis! is called. It is used by interfaces to other programming languages to minimize copying data.\n\nnote: Note\nThis is purely in-memory per-instance, and not a global persistent version counter. That is, the version counter starts at zero even if opening a persistent disk daf data set.\n\n\n\n\n\n","category":"function"},{"location":"readers.html#Daf.Readers.vector_version_counter","page":"Readers","title":"Daf.Readers.vector_version_counter","text":"vector_version_counter(daf::DafReader, axis::AbstractString, name::AbstractString)::UInt32\n\nReturn the version number of the vector. This is incremented every time set_vector!, empty_dense_vector! or empty_sparse_vector! are called. It is used by interfaces to other programming languages to minimize copying data.\n\nnote: Note\nThis is purely in-memory per-instance, and not a global persistent version counter. 
That is, the version counter starts at zero even if opening a persistent disk daf data set.\n\n\n\n\n\n","category":"function"},{"location":"readers.html#Daf.Readers.matrix_version_counter","page":"Readers","title":"Daf.Readers.matrix_version_counter","text":"matrix_version_counter(daf::DafReader, rows_axis::AbstractString, columns_axis::AbstractString, name::AbstractString)::UInt32\n\nReturn the version number of the matrix. The order of the axes does not matter. This is incremented every time set_matrix!, empty_dense_matrix! or empty_sparse_matrix! are called. It is used by interfaces to other programming languages to minimize copying data.\n\nnote: Note\nThis is purely in-memory per-instance, and not a global persistent version counter. That is, the version counter starts at zero even if opening a persistent disk daf data set.\n\n\n\n\n\n","category":"function"},{"location":"readers.html#Index","page":"Readers","title":"Index","text":"","category":"section"},{"location":"readers.html","page":"Readers","title":"Readers","text":"Pages = [\"readers.md\"]","category":"page"},{"location":"copies.html#Copies","page":"Copies","title":"Copies","text":"","category":"section"},{"location":"copies.html","page":"Copies","title":"Copies","text":"Daf.Copies\nDaf.Copies.copy_scalar!\nDaf.Copies.copy_axis!\nDaf.Copies.copy_vector!\nDaf.Copies.copy_matrix!\nDaf.Copies.copy_all!\nDaf.Copies.EmptyData\nDaf.Copies.DataTypes","category":"page"},{"location":"copies.html#Daf.Copies","page":"Copies","title":"Daf.Copies","text":"Copy data between Daf data sets.\n\nnote: Note\nCopying into an in-memory data set does not duplicate the data; instead it just shares a reference to it. This is fast. In contrast, copying into a disk-based data set (e.g. using HDF5 or simple files) will create a duplicate of the data on disk. This is slow. 
However, both directions will not significantly increase the amount of memory allocated by the application.\n\n\n\n\n\n","category":"module"},{"location":"copies.html#Daf.Copies.copy_scalar!","page":"Copies","title":"Daf.Copies.copy_scalar!","text":"copy_scalar!(;\n destination::DafWriter,\n source::DafReader,\n name::AbstractString,\n [rename::Maybe{AbstractString} = nothing,\n dtype::Maybe{Type{T}} = nothing,\n default::Union{StorageScalar, Nothing, UndefInitializer} = undef,\n overwrite::Bool = false]\n)::Nothing where {T <: StorageScalarBase}\n\nCopy a scalar with some name from some source DafReader into some destination DafWriter.\n\nThe scalar is fetched using the name and the default. If rename is specified, store the scalar using this new name. If dtype is specified, the data is converted to this type. If overwrite (not the default), overwrite an existing scalar in the target.\n\n\n\n\n\n","category":"function"},{"location":"copies.html#Daf.Copies.copy_axis!","page":"Copies","title":"Daf.Copies.copy_axis!","text":"copy_axis!(;\n destination::DafWriter,\n source::DafReader,\n axis::AbstractString,\n [rename::Maybe{AbstractString} = nothing,\n default::Union{Nothing, UndefInitializer} = undef]\n)::Nothing\n\nCopy an axis from some source DafReader into some destination DafWriter.\n\nThe axis is fetched using the name and the default. 
If rename is specified, store the axis using this name.\n\n\n\n\n\n","category":"function"},{"location":"copies.html#Daf.Copies.copy_vector!","page":"Copies","title":"Daf.Copies.copy_vector!","text":"copy_vector!(;\n destination::DafWriter,\n source::DafReader,\n axis::AbstractString,\n name::AbstractString,\n [reaxis::Maybe{AbstractString} = nothing,\n rename::Maybe{AbstractString} = nothing,\n dtype::Maybe{Type{T}} = nothing,\n default::Union{StorageScalar, StorageVector, Nothing, UndefInitializer} = undef,\n empty::Maybe{StorageScalar} = nothing,\n overwrite::Bool = false]\n)::Nothing where {T <: StorageScalarBase}\n\nCopy a vector from some source DafReader into some destination DafWriter.\n\nThe vector is fetched using the axis, name and the default. If reaxis is specified, store the vector using this axis. If rename is specified, store the vector using this name. If dtype is specified, the data is converted to this type. If overwrite (not the default), overwrite an existing vector in the target.\n\nThis requires the axis of one data set is the same, or is a superset of, or a subset of, the other. If the target axis contains entries that do not exist in the source, then empty must be specified to fill the missing values. 
If the source axis contains entries that do not exist in the target, they are discarded (not copied).\n\n\n\n\n\n","category":"function"},{"location":"copies.html#Daf.Copies.copy_matrix!","page":"Copies","title":"Daf.Copies.copy_matrix!","text":"copy_matrix!(;\n destination::DafWriter,\n source::DafReader,\n rows_axis::AbstractString,\n columns_axis::AbstractString,\n name::AbstractString,\n [rows_reaxis::Maybe{AbstractString} = nothing,\n columns_reaxis::Maybe{AbstractString} = nothing,\n rename::Maybe{AbstractString} = nothing,\n dtype::Maybe{Type{T}} = nothing,\n default::Union{StorageScalar, StorageVector, Nothing, UndefInitializer} = undef,\n empty::Maybe{StorageScalar} = nothing,\n relayout::Bool = true,\n overwrite::Bool = false]\n)::Nothing where {T <: StorageScalarBase}\n\nCopy a matrix from some source DafReader into some destination DafWriter.\n\nThe matrix is fetched using the rows_axis, columns_axis, name, relayout and the default. If rows_reaxis and/or columns_reaxis are specified, store the matrix using these axes. If rename is specified, store the matrix using this name. If dtype is specified, the data is converted to this type. If overwrite (not the default), overwrite an existing matrix in the target. The matrix is stored with the same relayout.\n\nThis requires each axis of one data set is the same, or is a superset of, or a subset of, the other. If a target axis contains entries that do not exist in the source, then empty must be specified to fill the missing values. If a source axis contains entries that do not exist in the target, they are discarded (not copied).\n\nnote: Note\nWhen copying a matrix from a subset to a superset, if the empty value is zero, then we create a sparse matrix in the destination. 
However, currently we create a temporary dense matrix for this; this is inefficient and should be replaced by a more efficient method.\n\n\n\n\n\n","category":"function"},{"location":"copies.html#Daf.Copies.copy_all!","page":"Copies","title":"Daf.Copies.copy_all!","text":"copy_all!(;\n destination::DafWriter,\n source::DafReader\n [empty::Maybe{EmptyData} = nothing,\n dtypes::Maybe{DataTypes} = nothing,\n overwrite::Bool = false,\n relayout::Bool = true]\n)::Nothing\n\nCopy all the content of a source DafReader into a destination DafWriter. If overwrite, this will overwrite existing data in the target. If relayout, matrices will be stored in the target both layouts, regardless of how they were stored in the source.\n\nThis will create target axes that exist in only in the source, but will not overwrite existing target axes, regardless of the value of overwrite. An axis that exists in the target must be identical to, or be a subset of, the same axis in the source.\n\nIf the source has axes which are a subset of the same axes in the target, then you must specify a dictionary of values for the empty entries that will be created in the target when copying any vector and/or matrix properties. This is specified using a (axis, property) => value entry for specifying an empty value for a vector property and a (rows_axis, columns_axis, property) => entry for specifying an empty value for a matrix property. The order of the axes for matrix properties doesn't matter (the same empty value is automatically used for both axes orders).\n\nIf dtype is specified, the copied data of the matching property is converted to the specified data type.\n\n\n\n\n\n","category":"function"},{"location":"copies.html#Daf.Copies.EmptyData","page":"Copies","title":"Daf.Copies.EmptyData","text":"Specify the data to use for missing properties in a Daf data set. 
This is a dictionary with a DataKey specifying the property for which we specify a value, and the value to use.\n\nnote: Note\nDue to Julia's type system limitations, there's just no way for the system to enforce the type of the pairs when initializing this dictionary. That is, what we'd like to say is:\n\nEmptyData = AbstractDict{DataKey, StorageScalar}\n\nBut what we are forced to say is:\n\nEmptyData = AbstractDict\n\nThat's not a mistake. Even EmptyData = AbstractDict{Key, StorageScalar} where {Key} fails to work, as do all the (many) possibilities for expressing \"this is a dictionary where the key or the value can be one of several things\". Sigh. Glory to anyone who figures out an incantation that would force the system to perform any meaningful type inference here.\n\n\n\n\n\n","category":"type"},{"location":"copies.html#Daf.Copies.DataTypes","page":"Copies","title":"Daf.Copies.DataTypes","text":"Specify the data type to use for overriding properties types in a Daf data set. This is a dictionary with a DataKey specifying the property for which we specify a data type, and the data type to use.\n\nnote: Note\nDue to Julia's type system limitations, there's just no way for the system to enforce the type of the pairs when initializing this dictionary. That is, what we'd like to say is:\n\nDataTypes = AbstractDict{DataKey, Type{T}} where {T <: StorageScalarBase}\n\nBut what we are forced to say is:\n\nDataTypes = AbstractDict\n\nThat's not a mistake. Even DataTypes = AbstractDict{Key, T <: StorageScalarBase} where {Key, T <: StorageScalarBase} fails to work, as do all the (many) possibilities for expressing \"this is a dictionary where the key or the value can be one of several things\". Sigh. 
Glory to anyone who figures out an incantation that would force the system to perform any meaningful type inference here.\n\n\n\n\n\n","category":"type"},{"location":"copies.html#Index","page":"Copies","title":"Index","text":"","category":"section"},{"location":"copies.html","page":"Copies","title":"Copies","text":"Pages = [\"copies.md\"]","category":"page"},{"location":"files_format.html#Files-Format","page":"Files Format","title":"Files Format","text":"","category":"section"},{"location":"files_format.html","page":"Files Format","title":"Files Format","text":"Daf.FilesFormat\nDaf.FilesFormat.MAJOR_VERSION\nDaf.FilesFormat.MINOR_VERSION\nDaf.FilesFormat.FilesDaf","category":"page"},{"location":"files_format.html#Daf.FilesFormat","page":"Files Format","title":"Daf.FilesFormat","text":"A Daf storage format in disk files. This is an efficient way to persist Daf data in a filesystem, and offers a different trade-off compared to storing the data in an HDF5 file.\n\nOn the downside, this being a directory, you need to create a zip or tar or some other form of archive file if you want to publish it. Also, accessing FilesDaf will consume multiple file descriptors as opposed to just one for HDF5, and, of course, HDF5 has libraries to support it in most systems.\n\nOn the upside, the format of the files is so simple that it is trivial to access them from any programming environment, without requiring a complex library like HDF5. In addition, since each scalar, vector or matrix property is stored in a separate file, deleting data automatically frees the storage (unlike in an HDF5 file, where you must manually repack the file to actually release the storage). Also, you can use standard tools to look at the data (e.g. use ls or the Windows file explorer to view the list of properties, how much space each one uses, when it was created, etc.). 
Most importantly, this allows using standard tools like make to create automatic repeatable processing workflows.\n\nWe use multiple files to store Daf data, under some root directory, as follows:\n\nThe directory will contain 4 sub-directories: scalars, axes, vectors, and matrices, and a file called daf.json.\nThe daf.json signifies that the directory contains Daf data. In this file, there should be a mapping with a version key whose value is an array of two integers. The first is the major version number and the second is the minor version number, using semantic versioning. This makes it easy to test whether a directory does/n't contain Daf data, and which version of the internal structure it is using. Currently the only defined version is [1,0].\nThe scalars directory contains scalar properties, each as in its own name.json file, containing a mapping with a type key whose value is the data type of the scalar (one of the StorageScalar types, with String for a string scalar) and a value key whose value is the actual scalar value.\nThe axes directory contains a name.txt file per axis, where each line contains a name of an axis entry.\nThe vectors directory contains a directory per axis, containing the vectors. For every vector, a name.json file will contain a mapping with an eltype key specifying the type of the vector element, and a format key specifying how the data is stored on disk, one of dense and sparse.\nIf the format is dense, then there will be a file containing the vector entries, either name.txt for strings (with a value per line), or name.data for binary data (which we can memory-map for direct access).\nIf the format is sparse, then there will also be an indtype key specifying the data type of the indices of the non-zero values, and two binary data files, name.nzind containing the indices of the non-zero entries, and name.nzval containing the values of the non-zero entries (which we can memory-map for direct access). 
See Julia's SparseVector implementation for details.\nThe matrices directly contains a directory per rows axis, which contains a directory per columns axis, which contains the matrices. For each matrix, a name.json file will contain a mapping with an eltype key specifying the type of the matrix element, and a format key specifying how the data is stored on disk, one of dense and sparse.\nIf the format is dense, then there will be a name.data binary file in column-major layout (which we can memory-map for direct access).\nIf the format is sparse, then there will also be an indtype key specifying the data type of the indices of the non-zero values, and three binary data files, name.colptr, name.rowval containing the indices of the non-zero values, and name.nzval containing the values of the non-zero entries (which we can memory-map for direct access). See Julia's SparseMatrixCSC implementation for details.\n\nExample directory structure:\n\nexample-daf-dataset-root-directory/\n├─ daf.json\n├─ scalars/\n│ └─ version.json\n├─ axes/\n│ ├─ cell.txt\n│ └─ gene.txt\n├─ vectors/\n│ ├─ cell/\n│ │ ├─ batch.json\n│ │ └─ batch.txt\n│ └─ gene/\n│ ├─ is_marker.json\n│ └─ is_marker.data\n└─ matrices/\n ├─ cell/\n │ ├─ cell/\n │ └─ gene/\n │ ├─ UMIs.json\n │ ├─ UMIs.colptr\n │ ├─ UMIs.rowval\n │ └─ UMIs.nzval\n └─ gene/\n ├─ cell/\n └─ gene/\n\nnote: Note\nAll binary data is stored as a sequence of elements, in little endian byte order (which is the native order for modern CPUs), without any headers or padding. (Dense) matrices are stored in column-major layout (which matches Julia's native matrix layout).All string data is stored in lines, one entry per line, separated by a `\n\ncharacter (regardless of the OS used). Therefore, you can't have a line break inside an axis entry name or in a vector property value, at least not when storing it inFilesDaf`.\n\nWhen creating an HDF5 file to contain `Daf` data, you should specify\n`;fapl=HDF5.FileAccessProperties(;alignment=(1,8))`. 
This ensures all the memory buffers are properly aligned for\nefficient access. Otherwise, memory mapping will be **much** less efficient. A warning is therefore generated\nwhenever you try to access `Daf` data stored in an HDF5 file which does not enforce proper alignment.\n\nThat's all there is to it. The format is intentionally simple and transparent to maximize its accessibility by other (standard) tools. Still, it is easiest to create the data using the Julia Daf package.\n\nnote: Note\nThe code here assumes the files data obeys all the above conventions and restrictions. As long as you only create and access Daf data in files using FilesDaf, then the code will work as expected (assuming no bugs). However, if you do this in some other way (e.g., directly using the filesystem and custom tools), and the result is invalid, then the code here may fail with \"less than friendly\" error messages.\n\n\n\n\n\n","category":"module"},{"location":"files_format.html#Daf.FilesFormat.MAJOR_VERSION","page":"Files Format","title":"Daf.FilesFormat.MAJOR_VERSION","text":"The specific major version of the FilesDaf format that is supported by this code (1). The code will refuse to access data that is stored in a different major format.\n\n\n\n\n\n","category":"constant"},{"location":"files_format.html#Daf.FilesFormat.MINOR_VERSION","page":"Files Format","title":"Daf.FilesFormat.MINOR_VERSION","text":"The maximal minor version of the FilesDaf format that is supported by this code (0). 
The code will refuse to access data that is stored with the expected major version (1), but that uses a higher minor version.\n\nnote: Note\nModifying data that is stored with a lower minor version number may increase its minor version number.\n\n\n\n\n\n","category":"constant"},{"location":"files_format.html#Daf.FilesFormat.FilesDaf","page":"Files Format","title":"Daf.FilesFormat.FilesDaf","text":"FilesDaf(\n path::AbstractString,\n mode::AbstractString = \"r\";\n [name::Maybe{AbstractString} = nothing]\n)\n\nStorage in disk files in some directory.\n\nWhen opening an existing data set, if name is not specified, and there exists a \"name\" scalar property, it is used as the name. Otherwise, the path will be used as the name.\n\nThe valid mode values are as follows (the default mode is r):\n\nMode Allow modifications? Create if does not exist? Truncate if exists? Returned type\nr No No No DafReadOnly\nr+ Yes No No FilesDaf\nw+ Yes Yes No FilesDaf\nw Yes Yes Yes FilesDaf\n\n\n\n\n\n","category":"type"},{"location":"files_format.html#Index","page":"Files Format","title":"Index","text":"","category":"section"},{"location":"files_format.html","page":"Files Format","title":"Files Format","text":"Pages = [\"files_format.md\"]","category":"page"},{"location":"groups.html#Groups","page":"Groups","title":"Groups","text":"","category":"section"},{"location":"groups.html","page":"Groups","title":"Groups","text":"Daf.Groups\nDaf.Groups.group_names\nDaf.Groups.compact_groups!\nDaf.Groups.collect_group_members","category":"page"},{"location":"groups.html#Daf.Groups","page":"Groups","title":"Daf.Groups","text":"Functions for dealing with computing groups of axis entries (typically for creating a new axis).\n\n\n\n\n\n","category":"module"},{"location":"groups.html#Daf.Groups.group_names","page":"Groups","title":"Daf.Groups.group_names","text":"group_names(\n daf::DafReader,\n axis::AbstractString,\n entries_of_groups::AbstractVector{<:AbstractVector{<:Integer}};\n 
prefix::AbstractString,\n)::Vector{String}\n\nGiven an entries_of_groups vector of vectors, one for each group, containing the (sorted) indices of the entries of the group along some axis of some daf data set, return a vector giving a unique name for each group. This name consists of the prefix, followed by the index of the group, followed by a .XX two-digit suffix which is a hash of the names of the axis entries of the group.\n\nThe returned names strike a balance between readability and safety. A name like M123.89 for group #123 is easy to deal with manually, but is also reasonably safe in the common use case that groups are re-computed, and there is per-group metadata lying around associated with the old groups, as the probability of the new group #123 having the same suffix is only 1% (unless it is actually identical).\n\n\n\n\n\n","category":"function"},{"location":"groups.html#Daf.Groups.compact_groups!","page":"Groups","title":"Daf.Groups.compact_groups!","text":"compact_groups!(\n group_indices::AbstractVector{<:Integer},\n)::Int\n\nGiven an array group_indices which assigns each entry of some axis to a non-negative group index (with zero meaning \"no group\"), compact it in-place so that the group indices will be 1...N, and return N.\n\n\n\n\n\n","category":"function"},{"location":"groups.html#Daf.Groups.collect_group_members","page":"Groups","title":"Daf.Groups.collect_group_members","text":"collect_group_members(\n group_indices::AbstractVector{T},\n)::Vector{Vector{T}} where {T <: Integer}\n\nGiven an array group_indices which assigns each entry of some axis to a non-negative group index (with zero meaning \"no group\"), where the group indices are compact (in the range 1...N), return a vector of vectors, one for each group, containing the (sorted) indices of the entries of the 
group.\n\n\n\n\n\n","category":"function"},{"location":"groups.html#Index","page":"Groups","title":"Index","text":"","category":"section"},{"location":"groups.html","page":"Groups","title":"Groups","text":"Pages = [\"groups.md\"]","category":"page"},{"location":"generic_logging.html#Generic-Logging","page":"Generic Logging","title":"Generic Logging","text":"","category":"section"},{"location":"generic_logging.html","page":"Generic Logging","title":"Generic Logging","text":"Daf.GenericLogging\nDaf.GenericLogging.setup_logger\nDaf.GenericLogging.@logged","category":"page"},{"location":"generic_logging.html#Daf.GenericLogging","page":"Generic Logging","title":"Daf.GenericLogging","text":"Generic macros and functions for logging, that arguably should belong in a more general-purpose package.\n\nWe do not re-export the macros and functions defined here from the top-level Daf namespace. That is, even if using Daf, you will not have these generic names polluting your namespace. If you do want to reuse them in your code, explicitly write using Daf.GenericLogging.\n\n\n\n\n\n","category":"module"},{"location":"generic_logging.html#Daf.GenericLogging.setup_logger","page":"Generic Logging","title":"Daf.GenericLogging.setup_logger","text":"setup_logger(\n io::IO = stderr;\n [level::LogLevel = Warn,\n show_time::Bool = true,\n show_module::Bool = true,\n show_location::Bool = false]\n)::Nothing\n\nSetup a global logger that will print into io, printing messages with a timestamp prefix.\n\nBy default, this will only print warnings. Note that increasing the log level will apply to everything. 
An alternative is to set up the environment variable JULIA_DEBUG to a comma-separated list of modules you wish to see the debug messages of.\n\nIf show_time, each message will be prefixed with a yyyy-dd-mm HH:MM:SS.sss timestamp prefix.\n\nIf show_module, each message will be prefixed with the name of the module emitting the message.\n\nIf show_location, each message will be prefixed with the file name and the line number emitting the message.\n\n\n\n\n\n","category":"function"},{"location":"generic_logging.html#Daf.GenericLogging.@logged","page":"Generic Logging","title":"Daf.GenericLogging.@logged","text":"@logged function something(...)\n return ...\nend\n\nAutomatically log (in Debug level) every invocation to the function. This will also log the values of the arguments. Emits a second log entry when the function returns, with the result (if any).\n\n\n\n\n\n","category":"macro"},{"location":"generic_logging.html#Index","page":"Generic Logging","title":"Index","text":"","category":"section"},{"location":"generic_logging.html","page":"Generic Logging","title":"Generic Logging","text":"Pages = [\"generic_logging.md\"]","category":"page"},{"location":"generic_types.html#Generic-Types","page":"Generic Types","title":"Generic Types","text":"","category":"section"},{"location":"generic_types.html","page":"Generic Types","title":"Generic Types","text":"Daf.GenericTypes","category":"page"},{"location":"generic_types.html#Daf.GenericTypes","page":"Generic Types","title":"Daf.GenericTypes","text":"Generic types that arguably should belong in a more general-purpose package.\n\nWe do not re-export the types and functions defined here from the top-level Daf namespace. That is, even if using Daf, you will not have these generic names polluting your namespace. 
If you do want to reuse them in your code, explicitly write using Daf.GenericTypes.\n\n\n\n\n\n","category":"module"},{"location":"generic_types.html#Unions","page":"Generic Types","title":"Unions","text":"","category":"section"},{"location":"generic_types.html","page":"Generic Types","title":"Generic Types","text":"We got sick and tired of writing Union{..., Nothing} everywhere. We therefore created this shorthand unions listed below and used them throughout the code. We're well aware there was a religious war of whether there should be a standard shorthand for this, vs. a standard shorthand for Union{..., Missing}, with everyone losing, that is, having to use the explicit Union notation everywhere.","category":"page"},{"location":"generic_types.html","page":"Generic Types","title":"Generic Types","text":"Looking at the answers here then Nothing means \"there is no value\" and Missing means \"there is a value, but we don't know what it is\" (Unknown might have been a better name).","category":"page"},{"location":"generic_types.html","page":"Generic Types","title":"Generic Types","text":"Under this interpretation, Union{..., Nothing} has (almost) the same semantics as Haskell's Maybe, so that's what we called it (other languages call this Optional or Opt). It is used heavily in our (and a lot of other) Julia code. We also added Unsure as a shorthand for Union{..., Missing} for completeness, but we do not actually use it anywhere. We assume it is useful for Julia code dealing specifically with statistical analysis.","category":"page"},{"location":"generic_types.html","page":"Generic Types","title":"Generic Types","text":"Daf.GenericTypes.Maybe\nDaf.GenericTypes.Unsure","category":"page"},{"location":"generic_types.html#Daf.GenericTypes.Maybe","page":"Generic Types","title":"Daf.GenericTypes.Maybe","text":"Maybe{T} = Union{T, Nothing}\n\nThe type to use when maybe there is a value, maybe there isn't. 
This is exactly as if writing the explicit Union with Nothing but is shorter and more readable. This is extremely common.\n\n\n\n\n\n","category":"type"},{"location":"generic_types.html#Daf.GenericTypes.Unsure","page":"Generic Types","title":"Daf.GenericTypes.Unsure","text":"Unsure{T} = Union{T, Missing}\n\nThe type to use when maybe there always is a value, but sometimes we are not sure what it is. This is exactly as if writing the explicit Union with Missing but is shorter and more readable. This is only used in code dealing with statistics to represent missing (that is, unknown) data. It is only provided here for completeness.\n\n\n\n\n\n","category":"type"},{"location":"generic_types.html#Strings","page":"Generic Types","title":"Strings","text":"","category":"section"},{"location":"generic_types.html","page":"Generic Types","title":"Generic Types","text":"We use the following as type annotations for function arguments to be able to accept \"any\" concrete string collection type.","category":"page"},{"location":"generic_types.html","page":"Generic Types","title":"Generic Types","text":"Daf.GenericTypes.AbstractStringVector\nDaf.GenericTypes.AbstractStringSet","category":"page"},{"location":"generic_types.html#Daf.GenericTypes.AbstractStringVector","page":"Generic Types","title":"Daf.GenericTypes.AbstractStringVector","text":"AbstractStringVector = AbstractVector{S} where {S <: AbstractString}\n\nA vector of strings, without commitment to the concrete implementation of either the vector or the strings contained in it.\n\n\n\n\n\n","category":"type"},{"location":"generic_types.html#Daf.GenericTypes.AbstractStringSet","page":"Generic Types","title":"Daf.GenericTypes.AbstractStringSet","text":"AbstractStringSet = AbstractSet{S} where {S <: AbstractString}\n\nA set of strings, without commitment to the concrete implementation of either the set or the strings contained in it.\n\n\n\n\n\n","category":"type"},{"location":"generic_types.html#Index","page":"Generic 
Types","title":"Index","text":"","category":"section"},{"location":"generic_types.html","page":"Generic Types","title":"Generic Types","text":"Pages = [\"generic_types.md\"]","category":"page"},{"location":"anndata_format.html#AnnData-Format","page":"AnnData Format","title":"AnnData Format","text":"","category":"section"},{"location":"anndata_format.html","page":"AnnData Format","title":"AnnData Format","text":"Daf.AnnDataFormat\nDaf.AnnDataFormat.anndata_as_daf\nDaf.AnnDataFormat.daf_as_anndata","category":"page"},{"location":"anndata_format.html#Daf.AnnDataFormat","page":"AnnData Format","title":"Daf.AnnDataFormat","text":"Import/export Daf data from/to AnnData.\n\nDue to the different data models, not all the content of AnnData can be represented as Daf, and vice-versa. However, \"most\" of the data can be automatically converted from one form to the other. In both directions, conversion is zero-copy; that is, we merely create a different view for the same vectors and matrices. We also use memory-mapping whenever possible for increased performance.\n\nnote: Note\n\n\nWe use the AnnData Julia implementation from Muon.jl. The last published release of this package is from 2021, and lacks features added over the years, which we use. Therefore, currently Daf uses the head revision of Muon from github, with all that implies. We'll change this to a proper registry dependency if/when a new Muon version is released.\n\nThe following Daf data can't be naively stored in AnnData:\n\nAnnData is restricted to storing data for only two axes, which AnnData always calls \"obs\" and \"var\". In contrast, Daf can store data for an arbitrary set of meaningfully named axes.\nAnnData always contains a matrix property for these two axes called \"X\". Mercifully, the rest of the matrices are allowed to have meaningful names. 
In contrast, Daf allows storing an arbitrary set of meaningfully named matrices.\nAnnData can only hold row-major matrices, while Julia defaults to column-major layout.\n\nTherefore, when viewing Daf data as AnnData, we pick two specific axes and rename them to \"obs\" and \"var\", pick a specific matrix property of these axes and rename it to \"X\", and relayout! it if needed so AnnData would be happy. We store the discarded names of the axes and matrix in unstructured annotations called obs_is, var_is and X_is. This allows us to reconstruct the original names when re-viewing the AnnData as Daf data.\n\nThe following AnnData can't be naively stored in Daf:\n\nNon-scalars (e.g., mappings) inside uns unstructured annotations. The Daf equivalent is storing JSON string blobs, which is awkward to use. TODO: provide better API to deal with such data.\nData using nullable entries (e.g. a matrix with nullable integer entries). In contrast, Daf supports the convention that zero values are special. This only works in some cases (e.g., it isn't a good solution for Boolean data). It is possible of course to explicitly store Boolean masks and apply them to the data, but this is inconvenient. TODO: Have Daf natively support nullable/masked arrays.\nCategorical data. Categorical vectors are therefore converted to simple strings. However, Daf doesn't support matrices of strings, so it doesn't support or convert categorical matrices.\nMatrix data that only uses one of the axes (that is, obsm and varm data). The problem here is, paradoxically, that Daf supports such data \"too well\", by allowing multiple axes to be defined, and storing matrices based on any pair of axes. However, this requires the other axes to be explicitly created, and their information just doesn't exist in the AnnData data set. 
TODO: Allow unstructured annotations to store the entries of the other axis.\n\nWhen viewing AnnData as Daf, we either ignore, warn, or treat as an error any such unsupported data.\n\nwarning: DANGER, WILL ROBINSON\n\n\nSquare matrices accessed via Daf APIs will be the (column-major) transpose of the original AnnData (row-major) matrix.\n\nDue to limitations of the Daf data model, square matrices are stored only in column-major layout. In contrast, AnnData square matrices (obsp, varp), are stored in row-major layout. We have several bad options to address this:\n\nWe can break the Daf invariant that all accessed data is column-major, at least for square matrices. This is bad because the invariant greatly simplifies Daf client code. Forcing clients to check the data layout and calling relayout! would add a lot of error-prone boilerplate to our users.\nWe can relayout! the data when copying it between AnnData and Daf. This is bad because it would force us to duplicate the data. More importantly, there is typically a good reason for the layout of the data. For example, assume a directed graph between cells. A common way to store it is to have a square matrix where each row contains the weights of the edges originating in one cell, connecting it to all other cells. This allows code to efficiently \"loop on all cells; loop on all outgoing edges\". If we relayout! the data, then such a loop would become extremely inefficient.\nWe can return the transposed matrix from Daf. This is bad because Julia code and Python code processing the \"same\" data would need to flip the indices (e.g., outgoing_weight[from_cell, to_cell] in Python vs. outgoing_weight[to_cell, from_cell] in Julia).\n\nHaving to pick between these bad options, we chose the last one as the lesser evil. The assumption is that Julia code is written separately from the Python code anyway. 
If the same algorithm is implemented in both systems, it would work (efficiently!), as long as the developer read this warning and flipped the order of the indices, that is.\n\nWe do not have this problem with non-square matrices (e.g., the per-cell-per-gene UMIs matrix), since Daf allows for storing and accessing both layouts of the same data in this case. We simply populate Daf with the row-major data from AnnData and if asked for the other layout, will relayout! it (and store/cache the result).\n\n\n\n\n\n","category":"module"},{"location":"anndata_format.html#Daf.AnnDataFormat.anndata_as_daf","page":"AnnData Format","title":"Daf.AnnDataFormat.anndata_as_daf","text":"anndata_as_daf(\n adata::Union{AnnData, AbstractString};\n [name::Maybe{AbstractString} = nothing,\n obs_is::Maybe{AbstractString} = nothing,\n var_is::Maybe{AbstractString} = nothing,\n X_is::Maybe{AbstractString} = nothing,\n unsupported_handler::AbnormalHandler = WarnHandler]\n)::MemoryDaf\n\nView AnnData as a Daf data set, specifically using a MemoryDaf. This doesn't duplicate matrices or vectors, but acts as a view containing references to the same ones. Adding and/or deleting data in the view using the Daf API will not affect the original adata.\n\nAny unsupported AnnData annotations will be handled using the unsupported_handler. 
By default, we'll warn about each and every such unsupported property.\n\nIf adata is a string, then it is the path of an h5ad file which is automatically loaded.\n\nIf not specified, the name will be the value of the \"name\" uns property, if it exists, otherwise, it will be \"anndata\".\n\nIf not specified, obs_is (the name of the \"obs\" axis) will be the value of the \"obs_is\" uns property, if it exists, otherwise, it will be \"obs\".\n\nIf not specified, var_is (the name of the \"var\" axis) will be the value of the \"var_is\" uns property, if it exists, otherwise, it will be \"var\".\n\nIf not specified, X_is (the name of the \"X\" matrix) will be the value of the \"X_is\" uns property, if it exists, otherwise, it will be \"X\".\n\n\n\n\n\n","category":"function"},{"location":"anndata_format.html#Daf.AnnDataFormat.daf_as_anndata","page":"AnnData Format","title":"Daf.AnnDataFormat.daf_as_anndata","text":"daf_as_anndata(\n daf::DafReader;\n [obs_is::Maybe{AbstractString} = nothing,\n var_is::Maybe{AbstractString} = nothing,\n X_is::Maybe{AbstractString} = nothing,\n h5ad::Maybe{AbstractString} = nothing]\n)::AnnData\n\nView the daf data set as AnnData. This doesn't duplicate matrices or vectors, but acts as a view containing references to the same ones. 
Adding and/or deleting data in the view using the AnnData API will not affect the original daf data set.\n\nIf specified, the result is also written to an h5ad file.\n\nIf not specified, obs_is (the name of the \"obs\" axis) will be the value of the \"obs_is\" scalar property, if it exists, otherwise, it will be \"obs\".\n\nIf not specified, var_is (the name of the \"var\" axis) will be the value of the \"var_is\" scalar property, if it exists, otherwise, it will be \"var\".\n\nIf not specified, X_is (the name of the \"X\" matrix) will be the value of the \"X_is\" scalar property, if it exists, otherwise, it will be \"X\".\n\nEach of the final obs_is, var_is, X_is values is stored as unstructured annotations, unless the default value (\"obs\", \"var\", \"X\") is used.\n\nAll scalar properties, vector properties of the chosen \"obs\" and \"var\" axes, and matrix properties of these axes, are stored in the returned new AnnData object.\n\n\n\n\n\n","category":"function"},{"location":"anndata_format.html#Index","page":"AnnData Format","title":"Index","text":"","category":"section"},{"location":"anndata_format.html","page":"AnnData Format","title":"AnnData Format","text":"Pages = [\"anndata_format.md\"]","category":"page"},{"location":"memory_format.html#Memory-Format","page":"Memory Format","title":"Memory Format","text":"","category":"section"},{"location":"memory_format.html","page":"Memory Format","title":"Memory Format","text":"Daf.MemoryFormat\nDaf.MemoryFormat.MemoryDaf","category":"page"},{"location":"memory_format.html#Daf.MemoryFormat","page":"Memory Format","title":"Daf.MemoryFormat","text":"In-memory Daf storage format.\n\n\n\n\n\n","category":"module"},{"location":"memory_format.html#Daf.MemoryFormat.MemoryDaf","page":"Memory Format","title":"Daf.MemoryFormat.MemoryDaf","text":"struct MemoryDaf <: DafWriter ... 
end\n\nMemoryDaf(; name = \"memory\")\n\nSimple in-memory storage.\n\nThis just keeps everything in-memory, similarly to the way an AnnData object works; that is, this is a lightweight object that just keeps references to the data it is given.\n\nThis is the \"default\" storage type you should use, unless you need to persist the data on the disk.\n\n\n\n\n\n","category":"type"},{"location":"memory_format.html#Index","page":"Memory Format","title":"Index","text":"","category":"section"},{"location":"memory_format.html","page":"Memory Format","title":"Memory Format","text":"Pages = [\"memory_format.md\"]","category":"page"},{"location":"operations.html#Query-operations","page":"Query operations","title":"Query operations","text":"","category":"section"},{"location":"operations.html","page":"Query operations","title":"Query operations","text":"Daf.Operations","category":"page"},{"location":"operations.html#Daf.Operations","page":"Query operations","title":"Daf.Operations","text":"A Daf query can use operations to process the data: EltwiseOperations that preserve the shape of the data, and ReductionOperations that reduce a matrix to a vector, or a vector to a scalar.\n\n\n\n\n\n","category":"module"},{"location":"operations.html#Element-wise-operations","page":"Query operations","title":"Element-wise operations","text":"","category":"section"},{"location":"operations.html","page":"Query operations","title":"Query operations","text":"Daf.Operations.Abs\nDaf.Operations.Clamp\nDaf.Operations.Convert\nDaf.Operations.Fraction\nDaf.Operations.Log\nDaf.Operations.Round\nDaf.Operations.Significant","category":"page"},{"location":"operations.html#Daf.Operations.Abs","page":"Query operations","title":"Daf.Operations.Abs","text":"Abs([; dtype::Maybe{Type} = nothing])\n\nElement-wise operation that converts every element to its absolute value.\n\nParameters\n\ndtype - The default output data type is the unsigned_dtype_for the input data 
type.\n\n\n\n\n\n","category":"type"},{"location":"operations.html#Daf.Operations.Clamp","page":"Query operations","title":"Daf.Operations.Clamp","text":"Clamp([; min::Maybe{StorageNumber} = nothing, max::Maybe{StorageNumber} = nothing])\n\nElement-wise operation that converts every element to a value inside a range.\n\nParameters\n\nmin - If specified, values lower than this will be increased to this value.\n\nmax - If specified, values higher than this will be decreased to this value.\n\nnote: Note\nAt least one of min and max must be specified.\n\n\n\n\n\n","category":"type"},{"location":"operations.html#Daf.Operations.Convert","page":"Query operations","title":"Daf.Operations.Convert","text":"Convert([; dtype::Type])\n\nElement-wise operation that converts every element to a given data type.\n\nParameters\n\ndtype - The data type to convert to. There's no default.\n\n\n\n\n\n","category":"type"},{"location":"operations.html#Daf.Operations.Fraction","page":"Query operations","title":"Daf.Operations.Fraction","text":"Fraction([; dtype::Type])\n\nElement-wise operation that converts every element to its fraction out of the total. If the total is zero, all the fractions are also set to zero. This implicitly assumes (but does not enforce) that all the entry value(s) are positive.\n\nFor matrices, each entry becomes its fraction out of the total of the column it belongs to. For vectors, each entry becomes its fraction out of the total of the vector. 
For scalars, this operation makes no sense so fails with an error.\n\nParameters\n\ndtype - The default output data type is the float_dtype_for of the input data type.\n\n\n\n\n\n","category":"type"},{"location":"operations.html#Daf.Operations.Log","page":"Query operations","title":"Daf.Operations.Log","text":"Log(; dtype::Maybe{Type} = nothing, base::StorageNumber = e, eps::StorageNumber = 0)\n\nElement-wise operation that converts every element to its logarithm.\n\nParameters:\n\ndtype - The default output data type is the float_dtype_for of the input data type.\n\nbase - The base of the logarithm. By default uses e (that is, computes the natural logarithm), which isn't convenient, but is the standard.\n\neps - Added to the input before computing the logarithm, to handle zero input data. By default is zero.\n\n\n\n\n\n","category":"type"},{"location":"operations.html#Daf.Operations.Round","page":"Query operations","title":"Daf.Operations.Round","text":"Round([; dtype::Maybe{Type} = nothing])\n\nElement-wise operation that converts every element to the nearest integer value.\n\nParameters\n\ndtype - By default, uses the int_dtype_for the input data type.\n\n\n\n\n\n","category":"type"},{"location":"operations.html#Daf.Operations.Significant","page":"Query operations","title":"Daf.Operations.Significant","text":"Significant(; high::StorageNumber, low::Maybe{StorageNumber} = nothing)\n\nElement-wise operation that zeros all \"insignificant\" values. Significant values have a high absolute value. This is typically used to prune matrices of effect sizes (log of ratio between a baseline and some result) for heatmap display. 
For example, log base 2 of gene expression ratio is typically considered significant if it is at least 3 (that is, a ratio at least 8x or at most 1/8x); for genes that have a significant effect, we typically display all entries with a log of at least 2 (that is, a ratio of at least 4x or at most 1/4x).\n\nFor scalars, this operation makes no sense so fails with an error.\n\nParameters:\n\nhigh - A value is considered significant if its absolute value is higher than this. If all values in a vector (or a matrix column) are less than this, then all the vector (or matrix column) entries are zeroed. There's no default.\n\nlow - If there is at least one significant value in a vector (or a matrix column), then zero all entries that are lower than this. By default, this is the same as the high value. Setting it to a lower value will preserve more entries, but only for vectors (or matrix columns) which contain at least some significant data.\n\n\n\n\n\n","category":"type"},{"location":"operations.html#Reduction-operations","page":"Query operations","title":"Reduction operations","text":"","category":"section"},{"location":"operations.html","page":"Query operations","title":"Query operations","text":"Daf.Operations.Sum\nDaf.Operations.Max\nDaf.Operations.Min\nDaf.Operations.Median\nDaf.Operations.Quantile\nDaf.Operations.Mean\nDaf.Operations.GeoMean\nDaf.Operations.Std\nDaf.Operations.StdN\nDaf.Operations.Var\nDaf.Operations.VarN\nDaf.Operations.Mode\nDaf.Operations.Count","category":"page"},{"location":"operations.html#Daf.Operations.Sum","page":"Query operations","title":"Daf.Operations.Sum","text":"Sum(; dtype::Maybe{Type} = nothing)\n\nReduction operation that sums elements.\n\nParameters\n\ndtype - By default, uses the sum_dtype_for the input data type.\n\n\n\n\n\n","category":"type"},{"location":"operations.html#Daf.Operations.Max","page":"Query operations","title":"Daf.Operations.Max","text":"Max()\n\nReduction operation that returns the maximal 
element.\n\n\n\n\n\n","category":"type"},{"location":"operations.html#Daf.Operations.Min","page":"Query operations","title":"Daf.Operations.Min","text":"Min()\n\nReduction operation that returns the minimal element.\n\n\n\n\n\n","category":"type"},{"location":"operations.html#Daf.Operations.Median","page":"Query operations","title":"Daf.Operations.Median","text":"Median(; dtype::Maybe{Type} = nothing)\n\nReduction operation that returns the median value.\n\nParameters\n\ndtype - The default output data type is the float_dtype_for of the input data type.\n\n\n\n\n\n","category":"type"},{"location":"operations.html#Daf.Operations.Quantile","page":"Query operations","title":"Daf.Operations.Quantile","text":"Quantile(; dtype::Maybe{Type} = nothing, p::StorageNumber)\n\nReduction operation that returns the quantile value, that is, a value such that a certain fraction of the values is lower.\n\nParameters\n\ndtype - The default output data type is the float_dtype_for of the input data type.\n\np - The fraction of values below the result (e.g., the 0 computes the minimum, the 0.5 computes the median, and 1.0 computes the maximum). 
There's no default.\n\n\n\n\n\n","category":"type"},{"location":"operations.html#Daf.Operations.Mean","page":"Query operations","title":"Daf.Operations.Mean","text":"Mean(; dtype::Maybe{Type} = nothing)\n\nReduction operation that returns the mean value.\n\nParameters\n\ndtype - The default output data type is the float_dtype_for of the input data type.\n\n\n\n\n\n","category":"type"},{"location":"operations.html#Daf.Operations.GeoMean","page":"Query operations","title":"Daf.Operations.GeoMean","text":"GeoMean(; dtype::Maybe{Type} = nothing, eps::StorageNumber = 0.0)\n\nReduction operation that returns the geometric mean value.\n\nParameters\n\ndtype - The default output data type is the float_dtype_for of the input data type.\n\neps - The regularization factor added to each value and subtracted from the raw geo-mean, to deal with zero values.\n\n\n\n\n\n","category":"type"},{"location":"operations.html#Daf.Operations.Std","page":"Query operations","title":"Daf.Operations.Std","text":"Std(; dtype::Maybe{Type} = nothing)\n\nReduction operation that returns the (uncorrected) standard deviation of the values.\n\nParameters\n\ndtype - The default output data type is the float_dtype_for of the input data type.\n\n\n\n\n\n","category":"type"},{"location":"operations.html#Daf.Operations.StdN","page":"Query operations","title":"Daf.Operations.StdN","text":"StdN(; dtype::Maybe{Type} = nothing, eps::StorageNumber = 0)\n\nReduction operation that returns the (uncorrected) standard deviation of the values, normalized (divided) by the mean value.\n\nParameters\n\ndtype - The default output data type is the float_dtype_for of the input data type.\n\neps - Added to the mean before computing the division, to handle zero input data. 
By default is zero.\n\n\n\n\n\n","category":"type"},{"location":"operations.html#Daf.Operations.Var","page":"Query operations","title":"Daf.Operations.Var","text":"Var(; dtype::Maybe{Type} = nothing)\n\nReduction operation that returns the (uncorrected) variance of the values.\n\nParameters\n\ndtype - The default output data type is the float_dtype_for of the input data type.\n\n\n\n\n\n","category":"type"},{"location":"operations.html#Daf.Operations.VarN","page":"Query operations","title":"Daf.Operations.VarN","text":"VarN(; dtype::Maybe{Type} = nothing, eps::StorageNumber = 0.0)\n\nReduction operation that returns the (uncorrected) variance of the values, normalized (divided) by the mean of the values.\n\nParameters\n\ndtype - The default output data type is the float_dtype_for of the input data type.\n\neps - Added to the mean before computing the division, to handle zero input data. By default is zero.\n\n\n\n\n\n","category":"type"},{"location":"operations.html#Daf.Operations.Mode","page":"Query operations","title":"Daf.Operations.Mode","text":"Mode()\n\nReduction operation that returns the most frequent value in the input (the \"mode\").\n\nnote: Note\nThis operation supports strings; most operations do not.\n\n\n\n\n\n","category":"type"},{"location":"operations.html#Daf.Operations.Count","page":"Query operations","title":"Daf.Operations.Count","text":"Count(; dtype::Maybe{Type} = nothing)\n\nReduction operation that counts elements. 
This is useful when using GroupBy queries to count the number of elements in each group.\n\nParameters\n\ndtype - By default, uses UInt32.\n\n\n\n\n\n","category":"type"},{"location":"operations.html#Support-functions","page":"Query operations","title":"Support functions","text":"","category":"section"},{"location":"operations.html","page":"Query operations","title":"Query operations","text":"Daf.Operations.parse_parameter_value\nDaf.Operations.parse_number_value\nDaf.Operations.parse_number_dtype_value\nDaf.Operations.parse_float_dtype_value\nDaf.Operations.parse_int_dtype_value\nDaf.Operations.error_invalid_parameter_value\nDaf.Operations.float_dtype_for\nDaf.Operations.int_dtype_for\nDaf.Operations.unsigned_dtype_for\nDaf.Operations.sum_dtype_for","category":"page"},{"location":"operations.html#Daf.Operations.parse_parameter_value","page":"Query operations","title":"Daf.Operations.parse_parameter_value","text":"parse_parameter_value(\n parse_value::Function,\n operation_name::Token,\n operation_kind::AbstractString,\n parameters_values::Dict{String, Token},\n parameter_name::AbstractString,\n default::Any,\n)::Any\n\nParse an operation parameter.\n\n\n\n\n\n","category":"function"},{"location":"operations.html#Daf.Operations.parse_number_value","page":"Query operations","title":"Daf.Operations.parse_number_value","text":"parse_number_value(\n operation_name::AbstractString,\n parameter_name::AbstractString,\n parameter_value::Token,\n type::Type{T},\n)::T where {T <: StorageNumber}\n\nParse a numeric operation parameter.\n\n\n\n\n\n","category":"function"},{"location":"operations.html#Daf.Operations.parse_number_dtype_value","page":"Query operations","title":"Daf.Operations.parse_number_dtype_value","text":"parse_number_dtype_value(\n operation_name::AbstractString,\n parameter_name::AbstractString,\n parameter_value::Token,\n)::Maybe{Type}\n\nParse the dtype operation parameter.\n\nValid names are {B,b}ool, {UI,ui,I,i}nt{8,16,32,64} and 
{F,f}loat{32,64}.\n\n\n\n\n\n","category":"function"},{"location":"operations.html#Daf.Operations.parse_float_dtype_value","page":"Query operations","title":"Daf.Operations.parse_float_dtype_value","text":"parse_float_dtype_value(\n operation_name::AbstractString,\n parameter_name::AbstractString,\n parameter_value::Token,\n)::Maybe{Type}\n\nSimilar to parse_number_dtype_value, but only accept floating point types.\n\n\n\n\n\n","category":"function"},{"location":"operations.html#Daf.Operations.parse_int_dtype_value","page":"Query operations","title":"Daf.Operations.parse_int_dtype_value","text":"parse_int_dtype_value(\n operation_name::AbstractString,\n parameter_name::AbstractString,\n parameter_value::Token,\n)::Maybe{Type}\n\nSimilar to parse_number_dtype_value, but only accept integer (signed or unsigned) types.\n\n\n\n\n\n","category":"function"},{"location":"operations.html#Daf.Operations.error_invalid_parameter_value","page":"Query operations","title":"Daf.Operations.error_invalid_parameter_value","text":"error_invalid_parameter_value(\n operation_name::Token,\n parameter_name::AbstractString,\n parameter_value::Token,\n must_be::AbstractString,\n)::Nothing\n\nComplain that an operation parameter value is not valid.\n\n\n\n\n\n","category":"function"},{"location":"operations.html#Daf.Operations.float_dtype_for","page":"Query operations","title":"Daf.Operations.float_dtype_for","text":"float_dtype_for(\n element_type::Type{T},\n dtype::Maybe{Type{D}}\n)::Type where {T <: StorageNumber, D <: StorageNumber}\n\nGiven an input element_type, return the data type to use for the result of an operation that always produces floating point values (e.g., Log). 
If dtype isn't nothing, it is returned instead.\n\n\n\n\n\n","category":"function"},{"location":"operations.html#Daf.Operations.int_dtype_for","page":"Query operations","title":"Daf.Operations.int_dtype_for","text":"int_dtype_for(\n element_type::Type{T},\n dtype::Maybe{Type{D}}\n)::Type where {T <: StorageNumber, D <: StorageNumber}\n\nGiven an input element_type, return the data type to use for the result of an operation that always produces integer values (e.g., Round). If dtype isn't nothing, it is returned instead.\n\n\n\n\n\n","category":"function"},{"location":"operations.html#Daf.Operations.unsigned_dtype_for","page":"Query operations","title":"Daf.Operations.unsigned_dtype_for","text":"unsigned_dtype_for(\n element_type::Type{T},\n dtype::Maybe{Type{D}}\n)::Type where {T <: StorageNumber, D <: StorageNumber}\n\nGiven an input element_type, return the data type to use for the result of an operation that discards the sign of the value (e.g., Abs). If dtype isn't nothing, it is returned instead.\n\n\n\n\n\n","category":"function"},{"location":"operations.html#Daf.Operations.sum_dtype_for","page":"Query operations","title":"Daf.Operations.sum_dtype_for","text":"sum_dtype_for(\n element_type::Type{T},\n dtype::Maybe{Type{D}}\n)::Type where {T <: StorageNumber, D <: StorageNumber}\n\nGiven an input element_type, return the data type to use for the result of an operation that sums many such values (e.g., Sum). 
If dtype isn't nothing, it is returned instead.\n\nThis keeps floating point and 64-bit types as-is, but increases any small integer types to the matching 32 bit type (e.g., an input type of UInt8 will have a sum type of UInt32).\n\n\n\n\n\n","category":"function"},{"location":"operations.html#Index","page":"Query operations","title":"Index","text":"","category":"section"},{"location":"operations.html","page":"Query operations","title":"Query operations","text":"Pages = [\"operations.md\"]","category":"page"},{"location":"views.html#Views","page":"Views","title":"Views","text":"","category":"section"},{"location":"views.html","page":"Views","title":"Views","text":"Daf.Views\nDaf.Views.DafView\nDaf.Views.viewer\nDaf.Views.ViewAxes\nDaf.Views.ViewData\nDaf.Views.ALL_SCALARS\nDaf.Views.VIEW_ALL_SCALARS\nDaf.Views.ALL_AXES\nDaf.Views.VIEW_ALL_AXES\nDaf.Views.ALL_VECTORS\nDaf.Views.VIEW_ALL_VECTORS\nDaf.Views.ALL_MATRICES\nDaf.Views.VIEW_ALL_MATRICES\nDaf.Views.VIEW_ALL_DATA","category":"page"},{"location":"views.html#Daf.Views","page":"Views","title":"Daf.Views","text":"Create a different view of Daf data using queries. This is a very flexible mechanism which can be used for a variety of use cases. A simple way of using this is to view a subset of the data as a Daf data set. A variant of this also renames the data properties to adapt them to the requirements of some computation. This makes it simpler to create such tools (using fixed, generic property names) and apply them to arbitrary data (with arbitrary specific property names).\n\n\n\n\n\n","category":"module"},{"location":"views.html#Daf.Views.DafView","page":"Views","title":"Daf.Views.DafView","text":"struct DafView(daf::DafReader) <: DafReader\n\nA read-only wrapper for any DafReader data, which exposes an arbitrary view of it as another DafReadOnly. 
This isn't typically created manually; instead call viewer.\n\n\n\n\n\n","category":"type"},{"location":"views.html#Daf.Views.viewer","page":"Views","title":"Daf.Views.viewer","text":"viewer(\n daf::DafReader;\n [name::Maybe{AbstractString} = nothing,\n axes::Maybe{ViewAxes} = nothing,\n data::Maybe{ViewData} = nothing]\n)::DafReadOnly\n\nWrap daf data with a read-only DafView. The exposed view is defined by a set of queries applied to the original data. These queries are evaluated only when data is actually accessed. Therefore, creating a view is a relatively cheap operation.\n\nIf the name is not specified, the result name will be based on the name of daf, with a .view suffix.\n\nQueries are listed separately for axes, and scalars, vector and matrix properties, as follows:\n\nnote: Note\nAs an optimization, calling viewer with all-empty (default) arguments returns a simple DafReadOnlyWrapper, that is, it is equivalent to calling read_only. Additionally, saying data = VIEW_ALL_DATA will expose all the data using any of the exposed axes; you can write data = [VIEW_ALL_DATA..., key => nothing] to hide specific data based on its key.\n\n\n\n\n\n","category":"function"},{"location":"views.html#Daf.Views.ViewAxes","page":"Views","title":"Daf.Views.ViewAxes","text":"Specify axes to expose from a view.\n\nThis is specified as a vector of pairs (similar to initializing a Dict). The order of the pairs matter (last one wins).\n\nIf the key is \"*\", then it is replaced by all the names of the axes of the wrapped daf data. Otherwise, the key is just the name of an axis.\n\nIf the value is nothing, then the axis will not be exposed by the view. If the value is \"=\", then the axis will be exposed with the same entries as in the original daf data. Otherwise the value is any valid query that returns a vector of (unique!) strings to serve as the vector entries.\n\nThat is, saying \"*\" => \"=\" (or, VIEW_ALL_AXES will expose all the original daf data axes from the view. 
Following this by saying \"type\" => nothing will hide the type from the view. Saying \"batch\" => q\"/ batch & age > 1 will expose the batch axis, but only including the batches whose age property is greater than 1.\n\nnote: Note\nDue to Julia's type system limitations, there's just no way for the system to enforce the type of the pairs in this vector. That is, what we'd like to say is:ViewAxes = AbstractVector{Pair{AbstractString, Maybe{Union{AbstractString, Query}}}}But what we are forced to say is:ViewAxes = AbstractVector{<:Pair}Glory to anyone who figures out an incantation that would force the system to perform more meaningful type inference here.\n\n\n\n\n\n","category":"type"},{"location":"views.html#Daf.Views.ViewData","page":"Views","title":"Daf.Views.ViewData","text":"Specify data to expose from view. This is specified as a vector of pairs (similar to initializing a Dict). The order of the pairs matter (last one wins).\n\nScalars are specified similarly to ViewAxes, except that the query should return a scalar instead of a vector. That is, saying \"*\" => \"=\" (or, VIEW_ALL_SCALARS) will expose all the original daf data scalars from the view. Following this by saying \"version\" => nothing will hide the version from the view. Adding \"total_umis\" => q\"/ cell / gene : UMIs %> Sum %> Sum\" will expose a total_umis scalar containing the total sum of all UMIs of all genes in all cells, etc.\n\nVectors are specified similarly to scalars, but require a key specifying both an axis and a property name. The axis must be exposed by the view (based on the axes parameter). If the axis is \"*\", it is replaces by all the exposed axis names specified by the axes parameter. Similarly, if the property name is \"*\" (e.g., (\"gene\", \"*\")), then it is replaced by all the vector properties of the exposed axis in the base data. 
Therefore if the pair is (\"*\", \"*\") => \"=\" (or VIEW_ALL_VECTORS)`, all vector properties of all the (exposed) axes will also be exposed.\n\nThe value for vectors must be the suffix of a vector query based on the appropriate axis; a value of \"=\" is again used to expose the property as-is. That is, the value for the vector will normally start with the : (Lookup) query operator.\n\nThat is, specifying that axes = [\"gene\" => q\"/ gene & marker\"], and then that data = [(\"gene\", \"forbidden\") => q\": lateral\"], then the view will expose a forbidden vector property for the gene axis, by applying the combined query / gene & marker : lateral to the original daf data.\n\nThis gets trickier when using a query reducing a matrix to a vector. In these cases, the value query will start with / (Axis) query operator to specify the reduced matrix axis, followed by the : (Lookup) operator. When constructing the full query for the data, we can't simply concatenate the suffix to the axis query prefix; instead we need to swap the order of the axes (this is because Julia, in its infinite wisdom, uses column-major matrices, like R and matlab; so reduction eliminates the rows instead of the columns of the matrix).\n\nThat is, specifying axes = [\"cell\" => q\"/ cell & type = TCell\"], and then data = [(\"cell\", \"total_noisy_UMIs\") => q\"/ gene & noisy : UMIs %> Sum will expose total_noisy_UMIs as a per-cell vector property, using the query / gene & noisy / cell & type = TCell : UMIs %> Sum, which will compute the sum of the UMIs of all the noisy genes for each cell (whose type is TCell).\n\nMatrices require a key specifying both axes and a property name. The axes must both be exposed by the view (based on the axes parameter). Again if any or both of the axes are \"*\", they are replaced by all the exposed axes (based on the axes parameter), and likewise if the name is \"*\", it replaced by all the matrix properties of the axes. 
The value for matrices can again be \"=\" to expose the property as is, or the suffix of a matrix query. Therefore if the pair is (\"*\", \"*\", \"*\") => \"=\" (or, VIEW_ALL_MATRICES), all matrix properties of all the (exposed) axes will also be exposed.\n\nThat is, assuming a gene and cell axes were exposed by the axes parameter, then specifying that (\"cell\", \"gene\", \"log_UMIs\") => q\": UMIs % Log base 2 eps\" will expose the matrix log_UMIs for each cell and gene.\n\nThe order of the axes does not matter, so data = [(\"gene\", \"cell\", \"UMIs\") => \"=\"] has the same effect as data = [(\"cell\", \"gene\", \"UMIs\") => \"=\"].\n\nnote: Note\nDue to Julia's type system limitations, there's just no way for the system to enforce the type of the pairs in this vector. That is, what we'd like to say is:ViewData = AbstractVector{Pair{DataKey, Maybe{Union{AbstractString, Query}}}}But what we are forced to say is:ViewData = AbstractVector{<:Pair}Glory to anyone who figures out an incantation that would force the system to perform more meaningful type inference here.\n\n\n\n\n\n","category":"type"},{"location":"views.html#Daf.Views.ALL_SCALARS","page":"Views","title":"Daf.Views.ALL_SCALARS","text":"A key to use in the data parameter of viewer to specify all the base data scalars.\n\n\n\n\n\n","category":"constant"},{"location":"views.html#Daf.Views.VIEW_ALL_SCALARS","page":"Views","title":"Daf.Views.VIEW_ALL_SCALARS","text":"A pair to use in the data parameter of viewer to specify the view exposes all the base data scalars.\n\n\n\n\n\n","category":"constant"},{"location":"views.html#Daf.Views.ALL_AXES","page":"Views","title":"Daf.Views.ALL_AXES","text":"A pair to use in the axes parameter of viewer to specify all the base data axes.\n\n\n\n\n\n","category":"constant"},{"location":"views.html#Daf.Views.VIEW_ALL_AXES","page":"Views","title":"Daf.Views.VIEW_ALL_AXES","text":"A pair to use in the axes parameter of viewer to specify the view exposes all the base data 
axes.\n\n\n\n\n\n","category":"constant"},{"location":"views.html#Daf.Views.ALL_VECTORS","page":"Views","title":"Daf.Views.ALL_VECTORS","text":"A key to use in the data parameter of viewer to specify all the vectors of the exposed axes.\n\n\n\n\n\n","category":"constant"},{"location":"views.html#Daf.Views.VIEW_ALL_VECTORS","page":"Views","title":"Daf.Views.VIEW_ALL_VECTORS","text":"A pair to use in the data parameter of viewer to specify the view exposes all the vectors of the exposed axes.\n\n\n\n\n\n","category":"constant"},{"location":"views.html#Daf.Views.ALL_MATRICES","page":"Views","title":"Daf.Views.ALL_MATRICES","text":"A key to use in the data parameter of viewer to specify all the matrices of the exposed axes.\n\n\n\n\n\n","category":"constant"},{"location":"views.html#Daf.Views.VIEW_ALL_MATRICES","page":"Views","title":"Daf.Views.VIEW_ALL_MATRICES","text":"A pair to use in the data parameter of viewer to specify the view exposes all the matrices of the exposed axes.\n\n\n\n\n\n","category":"constant"},{"location":"views.html#Daf.Views.VIEW_ALL_DATA","page":"Views","title":"Daf.Views.VIEW_ALL_DATA","text":"A vector of pairs to use in the data parameters of viewer (using ...) to specify the view exposes all the data of the exposed axes.\n\n\n\n\n\n","category":"constant"},{"location":"views.html#Index","page":"Views","title":"Index","text":"","category":"section"},{"location":"views.html","page":"Views","title":"Views","text":"Pages = [\"views.md\"]","category":"page"},{"location":"writers.html#Writers","page":"Writers","title":"Writers","text":"","category":"section"},{"location":"writers.html","page":"Writers","title":"Writers","text":"Daf.Writers","category":"page"},{"location":"writers.html#Daf.Writers","page":"Writers","title":"Daf.Writers","text":"filled vector: The DafWriter interface specify a high-level API for writing Daf data. This API is implemented here, on top of the low-level FormatWriter API. 
This is an extension of the DafReader API and provides thread safety for reading and writing to the same data set from multiple threads, so the low-level API can (mostly) ignore this issue.\n\n\n\n\n\n","category":"module"},{"location":"writers.html#Scalar-properties","page":"Writers","title":"Scalar properties","text":"","category":"section"},{"location":"writers.html","page":"Writers","title":"Writers","text":"Daf.Writers.set_scalar!\nDaf.Writers.delete_scalar!\nDaf.Writers.scalar_names","category":"page"},{"location":"writers.html#Daf.Writers.set_scalar!","page":"Writers","title":"Daf.Writers.set_scalar!","text":"set_scalar!(\n daf::DafWriter,\n name::AbstractString,\n value::StorageScalar;\n [overwrite::Bool = false]\n)::Nothing\n\nSet the value of a scalar property with some name in daf.\n\nIf not overwrite (the default), this first verifies the name scalar property does not exist.\n\n\n\n\n\n","category":"function"},{"location":"writers.html#Daf.Writers.delete_scalar!","page":"Writers","title":"Daf.Writers.delete_scalar!","text":"delete_scalar!(\n daf::DafWriter,\n name::AbstractString;\n must_exist::Bool = true,\n)::Nothing\n\nDelete a scalar property with some name from daf.\n\nIf must_exist (the default), this first verifies the name scalar property exists in daf.\n\n\n\n\n\n","category":"function"},{"location":"writers.html#Daf.Readers.scalar_names","page":"Writers","title":"Daf.Readers.scalar_names","text":"scalar_names(daf::DafReader)::AbstractStringSet\n\nThe names of the scalar properties in daf.\n\n\n\n\n\n","category":"function"},{"location":"writers.html#Writers-axes","page":"Writers","title":"Writers axes","text":"","category":"section"},{"location":"writers.html","page":"Writers","title":"Writers","text":"Daf.Writers.add_axis!\nDaf.Writers.delete_axis!","category":"page"},{"location":"writers.html#Daf.Writers.add_axis!","page":"Writers","title":"Daf.Writers.add_axis!","text":"add_axis!(\n daf::DafWriter,\n axis::AbstractString,\n 
entries::AbstractStringVector\n)::Nothing\n\nAdd a new axis to daf.\n\nThis first verifies the axis does not exist and that the entries are unique.\n\n\n\n\n\n","category":"function"},{"location":"writers.html#Daf.Writers.delete_axis!","page":"Writers","title":"Daf.Writers.delete_axis!","text":"delete_axis!(\n daf::DafWriter,\n axis::AbstractString;\n must_exist::Bool = true,\n)::Nothing\n\nDelete an axis from the daf. This will also delete any vector or matrix properties that are based on this axis.\n\nIf must_exist (the default), this first verifies the axis exists in the daf.\n\n\n\n\n\n","category":"function"},{"location":"writers.html#Vector-properties","page":"Writers","title":"Vector properties","text":"","category":"section"},{"location":"writers.html","page":"Writers","title":"Writers","text":"Daf.Writers.set_vector!\nDaf.Writers.delete_vector!","category":"page"},{"location":"writers.html#Daf.Writers.set_vector!","page":"Writers","title":"Daf.Writers.set_vector!","text":"set_vector!(\n daf::DafWriter,\n axis::AbstractString,\n name::AbstractString,\n vector::Union{StorageScalar, StorageVector};\n [overwrite::Bool = false]\n)::Nothing\n\nSet a vector property with some name for some axis in daf.\n\nIf the vector specified is actually a StorageScalar, the stored vector is filled with this value.\n\nThis first verifies the axis exists in daf, that the property name isn't name, and that the vector has the appropriate length. If not overwrite (the default), this also verifies the name vector does not exist for the axis.\n\n\n\n\n\n","category":"function"},{"location":"writers.html#Daf.Writers.delete_vector!","page":"Writers","title":"Daf.Writers.delete_vector!","text":"delete_vector!(\n daf::DafWriter,\n axis::AbstractString,\n name::AbstractString;\n must_exist::Bool = true,\n)::Nothing\n\nDelete a vector property with some name for some axis from daf.\n\nThis first verifies the axis exists in daf and that the property name isn't name. 
If must_exist (the default), this also verifies the name vector exists for the axis.\n\n\n\n\n\n","category":"function"},{"location":"writers.html#Matrix-properties","page":"Writers","title":"Matrix properties","text":"","category":"section"},{"location":"writers.html","page":"Writers","title":"Writers","text":"Daf.Writers.set_matrix!\nDaf.Writers.relayout_matrix!\nDaf.Writers.delete_matrix!","category":"page"},{"location":"writers.html#Daf.Writers.set_matrix!","page":"Writers","title":"Daf.Writers.set_matrix!","text":"set_matrix!(\n daf::DafWriter,\n rows_axis::AbstractString,\n columns_axis::AbstractString,\n name::AbstractString,\n matrix::Union{StorageNumber, StorageMatrix};\n [overwrite::Bool = false,\n relayout::Bool = true]\n)::Nothing\n\nSet the matrix property with some name for some rows_axis and columns_axis in daf. Since this is Julia, this should be a column-major matrix.\n\nIf the matrix specified is actually a StorageScalar, the stored matrix is filled with this value.\n\nIf relayout (the default), this will also automatically relayout! the matrix and store the result, so the data would also be stored in row-major layout (that is, with the axes flipped), similarly to calling relayout_matrix!.\n\nThis first verifies the rows_axis and columns_axis exist in daf, that the matrix is column-major of the appropriate size. If not overwrite (the default), this also verifies the name matrix does not exist for the rows_axis and columns_axis.\n\n\n\n\n\n","category":"function"},{"location":"writers.html#Daf.Writers.relayout_matrix!","page":"Writers","title":"Daf.Writers.relayout_matrix!","text":"relayout_matrix!(\n daf::DafWriter,\n rows_axis::AbstractString,\n columns_axis::AbstractString,\n name::AbstractString;\n [overwrite::Bool = false]\n)::Nothing\n\nGiven a matrix property with some name exists (in column-major layout) in daf for the rows_axis and the columns_axis, then relayout! 
it and store the row-major result as well (that is, with flipped axes).\n\nThis is useful following calling empty_dense_matrix! or empty_sparse_matrix! to ensure both layouts of the matrix are stored in daf. When calling set_matrix!, it is simpler to just specify (the default) relayout = true.\n\nThis first verifies the rows_axis and columns_axis exist in daf, and that there is a name (column-major) matrix property for them. If not overwrite (the default), this also verifies the name matrix does not exist for the flipped rows_axis and columns_axis.\n\nnote: Note\nA restriction of the way Daf stores data is that square data is only stored in one (column-major) layout (e.g., to store a weighted directed graph between cells, you may store an outgoingweights matrix where each cell's column holds the outgoing weights from the cell to the other cells. In this case you can't ask Daf to relayout the matrix to row-major order so that each cell's row would be the incoming weights from the other cells. Instead you would need to explicitly store a separate incomingweights matrix where each cell's column holds the incoming weights).\n\n\n\n\n\n","category":"function"},{"location":"writers.html#Daf.Writers.delete_matrix!","page":"Writers","title":"Daf.Writers.delete_matrix!","text":"delete_matrix!(\n daf::DafWriter,\n rows_axis::AbstractString,\n columns_axis::AbstractString,\n name::AbstractString;\n [must_exist::Bool = true,\n relayout::Bool = true]\n)::Nothing\n\nDelete a matrix property with some name for some rows_axis and columns_axis from daf.\n\nIf relayout (the default), this will also delete the matrix in the other layout (that is, with flipped axes).\n\nThis first verifies the rows_axis and columns_axis exist in daf. 
If must_exist (the default), this also verifies the name matrix exists for the rows_axis and columns_axis.\n\n\n\n\n\n","category":"function"},{"location":"writers.html#Creating-properties","page":"Writers","title":"Creating properties","text":"","category":"section"},{"location":"writers.html","page":"Writers","title":"Writers","text":"Daf.Writers.empty_dense_vector!\nDaf.Writers.empty_sparse_vector!\nDaf.Writers.empty_dense_matrix!\nDaf.Writers.empty_sparse_matrix!","category":"page"},{"location":"writers.html#Daf.Writers.empty_dense_vector!","page":"Writers","title":"Daf.Writers.empty_dense_vector!","text":"empty_dense_vector!(\n fill::Function,\n daf::DafWriter,\n axis::AbstractString,\n name::AbstractString,\n eltype::Type{T};\n [overwrite::Bool = false]\n)::Any where {T <: StorageNumber}\n\nCreate an empty dense vector property with some name for some axis in daf, pass it to fill, and return the result.\n\nThe returned vector will be uninitialized; the caller is expected to fill it with values. This saves creating a copy of the vector before setting it in the data, which makes a huge difference when creating vectors on disk (using memory mapping). For this reason, this does not work for strings, as they do not have a fixed size.\n\nThis first verifies the axis exists in daf and that the property name isn't name. 
If not overwrite (the default), this also verifies the name vector does not exist for the axis.\n\n\n\n\n\n","category":"function"},{"location":"writers.html#Daf.Writers.empty_sparse_vector!","page":"Writers","title":"Daf.Writers.empty_sparse_vector!","text":"empty_sparse_vector!(\n fill::Function,\n daf::DafWriter,\n axis::AbstractString,\n name::AbstractString,\n eltype::Type{T},\n nnz::StorageInteger,\n indtype::Maybe{Type{I}} = nothing;\n [overwrite::Bool = false]\n)::Any where {T <: StorageNumber, I <: StorageInteger}\n\nCreate an empty sparse vector property with some name for some axis in daf, pass its parts (nzind and nzval) to fill, and return the result.\n\nIf indtype is not specified, it is chosen automatically to be the smallest unsigned integer type needed for the vector.\n\nThe returned vector will be uninitialized; the caller is expected to fill its nzind and nzval vectors with values. Specifying the nnz makes their sizes known in advance, to allow pre-allocating disk data. For this reason, this does not work for strings, as they do not have a fixed size.\n\nThis severely restricts the usefulness of this function, because typically nnz is only know after fully computing the matrix. Still, in some cases a large sparse vector is created by concatenating several smaller ones; this function allows doing so directly into the data vector, avoiding a copy in case of memory-mapped disk formats.\n\nwarning: Warning\nIt is the caller's responsibility to fill the two vectors with valid data. Specifically, you must ensure:nzind[1] == 1\nnzind[i] <= nzind[i + 1]\nnzind[end] == nnz\n\nThis first verifies the axis exists in daf and that the property name isn't name. 
If not overwrite (the default), this also verifies the name vector does not exist for the axis.\n\n\n\n\n\n","category":"function"},{"location":"writers.html#Daf.Writers.empty_dense_matrix!","page":"Writers","title":"Daf.Writers.empty_dense_matrix!","text":"empty_dense_matrix!(\n fill::Function,\n daf::DafWriter,\n rows_axis::AbstractString,\n columns_axis::AbstractString,\n name::AbstractString,\n eltype::Type{T};\n [overwrite::Bool = false]\n)::Any where {T <: StorageNumber}\n\nCreate an empty dense matrix property with some name for some rows_axis and columns_axis in daf, pass it to fill, and return the result. Since this is Julia, this will be a column-major matrix.\n\nThe returned matrix will be uninitialized; the caller is expected to fill it with values. This saves creating a copy of the matrix before setting it in daf, which makes a huge difference when creating matrices on disk (using memory mapping). For this reason, this does not work for strings, as they do not have a fixed size.\n\nThis first verifies the rows_axis and columns_axis exist in daf, that the matrix is column-major of the appropriate size. 
If not overwrite (the default), this also verifies the name matrix does not exist for the rows_axis and columns_axis.\n\n\n\n\n\n","category":"function"},{"location":"writers.html#Daf.Writers.empty_sparse_matrix!","page":"Writers","title":"Daf.Writers.empty_sparse_matrix!","text":"empty_sparse_matrix!(\n fill::Function,\n daf::DafWriter,\n rows_axis::AbstractString,\n columns_axis::AbstractString,\n name::AbstractString,\n eltype::Type{T},\n nnz::StorageInteger,\n intdype::Maybe{Type{I}} = nothing;\n [overwrite::Bool = false]\n)::Any where {T <: StorageNumber, I <: StorageInteger}\n\nCreate an empty sparse matrix property with some name for some rows_axis and columns_axis in daf, pass its parts (colptr, rowval and nzval) to fill, and return the result.\n\nIf indtype is not specified, it is chosen automatically to be the smallest unsigned integer type needed for the matrix.\n\nThe returned matrix will be uninitialized; the caller is expected to fill its colptr, rowval and nzval vectors. Specifying the nnz makes their sizes known in advance, to allow pre-allocating disk space. For this reason, this does not work for strings, as they do not have a fixed size.\n\nThis severely restricts the usefulness of this function, because typically nnz is only know after fully computing the matrix. Still, in some cases a large sparse matrix is created by concatenating several smaller ones; this function allows doing so directly into the data, avoiding a copy in case of memory-mapped disk formats.\n\nwarning: Warning\n\n\nIt is the caller's responsibility to fill the three vectors with valid data. Specifically, you must ensure:\n\ncolptr[1] == 1\ncolptr[end] == nnz + 1\ncolptr[i] <= colptr[i + 1]\nfor all j, for all i such that colptr[j] <= i and i + 1 < colptr[j + 1], 1 <= rowptr[i] < rowptr[i + 1] <= nrows\n\nThis first verifies the rows_axis and columns_axis exist in daf. 
If not overwrite (the default), this also verifies the name matrix does not exist for the rows_axis and columns_axis.\n\n\n\n\n\n","category":"function"},{"location":"writers.html#Index","page":"Writers","title":"Index","text":"","category":"section"},{"location":"writers.html","page":"Writers","title":"Writers","text":"Pages = [\"writers.md\"]","category":"page"},{"location":"formats.html#Formats","page":"Formats","title":"Formats","text":"","category":"section"},{"location":"formats.html","page":"Formats","title":"Formats","text":"Daf.Formats\nDaf.Formats.DataKey","category":"page"},{"location":"formats.html#Daf.Formats","page":"Formats","title":"Daf.Formats","text":"The FormatReader and FormatWriter interfaces specify a low-level API for storing Daf data. To extend Daf to support an additional format, create a new implementation of this API.\n\nA storage format object contains some named scalar data, a set of axes (each with a unique name for each entry), and named vector and matrix data based on these axes.\n\nData properties are identified by a unique name given the axes they are based on. That is, there is a separate namespace for scalar properties, vector properties for each specific axis, and matrix properties for each (ordered) pair of axes.\n\nFor matrices, we keep careful track of their MatrixLayouts. Specifically, a storage format only deals with column-major matrices, listed under the rows axis first and the columns axis second. A storage format object may hold two copies of the same matrix, in both possible memory layouts, in which case it will be listed twice, under both axes orders.\n\nIn general, storage format objects are as \"dumb\" as possible, to make it easier to support new storage formats. 
The required functions implement a glorified key-value repository, with the absolutely minimal necessary logic to deal with the separate property namespaces listed above.\n\nFor clarity of documentation, we split the type hierarchy to DafWriter <: FormatWriter <: DafReader <: FormatReader.\n\nThe functions listed here use the FormatReader for read-only operations and FormatWriter for write operations into a Daf storage. This is a low-level API, not meant to be used from outside the package, and therefore is not re-exported from the top-level Daf namespace.\n\nIn contrast, the functions using DafReader and DafWriter describe the high-level API meant to be used from outside the package, and are re-exported. These functions are listed in the Daf.Readers and Daf.Writers modules. These functions provide all the logic common to any storage format, allowing us to keep the format-specific functions as simple as possible.\n\nThat is, when implementing a new Daf storage format, you should write struct MyFormat <: DafWriter, and implement the functions listed here for both FormatReader and FormatWriter.\n\n\n\n\n\n","category":"module"},{"location":"formats.html#Daf.Formats.DataKey","page":"Formats","title":"Daf.Formats.DataKey","text":"A key specifying some data property in Daf.\n\nScalars are identified by their name.\n\nVectors are specified as a tuple of the axis name and the property name.\n\nMatrices are specified as a tuple of the rows axis, the columns axis, and the property name.\n\nThe DafReader and DafWriter interfaces do not use this type, as each function knows exactly the type of data property it works on. 
However, higher-level APIs do use this as keys for dictionaries etc.\n\n\n\n\n\n","category":"type"},{"location":"formats.html#Read-API","page":"Formats","title":"Read API","text":"","category":"section"},{"location":"formats.html","page":"Formats","title":"Formats","text":"Daf.Formats.DafReader\nDaf.Formats.FormatReader\nDaf.Formats.Internal","category":"page"},{"location":"formats.html#Daf.Formats.DafReader","page":"Formats","title":"Daf.Formats.DafReader","text":"A high-level abstract interface for read-only access to Daf data.\n\nAll the functions for this type are provided based on the functions required for FormatReader. See the Daf.Readers module for their description.\n\n\n\n\n\n","category":"type"},{"location":"formats.html#Daf.Formats.FormatReader","page":"Formats","title":"Daf.Formats.FormatReader","text":"A low-level abstract interface for reading from Daf storage formats.\n\nWe require each storage format to have a .internal::Internal property. This enables all the high-level DafReader functions.\n\nEach storage format must implement the functions listed below for reading from the storage.\n\n\n\n\n\n","category":"type"},{"location":"formats.html#Daf.Formats.Internal","page":"Formats","title":"Daf.Formats.Internal","text":"Internal(name::AbstractString)\n\nInternal data we need to keep in any concrete FormatReader. This has to be available as a .internal data member of the concrete format. 
This enables all the high-level DafReader and DafWriter functions.\n\nThe constructor will automatically call unique_name to try and make the names unique for improved error messages.\n\n\n\n\n\n","category":"type"},{"location":"formats.html#Caching","page":"Formats","title":"Caching","text":"","category":"section"},{"location":"formats.html","page":"Formats","title":"Formats","text":"Daf.Formats.CacheType\nDaf.Formats.empty_cache!","category":"page"},{"location":"formats.html#Daf.Formats.CacheType","page":"Formats","title":"Daf.Formats.CacheType","text":"Types of cached data inside Daf.\n\nMappedData - memory-mapped disk data. This is the cheapest data, as it doesn't put pressure on the garbage collector. It requires some OS resources to maintain the mapping, and physical memory for the subset of the data that is actually being accessed. That is, one can memory map larger data than the physical memory, and performance will be good, as long as the subset of the data that is actually accessed is small enough to fit in memory. If it isn't, the performance will drop (a lot!) because the OS will be continuously reading data pages from disk - but it will not crash due to an out of memory error. It is very important not to re-map the same data twice because that causes all sort of inefficiencies and edge cases in the hardware and low-level software.\nMemoryData - disk data copied to application memory, or alternative layout of data matrices. This does pressure the garbage collector and can cause out of memory errors. However, re-fetching the data from disk is very slow, so caching this data is crucial for performance.\nQueryData - data that is computed by queries based on stored data (e.g., masked data, or results of a reduction or an element-wise operation). This again takes up application memory and may cause out of memory errors, but it is very useful to cache the results when the same query is executed multiple times (e.g., when using views). 
Manually executing queries therefore allows to explicitly disable the caching of the query results, since some queries will not be repeated.\n\nIf too much data has been cached, call empty_cache! to release it.\n\n\n\n\n\n","category":"type"},{"location":"formats.html#Daf.Formats.empty_cache!","page":"Formats","title":"Daf.Formats.empty_cache!","text":"empty_cache!(\n daf::FormatReader;\n [clear::Maybe{CacheType} = nothing,\n keep::Maybe{CacheType} = nothing]\n)::Nothing\n\nClear some cached data. By default, completely empties the caches. You can specify either clear, to only forget a specific CacheType (e.g., for clearing only QueryData), or keep, to forget everything except a specific CacheType (e.g., for keeping only MappedData). You can't specify both clear and keep.\n\n\n\n\n\n","category":"function"},{"location":"formats.html#Description","page":"Formats","title":"Description","text":"","category":"section"},{"location":"formats.html","page":"Formats","title":"Formats","text":"Daf.Formats.format_description_header\nDaf.Formats.format_description_footer","category":"page"},{"location":"formats.html#Daf.Formats.format_description_header","page":"Formats","title":"Daf.Formats.format_description_header","text":"format_description_header(format::FormatReader, lines::Vector{String})::Nothing\n\nAllow a format to emit additional description header lines.\n\nThis trusts that we have a read lock on the data set.\n\n\n\n\n\n","category":"function"},{"location":"formats.html#Daf.Formats.format_description_footer","page":"Formats","title":"Daf.Formats.format_description_footer","text":"format_description_footer(format::FormatReader, lines::Vector{String})::Nothing\n\nAllow a format to emit additional description footer lines. 
If deep, this also emits the description of any data sets nested in this one, if any.\n\nThis trusts that we have a read lock on the data set.\n\n\n\n\n\n","category":"function"},{"location":"formats.html#Scalar-properties","page":"Formats","title":"Scalar properties","text":"","category":"section"},{"location":"formats.html","page":"Formats","title":"Formats","text":"Daf.Formats.format_has_scalar\nDaf.Formats.format_scalar_names\nDaf.Formats.format_get_scalar","category":"page"},{"location":"formats.html#Daf.Formats.format_has_scalar","page":"Formats","title":"Daf.Formats.format_has_scalar","text":"format_has_scalar(format::FormatReader, name::AbstractString)::Bool\n\nCheck whether a scalar property with some name exists in format.\n\nThis trusts that we have a read lock on the data set.\n\n\n\n\n\n","category":"function"},{"location":"formats.html#Daf.Formats.format_scalar_names","page":"Formats","title":"Daf.Formats.format_scalar_names","text":"format_scalar_names(format::FormatReader)::AbstractStringSet\n\nThe names of the scalar properties in format.\n\nThis trusts that we have a read lock on the data set.\n\n\n\n\n\n","category":"function"},{"location":"formats.html#Daf.Formats.format_get_scalar","page":"Formats","title":"Daf.Formats.format_get_scalar","text":"format_get_scalar(format::FormatReader, name::AbstractString)::StorageScalar\n\nImplement fetching the value of a scalar property with some name in format.\n\nThis trusts that we have a read lock on the data set, and that the name scalar property exists in format.\n\n\n\n\n\n","category":"function"},{"location":"formats.html#Data-axes","page":"Formats","title":"Data 
axes","text":"","category":"section"},{"location":"formats.html","page":"Formats","title":"Formats","text":"Daf.Formats.format_has_axis\nDaf.Formats.format_axis_names\nDaf.Formats.format_get_axis\nDaf.Formats.format_axis_length","category":"page"},{"location":"formats.html#Daf.Formats.format_has_axis","page":"Formats","title":"Daf.Formats.format_has_axis","text":"format_has_axis(format::FormatReader, axis::AbstractString; for_change::Bool)::Bool\n\nCheck whether some axis exists in format. If for_change, this is done just prior to adding or deleting the axis.\n\nThis trusts that we have a read lock on the data set.\n\n\n\n\n\n","category":"function"},{"location":"formats.html#Daf.Formats.format_axis_names","page":"Formats","title":"Daf.Formats.format_axis_names","text":"format_axis_names(format::FormatReader)::AbstractStringSet\n\nThe names of the axes of format.\n\nThis trusts that we have a read lock on the data set.\n\n\n\n\n\n","category":"function"},{"location":"formats.html#Daf.Formats.format_get_axis","page":"Formats","title":"Daf.Formats.format_get_axis","text":"format_get_axis(format::FormatReader, axis::AbstractString)::AbstractStringVector\n\nImplement fetching the unique names of the entries of some axis of format.\n\nThis trusts that we have a read lock on the data set, and that the axis exists in format.\n\n\n\n\n\n","category":"function"},{"location":"formats.html#Daf.Formats.format_axis_length","page":"Formats","title":"Daf.Formats.format_axis_length","text":"format_axis_length(format::FormatReader, axis::AbstractString)::Int64\n\nImplement fetching the number of entries along the axis.\n\nThis trusts that we have a read lock on the data set, and that the axis exists in format.\n\n\n\n\n\n","category":"function"},{"location":"formats.html#Vector-properties","page":"Formats","title":"Vector 
properties","text":"","category":"section"},{"location":"formats.html","page":"Formats","title":"Formats","text":"Daf.Formats.format_has_vector\nDaf.Formats.format_vector_names\nDaf.Formats.format_get_vector","category":"page"},{"location":"formats.html#Daf.Formats.format_has_vector","page":"Formats","title":"Daf.Formats.format_has_vector","text":"format_has_vector(format::FormatReader, axis::AbstractString, name::AbstractString)::Bool\n\nImplement checking whether a vector property with some name exists for the axis in format.\n\nThis trusts that we have a read lock on the data set, that the axis exists in format and that the property name isn't name.\n\n\n\n\n\n","category":"function"},{"location":"formats.html#Daf.Formats.format_vector_names","page":"Formats","title":"Daf.Formats.format_vector_names","text":"format_vector_names(format::FormatReader, axis::AbstractString)::AbstractStringSet\n\nImplement fetching the names of the vectors for the axis in format, not including the special name property.\n\nThis trusts that we have a read lock on the data set, and that the axis exists in format.\n\n\n\n\n\n","category":"function"},{"location":"formats.html#Daf.Formats.format_get_vector","page":"Formats","title":"Daf.Formats.format_get_vector","text":"format_get_vector(format::FormatReader, axis::AbstractString, name::AbstractString)::StorageVector\n\nImplement fetching the vector property with some name for some axis in format.\n\nThis trusts that we have a read lock on the data set, that the axis exists in format, and the name vector property exists for the axis.\n\n\n\n\n\n","category":"function"},{"location":"formats.html#Matrix-properties","page":"Formats","title":"Matrix 
properties","text":"","category":"section"},{"location":"formats.html","page":"Formats","title":"Formats","text":"Daf.Formats.format_has_matrix\nDaf.Formats.format_matrix_names\nDaf.Formats.format_get_matrix","category":"page"},{"location":"formats.html#Daf.Formats.format_has_matrix","page":"Formats","title":"Daf.Formats.format_has_matrix","text":"format_has_matrix(\n format::FormatReader,\n rows_axis::AbstractString,\n columns_axis::AbstractString,\n name::AbstractString;\n [for_relayout::Bool = false]\n)::Bool\n\nImplement checking whether a matrix property with some name exists for the rows_axis and the columns_axis in format.\n\nThis trusts that we have a read lock on the data set, and that the rows_axis and the columns_axis exist in format.\n\n\n\n\n\n","category":"function"},{"location":"formats.html#Daf.Formats.format_matrix_names","page":"Formats","title":"Daf.Formats.format_matrix_names","text":"format_matrix_names(\n format::FormatReader,\n rows_axis::AbstractString,\n columns_axis::AbstractString,\n)::AbstractStringSet\n\nImplement fetching the names of the matrix properties for the rows_axis and columns_axis in format.\n\nThis trusts that we have a read lock on the data set, and that the rows_axis and columns_axis exist in format.\n\n\n\n\n\n","category":"function"},{"location":"formats.html#Daf.Formats.format_get_matrix","page":"Formats","title":"Daf.Formats.format_get_matrix","text":"format_get_matrix(\n format::FormatReader,\n rows_axis::AbstractString,\n columns_axis::AbstractString,\n name::AbstractString\n)::StorageMatrix\n\nImplement fetching the matrix property with some name for some rows_axis and columns_axis in format.\n\nThis trusts that we have a read lock on the data set, and that the rows_axis and columns_axis exist in format, and the name matrix property exists for them.\n\n\n\n\n\n","category":"function"},{"location":"formats.html#Write-API","page":"Formats","title":"Write 
API","text":"","category":"section"},{"location":"formats.html","page":"Formats","title":"Formats","text":"Daf.Formats.DafWriter\nDaf.Formats.FormatWriter","category":"page"},{"location":"formats.html#Daf.Formats.DafWriter","page":"Formats","title":"Daf.Formats.DafWriter","text":"A high-level abstract interface for write access to Daf data.\n\nAll the functions for this type are provided based on the functions required for FormatWriter. See the Daf.Writers module for their description.\n\n\n\n\n\n","category":"type"},{"location":"formats.html#Daf.Formats.FormatWriter","page":"Formats","title":"Daf.Formats.FormatWriter","text":"An abstract interface for writing into Daf storage formats.\n\nEach storage format must implement the functions listed below for writing into the storage.\n\n\n\n\n\n","category":"type"},{"location":"formats.html#Scalar-properties-2","page":"Formats","title":"Scalar properties","text":"","category":"section"},{"location":"formats.html","page":"Formats","title":"Formats","text":"Daf.Formats.format_set_scalar!\nDaf.Formats.format_delete_scalar!","category":"page"},{"location":"formats.html#Daf.Formats.format_set_scalar!","page":"Formats","title":"Daf.Formats.format_set_scalar!","text":"format_set_scalar!(\n format::FormatWriter,\n name::AbstractString,\n value::StorageScalar,\n)::Nothing\n\nImplement setting the value of a scalar property with some name in format.\n\nThis trusts that we have a write lock on the data set, and that the name scalar property does not exist in format.\n\n\n\n\n\n","category":"function"},{"location":"formats.html#Daf.Formats.format_delete_scalar!","page":"Formats","title":"Daf.Formats.format_delete_scalar!","text":"format_delete_scalar!(\n format::FormatWriter,\n name::AbstractString;\n for_set::Bool\n)::Nothing\n\nImplement deleting a scalar property with some name from format. 
If for_set, this is done just prior to setting the scalar with a different value.\n\nThis trusts that we have a write lock on the data set, and that the name scalar property exists in format.\n\n\n\n\n\n","category":"function"},{"location":"formats.html#Data-axes-2","page":"Formats","title":"Data axes","text":"","category":"section"},{"location":"formats.html","page":"Formats","title":"Formats","text":"Daf.Formats.format_add_axis!\nDaf.Formats.format_delete_axis!","category":"page"},{"location":"formats.html#Daf.Formats.format_add_axis!","page":"Formats","title":"Daf.Formats.format_add_axis!","text":"format_add_axis!(\n format::FormatWriter,\n axis::AbstractString,\n entries::AbstractStringVector\n)::Nothing\n\nImplement adding a new axis to format.\n\nThis trusts we have a write lock on the data set, that the axis does not already exist in format, and that the names of the entries are unique.\n\n\n\n\n\n","category":"function"},{"location":"formats.html#Daf.Formats.format_delete_axis!","page":"Formats","title":"Daf.Formats.format_delete_axis!","text":"format_delete_axis!(format::FormatWriter, axis::AbstractString)::Nothing\n\nImplement deleting some axis from format.\n\nThis trusts we have a write lock on the data set, that the axis exists in format, and that all properties that are based on this axis have already been deleted.\n\n\n\n\n\n","category":"function"},{"location":"formats.html#Vector-properties-2","page":"Formats","title":"Vector properties","text":"","category":"section"},{"location":"formats.html","page":"Formats","title":"Formats","text":"Daf.Formats.format_set_vector!\nDaf.Formats.format_delete_vector!","category":"page"},{"location":"formats.html#Daf.Formats.format_set_vector!","page":"Formats","title":"Daf.Formats.format_set_vector!","text":"format_set_vector!(\n format::FormatWriter,\n axis::AbstractString,\n name::AbstractString,\n vector::Union{StorageScalar, StorageVector},\n)::Nothing\n\nImplement setting a vector property with 
some name for some axis in format.\n\nIf the vector specified is actually a StorageScalar, the stored vector is filled with this value.\n\nThis trusts we have a write lock on the data set, that the axis exists in format, that the vector property name isn't \"name\", that it does not exist for the axis, and that the vector has the appropriate length for it.\n\n\n\n\n\n","category":"function"},{"location":"formats.html#Daf.Formats.format_delete_vector!","page":"Formats","title":"Daf.Formats.format_delete_vector!","text":"format_delete_vector!(\n format::FormatWriter,\n axis::AbstractString,\n name::AbstractString;\n for_set::Bool\n)::Nothing\n\nImplement deleting a vector property with some name for some axis from format. If for_set, this is done just prior to setting the vector with a different value.\n\nThis trusts we have a write lock on the data set, that the axis exists in format, that the vector property name isn't name, and that the name vector exists for the axis.\n\n\n\n\n\n","category":"function"},{"location":"formats.html#Matrix-properties-2","page":"Formats","title":"Matrix properties","text":"","category":"section"},{"location":"formats.html","page":"Formats","title":"Formats","text":"Daf.Formats.format_set_matrix!\nDaf.Formats.format_relayout_matrix!\nDaf.Formats.format_delete_matrix!","category":"page"},{"location":"formats.html#Daf.Formats.format_set_matrix!","page":"Formats","title":"Daf.Formats.format_set_matrix!","text":"format_set_matrix!(\n format::FormatWriter,\n rows_axis::AbstractString,\n columns_axis::AbstractString,\n name::AbstractString,\n matrix::StorageMatrix,\n)::Nothing\n\nImplement setting the matrix property with some name for some rows_axis and columns_axis in format.\n\nIf the matrix specified is actually a StorageScalar, the stored matrix is filled with this value.\n\nThis trusts we have a write lock on the data set, that the rows_axis and columns_axis exist in format, that the name matrix property does not exist for them, and 
that the matrix is column-major of the appropriate size for it.\n\n\n\n\n\n","category":"function"},{"location":"formats.html#Daf.Formats.format_relayout_matrix!","page":"Formats","title":"Daf.Formats.format_relayout_matrix!","text":"format_relayout_matrix!(\n format::FormatWriter,\n rows_axis::AbstractString,\n columns_axis::AbstractString,\n name::AbstractString\n)::Nothing\n\nrelayout! the existing name column-major matrix property for the rows_axis and the columns_axis and store the results as a row-major matrix property (that is, with flipped axes).\n\nThis trusts we have a write lock on the data set, that the rows_axis and columns_axis are different from each other, exist in format, that the name matrix property exists for them, and that it does not exist for the flipped axes.\n\n\n\n\n\n","category":"function"},{"location":"formats.html#Daf.Formats.format_delete_matrix!","page":"Formats","title":"Daf.Formats.format_delete_matrix!","text":"format_delete_matrix!(\n format::FormatWriter,\n rows_axis::AbstractString,\n columns_axis::AbstractString,\n name::AbstractString;\n for_set::Bool\n)::Nothing\n\nImplement deleting a matrix property with some name for some rows_axis and columns_axis from format. 
If for_set, this is done just prior to setting the matrix with a different value.\n\nThis trusts we have a write lock on the data set, that the rows_axis and columns_axis exist in format, and that the name matrix property exists for them.\n\n\n\n\n\n","category":"function"},{"location":"formats.html#Creating-properties","page":"Formats","title":"Creating properties","text":"","category":"section"},{"location":"formats.html","page":"Formats","title":"Formats","text":"Daf.Formats.format_get_empty_dense_vector!\nDaf.Formats.format_filled_empty_dense_vector!\nDaf.Formats.format_get_empty_sparse_vector!\nDaf.Formats.format_filled_empty_sparse_vector!\nDaf.Formats.format_get_empty_dense_matrix!\nDaf.Formats.format_filled_empty_dense_matrix!\nDaf.Formats.format_get_empty_sparse_matrix!\nDaf.Formats.format_filled_empty_sparse_matrix!","category":"page"},{"location":"formats.html#Daf.Formats.format_get_empty_dense_vector!","page":"Formats","title":"Daf.Formats.format_get_empty_dense_vector!","text":"format_get_empty_dense_vector!(\n format::FormatWriter,\n axis::AbstractString,\n name::AbstractString,\n eltype::Type{T},\n)::VectorVector where {T <: StorageNumber}\n\nImplement setting a vector property with some name for some axis in format.\n\nImplement creating an empty dense matrix with some name for some rows_axis and columns_axis in format.\n\nThis trusts we have a write lock on the data set, that the axis exists in format and that the vector property name isn't \"name\", and that it does not exist for the axis.\n\nnote: Note\nThe return type of this function is always a functionally dense vector, that is, it will have strides of (1,), so that elements are consecutive in memory. 
However it need not be an actual DenseVector because of Julia's type system's limitations.\n\n\n\n\n\n","category":"function"},{"location":"formats.html#Daf.Formats.format_filled_empty_dense_vector!","page":"Formats","title":"Daf.Formats.format_filled_empty_dense_vector!","text":"format_filled_empty_dense_vector!(\n daf::DafWriter,\n axis::AbstractString,\n name::AbstractString,\n filled_vector::AbstractVector{T},\n)::Nothing where {T <: StorageNumber}\n\nAllow the format to perform caching once the empty dense vector has been filled. By default this does nothing.\n\n\n\n\n\n","category":"function"},{"location":"formats.html#Daf.Formats.format_get_empty_sparse_vector!","page":"Formats","title":"Daf.Formats.format_get_empty_sparse_vector!","text":"format_get_empty_sparse_vector!(\n format::FormatWriter,\n axis::AbstractString,\n name::AbstractString,\n eltype::Type{T},\n nnz::StorageInteger,\n indtype::Type{I},\n)::Tuple{AbstractVector{I}, AbstractVector{T}, Any}\nwhere {T <: StorageNumber, I <: StorageInteger}\n\nImplement creating an empty sparse vector property with some name for some axis in format. The final tuple element is passed to format_filled_empty_sparse_vector!.\n\nThis trusts we have a write lock on the data set, that the axis exists in format and that the vector property name isn't \"name\", and that it does not exist for the axis.\n\n\n\n\n\n","category":"function"},{"location":"formats.html#Daf.Formats.format_filled_empty_sparse_vector!","page":"Formats","title":"Daf.Formats.format_filled_empty_sparse_vector!","text":"format_filled_empty_sparse_vector!(\n format::FormatWriter,\n axis::AbstractString,\n name::AbstractString,\n extra::Any,\n filled::SparseVector{T, I},\n)::Nothing where {T <: StorageNumber, I <: StorageInteger}\n\nAllow the format to perform caching once the empty sparse vector has been filled. 
By default this does nothing.\n\n\n\n\n\n","category":"function"},{"location":"formats.html#Daf.Formats.format_get_empty_dense_matrix!","page":"Formats","title":"Daf.Formats.format_get_empty_dense_matrix!","text":"format_get_empty_dense_matrix!(\n format::FormatWriter,\n rows_axis::AbstractString,\n columns_axis::AbstractString,\n name::AbstractString,\n eltype::Type{T},\n)::AbstractMatrix{T} where {T <: StorageNumber}\n\nImplement creating an empty dense matrix property with some name for some rows_axis and columns_axis in format.\n\nThis trusts we have a write lock on the data set, that the rows_axis and columns_axis exist in format and that the name matrix property does not exist for them.\n\nnote: Note\nThe return type of this function is always a functionally dense matrix, that is, it will have strides of (1,nrows), so that elements are consecutive in memory. However it need not be an actual DenseMatrix because of Julia's type system's limitations.\n\n\n\n\n\n","category":"function"},{"location":"formats.html#Daf.Formats.format_filled_empty_dense_matrix!","page":"Formats","title":"Daf.Formats.format_filled_empty_dense_matrix!","text":"format_filled_empty_dense_matrix!(\n daf::DafWriter,\n rows_axis::AbstractString,\n columns_axis::AbstractString,\n name::AbstractString,\n filled_matrix::AbstractVector{T},\n)::Nothing where {T <: StorageNumber}\n\nAllow the format to perform caching once the empty dense matrix has been filled. 
By default this does nothing.\n\n\n\n\n\n","category":"function"},{"location":"formats.html#Daf.Formats.format_get_empty_sparse_matrix!","page":"Formats","title":"Daf.Formats.format_get_empty_sparse_matrix!","text":"format_get_empty_sparse_matrix!(\n format::FormatWriter,\n rows_axis::AbstractString,\n columns_axis::AbstractString,\n name::AbstractString,\n eltype::Type{T},\n indtype::Type{I},\n nnz::StorageInteger,\n)::Tuple{AbstractVector{I}, AbstractVector{I}, AbstractVector{T}, Any}\nwhere {T <: StorageNumber, I <: StorageInteger}\n\nImplement creating an empty sparse matrix property with some name for some rows_axis and columns_axis in format. The final tuple element is passed to format_filled_empty_sparse_matrix!.\n\nThis trusts we have a write lock on the data set, that the rows_axis and columns_axis exist in format and that the name matrix property does not exist for them.\n\n\n\n\n\n","category":"function"},{"location":"formats.html#Daf.Formats.format_filled_empty_sparse_matrix!","page":"Formats","title":"Daf.Formats.format_filled_empty_sparse_matrix!","text":"format_filled_empty_sparse_matrix!(\n format::FormatWriter,\n rows_axis::AbstractString,\n columns_axis::AbstractString,\n name::AbstractString,\n extra::Any,\n filled::SparseMatrixCSC{T, I},\n)::Nothing where {T <: StorageNumber, I <: StorageInteger}\n\nAllow the format to perform caching once the empty sparse matrix has been filled. 
By default this does nothing.\n\n\n\n\n\n","category":"function"},{"location":"formats.html#Index","page":"Formats","title":"Index","text":"","category":"section"},{"location":"formats.html","page":"Formats","title":"Formats","text":"Pages = [\"formats.md\"]","category":"page"},{"location":"example_data.html#Example-data","page":"Example data","title":"Example data","text":"","category":"section"},{"location":"example_data.html","page":"Example data","title":"Example data","text":"Daf.ExampleData","category":"page"},{"location":"example_data.html#Daf.ExampleData","page":"Example data","title":"Daf.ExampleData","text":"Example data for doctest tests.\n\n\n\n\n\n","category":"module"},{"location":"example_data.html#Example-Storage","page":"Example data","title":"Example Storage","text":"","category":"section"},{"location":"example_data.html","page":"Example data","title":"Example data","text":"Daf.ExampleData.example_daf","category":"page"},{"location":"example_data.html#Daf.ExampleData.example_daf","page":"Example data","title":"Daf.ExampleData.example_daf","text":"Create an example MemoryDaf to use for doctest tests.\n\n\n\n\n\n","category":"function"},{"location":"example_data.html#Index","page":"Example data","title":"Index","text":"","category":"section"},{"location":"example_data.html","page":"Example data","title":"Example data","text":"Pages = [\"example_data.md\"]","category":"page"},{"location":"read_only.html#Read-only","page":"Read-only","title":"Read-only","text":"","category":"section"},{"location":"read_only.html","page":"Read-only","title":"Read-only","text":"Daf.ReadOnly\nDaf.ReadOnly.DafReadOnly\nDaf.ReadOnly.read_only\nDaf.ReadOnly.DafReadOnlyWrapper","category":"page"},{"location":"read_only.html#Daf.ReadOnly","page":"Read-only","title":"Daf.ReadOnly","text":"Read-only Daf storage format.\n\n\n\n\n\n","category":"module"},{"location":"read_only.html#Daf.ReadOnly.DafReadOnly","page":"Read-only","title":"Daf.ReadOnly.DafReadOnly","text":"A common 
base type for a read-only DafReader, which doesn't allow any modification of the data.\n\n\n\n\n\n","category":"type"},{"location":"read_only.html#Daf.ReadOnly.read_only","page":"Read-only","title":"Daf.ReadOnly.read_only","text":"read_only(daf::DafReader[; name::Maybe{AbstractString} = nothing])::DafReadOnlyWrapper\n\nWrap daf with a DafReadOnlyWrapper to protect it against accidental modification. If not specified, the name of the daf is reused. If name is not specified and daf isa DafReadOnly, return it as-is.\n\n\n\n\n\n","category":"function"},{"location":"read_only.html#Daf.ReadOnly.DafReadOnlyWrapper","page":"Read-only","title":"Daf.ReadOnly.DafReadOnlyWrapper","text":"struct DafReadOnlyWrapper <: DafReader ... end\n\nA wrapper for any DafWriter data, protecting it against accidental modification. This isn't exported and isn't created manually; instead call read_only.\n\n\n\n\n\n","category":"type"},{"location":"read_only.html#Index","page":"Read-only","title":"Index","text":"","category":"section"},{"location":"read_only.html","page":"Read-only","title":"Read-only","text":"Pages = [\"read_only.md\"]","category":"page"},{"location":"concat.html#Concat","page":"Concat","title":"Concat","text":"","category":"section"},{"location":"concat.html","page":"Concat","title":"Concat","text":"Daf.Concat\nDaf.Concat.concatenate\nDaf.Concat.MergeData\nDaf.Concat.MergeAction","category":"page"},{"location":"concat.html#Daf.Concat","page":"Concat","title":"Daf.Concat","text":"Concatenate multiple Daf data sets along some axis. This copies the data from the concatenated data sets into some target data set.\n\nThe exact behavior of concatenation is surprisingly complex when accounting for sparse vs. dense matrices, different matrix layouts, and properties which are not along the concatenation axis. 
The implementation is further complicated by minimizing the allocation of intermediate memory buffers for the data; that is, in principle, concatenating from and into memory-mapped data sets should not allocate \"any\" memory buffers - the data should be copied directly from one memory-mapped region to another.\n\n\n\n\n\n","category":"module"},{"location":"concat.html#Daf.Concat.concatenate","page":"Concat","title":"Daf.Concat.concatenate","text":"concatenate(\n destination::DafWriter,\n axis::Union{AbstractString, AbstractStringVector},\n sources::AbstractVector{<:DafReader};\n [names::Maybe{AbstractStringVector} = nothing,\n dataset_axis::Maybe{AbstractString} = \"dataset\",\n dataset_property::Bool = true,\n prefix::Union{Bool, AbstractVector{Bool}} = false,\n prefixed::Maybe{Union{AbstractStringSet, AbstractVector{<:AbstractStringSet}}} = nothing,\n empty::Maybe{EmptyData} = nothing,\n sparse_if_saves_storage_fraction = 0.25,\n merge::Maybe{MergeData} = nothing,\n overwrite::Bool = false]\n)::Nothing\n\nConcatenate data from a sources sequence of Daf data sets into a single destination data set along one or more concatenation axis. You can also concatenate along multiple axes by specifying an array of axis names.\n\nWe need a unique name for each of the concatenated data sets. By default, we use the DafReader.name. You can override this by specifying an explicit names vector with one name per data set.\n\nBy default, a new axis named by dataset_axis is created with one entry per concatenated data set, using these unique names. You can disable this by setting dataset_axis to nothing.\n\nIf an axis is created, and dataset_property is set (the default), a property with the same name is created for the concatenated axis, containing the name of the data set each entry was collected from.\n\nThe entries of each concatenated axis must be unique. By default, we require that no entry name is used in more than one data set. 
If this isn't the case, then set prefix to specify adding the unique data set name (and a . separator) to its entries (either once for all the axes, or using a vector with a setting per axis).\n\nnote: Note\nIf a prefix is added to the axis entry names, then it must also be added to all the vector properties whose values are entries of the axis. By default, we assume that any property name that is identical to the axis name is such a property (e.g., given a cluster axis, a cluster property of each cell is assumed to contain the names of clusters from that axis). We also allow for property names to just start with the axis name, followed by . and some suffix (e.g., cluster.manual will also be assumed to contain the names of clusters). We'll automatically add the unique prefix to all such properties. If, however, this heuristic fails, you can specify a vector of properties to be prefixed (or a vector of such vectors, one per concatenated axis). In this case only the listed properties will be prefixed with the unique data set names.\n\nVector and matrix properties for the axis will be concatenated. If some of the concatenated data sets do not contain some property, then an empty value must be specified for it, and will be used for the missing data.\n\nConcatenated matrices are always stored in column-major layout where the concatenation axis is the column axis. There should not exist any matrices whose both axes are concatenated (e.g., square matrices of the concatenated axis).\n\nThe concatenated properties will be sparse if the storage for the sparse data is smaller than naive dense storage by at least sparse_if_saves_storage_fraction (by default, if using sparse storage saves at least 25% of the space, that is, takes at most 75% of the dense storage space). 
When estimating this fraction, we assume dense data is 100% non-zero; we only take into account data already stored as sparse, as well as any missing data whose empty value is zero.\n\nBy default, properties that do not apply to any of the concatenation axis will be ignored. If merge is specified, then such properties will be processed according to it. Using CollectAxis for a property requires that the dataset_axis will not be nothing.\n\nBy default, concatenation will fail rather than overwrite existing properties in the target.\n\n\n\n\n\n","category":"function"},{"location":"concat.html#Daf.Concat.MergeData","page":"Concat","title":"Daf.Concat.MergeData","text":"A vector of pairs where the key is a DataKey and the value is MergeAction. Similarly to ViewData, the order of the entries matters (last one wins), and a key containing \"*\" is expanded to all the relevant properties. For matrices, merge is done separately for each layout. That is, the order of the key (rows_axis, columns_axis, matrix_name) does matter in the MergeData, which is different from how ViewData works.\n\nnote: Note\nDue to Julia's type system limitations, there's just no way for the system to enforce the type of the pairs in this vector. That is, what we'd like to say is:MergeData = AbstractVector{Pair{DataKey, MergeAction}}But what we are forced to say is:MergeData = AbstractVector{<:Pair}Glory to anyone who figures out an incantation that would force the system to perform more meaningful type inference here.\n\n\n\n\n\n","category":"type"},{"location":"concat.html#Daf.Concat.MergeAction","page":"Concat","title":"Daf.Concat.MergeAction","text":"The action for merging the values of a property from the concatenated data sets into the result data set. This is used for properties that do not apply to the concatenation axis (that is, scalar properties, and vector and matrix properties of other axes). Valid values are:\n\nSkipProperty - do not create the property in the result. 
This is the default.\nLastValue - use the value from the last concatenated data set (that has a value for the property). This is useful for properties that have the same value for all concatenated data sets.\nCollectAxis - collect the values from all the data sets, adding a dimension to the data (that is, convert a scalar property to a vector, and a vector property to a matrix). This can't be applied to matrix properties, because we can't directly store 3D data inside Daf. In addition, this requires that a dataset axis is created in the target, and that an empty value is specified for the property if it is missing from any of the concatenated data sets.\n\n\n\n\n\n","category":"type"},{"location":"concat.html#Index","page":"Concat","title":"Index","text":"","category":"section"},{"location":"concat.html","page":"Concat","title":"Concat","text":"Pages = [\"concat.md\"]","category":"page"},{"location":"queries.html#Queries","page":"Queries","title":"Queries","text":"","category":"section"},{"location":"queries.html","page":"Queries","title":"Queries","text":"Daf.Queries","category":"page"},{"location":"queries.html#Daf.Queries","page":"Queries","title":"Daf.Queries","text":"Extract data from a DafReader.\n\n\n\n\n\n","category":"module"},{"location":"queries.html#Construction","page":"Queries","title":"Construction","text":"","category":"section"},{"location":"queries.html","page":"Queries","title":"Queries","text":"Daf.Queries.Query\nDaf.Queries.@q_str","category":"page"},{"location":"queries.html#Daf.Queries.Query","page":"Queries","title":"Daf.Queries.Query","text":"Query(query_string::AbstractString) <: QueryOperation\n\nA query is a description of a (sub-)process for extracting some data from a DafReader. A full query is a sequence of QueryOperation, that when applied one at a time on some DafReader, result in a scalar, vector or matrix result. 
A single Lookup or a single Axis are also valid complete queries.\n\nTo apply a query to some DafReader data, invoke get_query (you can also use the shorthand daf[query] instead of get_query(daf, query)). By default, query operations will cache their results in memory as QueryData, to speed up repeated queries. This may lock up large amounts of memory; you can call empty_cache! to release it.\n\nQueries can be constructed in two ways. In code, a query can be built by chaining query operations (e.g., the expression Axis(\"gene\") |> Lookup(\"is_marker\") looks up the is_marker vector property of the gene axis).\n\nAlternatively, a query can be written as a string, which needs to be parsed into a Query object (e.g., the above can be written as Query(\"/gene:is_marker\")). See the QUERY_OPERATORS for a table of supported operators. Spaces (and comments) around the operators are optional; see tokenize for details. You can also convert a Query to a string (or print it, etc.) to see its representation. This is used for error messages and as a key when caching query results.\n\nSince query strings use \ as an escape character, it is easier to use raw string literals for queries (e.g., Query(raw\"cell = ATGC\:B1 : age\") vs. Query(\"cell = ATGC\\:B1 : age\")). To make this even easier we provide the q macro (e.g., q\"cell = ATGC\:B1 : batch\") which works similarly to Julia's standard r macro for literal Regex strings.\n\nBeing able to represent queries as strings allows for reading them from configuration files and letting the user input them in an application UI (e.g., allowing the user to specify the X, Y and/or colors of a scatter plot using queries). At the same time, being able to incrementally build queries using code allows for convenient reuse (e.g., reusing axis sub-queries in Daf views), without having to go through the string representation.\n\nDaf provides a comprehensive set of QueryOperations that can be used to construct queries. 
The QUERY_OPERATORS listed below provide the basic functionality (e.g., specifying an Axis or a property Lookup). In addition, Daf provides computation operations (EltwiseOperation and ReductionOperation), allowing for additional operations to be provided by external packages.\n\nObviously not all possible combinations of operations make sense (e.g., Lookup(\"is_marker\") |> Axis(\"cell\") will not work). For the full list of valid combinations, see NAMES_QUERY, SCALAR_QUERY, VECTOR_QUERY and MATRIX_QUERY below.\n\n\n\n\n\n","category":"type"},{"location":"queries.html#Daf.Queries.@q_str","page":"Queries","title":"Daf.Queries.@q_str","text":"q\"...\"\n\nShorthand for parsing a literal string as a Query. This is equivalent to Query(raw\"...\"), that is, a \ can be placed in the string without escaping it (except for before a \"). This is very convenient for literal queries (e.g., q\"/ cell = ATGC\:B1 : batch\" == Query(raw\"/ cell = ATGC\:B1 : batch\") == Query(\"/ cell = ATGC\\:B1 : batch\") == Axis(\"cell\") |> IsEqual(\"ATGC:B1\") |> Lookup(\"batch\")).\n\n\n\n\n\n","category":"macro"},{"location":"queries.html#Functions","page":"Queries","title":"Functions","text":"","category":"section"},{"location":"queries.html","page":"Queries","title":"Queries","text":"Daf.Queries.get_query\nDaf.Queries.get_frame\nDaf.Queries.QueryColumns\nDaf.Queries.query_result_dimensions\nDaf.Queries.is_axis_query","category":"page"},{"location":"queries.html#Daf.Queries.get_query","page":"Queries","title":"Daf.Queries.get_query","text":"get_query(\n daf::DafReader,\n query::Union{Query, AbstractString};\n [cache::Bool = true]\n)::Union{StorageScalar, NamedVector, NamedMatrix}\n\nApply the full query to the Daf data and return the result. By default, this will cache results, so repeated queries will be accelerated. This may consume a large amount of memory. 
You can disable it by specifying cache = false, or release the cached data using empty_cache!.\n\nAs a shorthand syntax you can also invoke this using getindex, that is, using the [] operator (e.g., daf[q\"/ cell\"] is equivalent to get_query(daf, q\"/ cell\")).\n\n\n\n\n\n","category":"function"},{"location":"queries.html#Daf.Queries.get_frame","page":"Queries","title":"Daf.Queries.get_frame","text":"get_frame(\n daf::DafReader,\n axis::Union{Query, AbstractString},\n [columns::Maybe{Union{AbstractStringVector, QueryColumns}} = nothing;\n cache::Bool = true]\n)::DataFrame\n\nReturn a DataFrame containing multiple vectors of the same axis.\n\nThe axis can be either just the name of an axis (e.g., \"cell\"), or a query for the axis (e.g., q\"/ cell\"), possibly using a mask (e.g., q\"/ cell & age > 1\"). The result of the query must be a vector of unique axis entry names.\n\nIf columns is not specified, the data frame will contain all the vector properties of the axis, in alphabetical order (since DataFrame has no concept of named rows, the 1st column will contain the name of the axis entry). Otherwise, columns may be a vector of names of vector properties (e.g., [\"batch\", \"age\"]), or a vector of pairs mapping a column name to a query suffix (e.g., [\"color\" => q\": type => color\"]). This suffix is applied to the axis query (e.g., if the axis is masked as above, the full query for the color column would be q\"/ cell & age > 1 : type => color\"). The result of the full query must be a vector.\n\nBy default, this will cache results of all queries. This may consume a large amount of memory. You can disable it by specifying cache = false, or release the cached data using empty_cache!.\n\n\n\n\n\n","category":"function"},{"location":"queries.html#Daf.Queries.QueryColumns","page":"Queries","title":"Daf.Queries.QueryColumns","text":"Specify columns for a data frame. 
This is a vector of pairs, where the key is the column name, and the value is a query that computes the data of the column.\n\nnote: Note\nDue to Julia's type system limitations, there's just no way for the system to enforce the type of the pairs in this vector. That is, what we'd like to say is:QueryColumns = AbstractVector{Pair{AbstractString, Union{AbstractString, Query}}}But what we are forced to say is:QueryColumns = AbstractVector{<:Pair}Glory to anyone who figures out an incantation that would force the system to perform more meaningful type inference here.\n\n\n\n\n\n","category":"type"},{"location":"queries.html#Daf.Queries.query_result_dimensions","page":"Queries","title":"Daf.Queries.query_result_dimensions","text":"query_result_dimensions(query::Union{Query, AbstractString})::Int\n\nReturn the number of dimensions (-1 - names, 0 - scalar, 1 - vector, 2 - matrix) of the results of a query. This also verifies the query is syntactically valid, though it may still fail if applied to specific data due to invalid data values or types.\n\n\n\n\n\n","category":"function"},{"location":"queries.html#Daf.Queries.is_axis_query","page":"Queries","title":"Daf.Queries.is_axis_query","text":"is_axis_query(query::Union{Query, AbstractString})::Bool\n\nReturns whether the query specifies a (possibly masked) axis. 
This also verifies the query is syntactically valid, though it may still fail if applied to specific data due to invalid data values or types.\n\n\n\n\n\n","category":"function"},{"location":"queries.html#Syntax","page":"Queries","title":"Syntax","text":"","category":"section"},{"location":"queries.html","page":"Queries","title":"Queries","text":"Daf.Queries.QUERY_OPERATORS\nDaf.Queries.NAMES_QUERY\nDaf.Queries.SCALAR_QUERY\nDaf.Queries.LOOKUP_PROPERTY\nDaf.Queries.VECTOR_ENTRY\nDaf.Queries.MATRIX_ENTRY\nDaf.Queries.REDUCE_VECTOR\nDaf.Queries.VECTOR_QUERY\nDaf.Queries.VECTOR_PROPERTY\nDaf.Queries.VECTOR_LOOKUP\nDaf.Queries.MATRIX_ROW\nDaf.Queries.MATRIX_COLUMN\nDaf.Queries.REDUCE_MATRIX\nDaf.Queries.MATRIX_QUERY\nDaf.Queries.MATRIX_LOOKUP\nDaf.Queries.COUNTS_MATRIX\nDaf.Queries.POST_PROCESS\nDaf.Queries.GROUP_BY\nDaf.Queries.AXIS_MASK\nDaf.Queries.MASK_OPERATION\nDaf.Queries.VECTOR_FETCH\nDaf.Queries.ComparisonOperation\nDaf.Queries.guess_typed_value","category":"page"},{"location":"queries.html#Daf.Queries.QUERY_OPERATORS","page":"Queries","title":"Daf.Queries.QUERY_OPERATORS","text":"Operators used to represent a Query as a string.\n\nOperator Implementation Description\n/ Axis Specify a vector or matrix axis (e.g., / cell : batch or / cell / gene : UMIs).\n? Names 1. Names of scalars or axes (? axes, ? scalars).\n 2. Names of vectors of axis (e.g., / cell ?).\n 3. Names of matrices of axes (e.g., / cell / gene ?).\n: Lookup Lookup a property (e.g., @ version, / cell : batch or / cell / gene : UMIs).\n=> Fetch Fetch a property from another axis (e.g., / cell : batch => age).\n! AsAxis 1. Specify axis name when fetching a property (e.g., / cell : manual ! type => color).\n 2. Force all axis values when counting (e.g., / cell : batch ! * manual ! type).\n 3. Force all axis values when grouping (e.g., / cell : age @ batch ! %> Mean).\n?? IfNot 1. Mask excluding false-ish values (e.g., / cell : batch ?? => age).\n 2. 
Default for false-ish lookup values (e.g., / cell : type ?? Outlier).\n 3. Default for false-ish fetched values (e.g., / cell : batch ?? 1 => age).\n││ IfMissing 1. Value for missing lookup properties (e.g., / gene : is_marker ││ false).\n 2. Value for missing fetched properties (e.g., / cell : type ││ red => color).\n 3. Value for empty reduced vectors (e.g., / cell : type = LMPP => age %> Max ││ 0).\n% EltwiseOperation Apply an element-wise operation (e.g., / cell / gene : UMIs % Log base 2 eps 1).\n%> ReductionOperation Apply a reduction operation (e.g., / cell / gene : UMIs %> Sum).\n* CountBy Compute counts matrix (e.g., / cell : age * type).\n@ GroupBy 1. Aggregate vector entries by a group (e.g., / cell : age @ type %> Mean).\n 2. Aggregate matrix row entries by a group (e.g., / cell / gene : UMIs @ type %> Max).\n& And Restrict axis entries (e.g., / gene & is_marker).\n&! AndNot Restrict axis entries (e.g., / gene &! is_marker).\n│ Or Expand axis entries (e.g., / gene & is_marker │ is_noisy).\n│! OrNot Expand axis entries (e.g., / gene & is_marker │! is_noisy).\n^ Xor Flip axis entries (e.g., / gene & is_marker ^ is_noisy).\n^! XorNot Flip axis entries (e.g., / gene & is_marker ^! is_noisy).\n= IsEqual 1. Select an entry from an axis (e.g., / cell / gene = FOX1 : UMIs).\n 2. Compare equal (e.g., / cell & age = 1).\n!= IsNotEqual Compare not equal (e.g., / cell & age != 1).\n< IsLess Compare less than (e.g., / cell & age < 1).\n<= IsLessEqual Compare less or equal (e.g., / cell & age <= 1).\n> IsGreater Compare greater than (e.g., / cell & age > 1).\n>= IsGreaterEqual Compare greater or equal (e.g., / cell & age >= 1).\n~ IsMatch Compare match (e.g., / gene & name ~ RP\[SL\]).\n!~ IsNotMatch Compare not match (e.g., / gene & name !~ RP\[SL\]).\n\nnote: Note\nDue to Julia's Documenter limitations, the ASCII | character (|) is replaced by the Unicode │ character (│) in the above table. 
Sigh.\n\n\n\n\n\n","category":"constant"},{"location":"queries.html#Daf.Queries.NAMES_QUERY","page":"Queries","title":"Daf.Queries.NAMES_QUERY","text":"NAMES_QUERY := ( Names scalars | Names axes | Axis Names | Axis Axis Names )\n\nA query returning a set of names:\n\nLooking up the set of names of the scalar properties (? scalars).\nLooking up the set of names of the axes (? axes).\nLooking up the set of names of the vector properties of an axis (e.g., / cell ?).\nLooking up the set of names of the matrix properties of a pair of axes (e.g., / cell / gene ?).\n\n\n\n\n\n","category":"constant"},{"location":"queries.html#Daf.Queries.SCALAR_QUERY","page":"Queries","title":"Daf.Queries.SCALAR_QUERY","text":"SCALAR_QUERY := ( LOOKUP_PROPERTY | VECTOR_ENTRY | MATRIX_ENTRY | REDUCE_VECTOR ) EltwiseOperation*\n\nA query returning a scalar can be one of:\n\nLooking up the value of a scalar property (e.g., : version will return the value of the version scalar property).\nPicking a single entry of a vector property (e.g., / gene = FOX1 : is_marker will return whether the gene named FOX1 is a marker gene).\nPicking a single entry of a matrix property (e.g., / gene = FOX1 / cell = ATGC : UMIs will return the number of UMIs of the FOX1 gene of the ATGC cell).\nReducing some vector into a single value (e.g., / donor : age %> Mean will compute the mean age of all the donors).\n\nEither way, this can be followed by a series of EltwiseOperation to modify the scalar result (e.g., / donor : age %> Mean % Log base 2 % Abs will compute the absolute value of the log base 2 of the mean age of all the donors).\n\n\n\n\n\n","category":"constant"},{"location":"queries.html#Daf.Queries.LOOKUP_PROPERTY","page":"Queries","title":"Daf.Queries.LOOKUP_PROPERTY","text":"LOOKUP_PROPERTY := Lookup IfMissing?\n\nLookup the value of a scalar or matrix property. 
This is used on its own to access a scalar property (e.g., : version) or combined with two axes to access a matrix property (e.g., / cell / gene : UMIs).\n\nBy default, it is an error if the property does not exist. However, if an IfMissing is provided, then this value is used instead (e.g., : version || Unknown will return Unknown if there is no version scalar property, and / cell / gene : UMIs || 0 will return an all-zero matrix if there is no UMIs matrix property).\n\nAccessing a VECTOR_PROPERTY allows for more complex operations.\n\n\n\n\n\n","category":"constant"},{"location":"queries.html#Daf.Queries.VECTOR_ENTRY","page":"Queries","title":"Daf.Queries.VECTOR_ENTRY","text":"VECTOR_ENTRY := Axis IsEqual VECTOR_LOOKUP\n\nLookup the scalar value of some entry of a vector property of some axis (e.g., / gene = FOX1 : is_marker will return whether the FOX1 gene is a marker gene).\n\n\n\n\n\n","category":"constant"},{"location":"queries.html#Daf.Queries.MATRIX_ENTRY","page":"Queries","title":"Daf.Queries.MATRIX_ENTRY","text":"MATRIX_ENTRY := Axis IsEqual Axis IsEqual LOOKUP_PROPERTY\n\nLookup the scalar value of the named entry of a matrix property (e.g., / gene = FOX1 / cell = ATGC : UMIs will return the number of UMIs of the FOX1 gene of the ATGC cell).\n\n\n\n\n\n","category":"constant"},{"location":"queries.html#Daf.Queries.REDUCE_VECTOR","page":"Queries","title":"Daf.Queries.REDUCE_VECTOR","text":"REDUCE_VECTOR := VECTOR_QUERY ReductionOperation IfMissing?\n\nPerform an arbitrary vector query, and reduce the result into a single scalar value (e.g., / donor : age %> Mean will compute the mean of the ages of the donors).\n\nBy default, it is an error if the vector query results in an empty vector. 
However, if an IfMissing suffix is provided, then this value is used instead (e.g., / cell & type = LMPP : age %> Mean || 0 will return zero if there are no cells whose type is LMPP).\n\n\n\n\n\n","category":"constant"},{"location":"queries.html#Daf.Queries.VECTOR_QUERY","page":"Queries","title":"Daf.Queries.VECTOR_QUERY","text":"VECTOR_QUERY := ( VECTOR_PROPERTY | MATRIX_ROW | MATRIX_COLUMN | REDUCE_MATRIX ) POST_PROCESS*\n\nA query returning a vector can be one of:\n\nLooking up the value of a vector property (e.g., / gene : is_marker will return a mask of the marker genes).\nPicking a single row or column of a matrix property (e.g., / gene = FOX1 / cell : UMIs will return a vector of the UMIs of the FOX1 gene of all the cells).\nReducing each column of some matrix into a scalar, resulting in a vector (e.g., / gene / cell : UMIs %> Sum will compute the sum of the UMIs of all the genes in each cell).\n\nEither way, this can be followed by further processing of the vector (e.g., / gene / cell : UMIs % Log base 2 eps 1 will compute the log base 2 of one plus the UMIs of each gene in each cell).\n\n\n\n\n\n","category":"constant"},{"location":"queries.html#Daf.Queries.VECTOR_PROPERTY","page":"Queries","title":"Daf.Queries.VECTOR_PROPERTY","text":"VECTOR_PROPERTY := Axis AXIS_MASK* VECTOR_LOOKUP VECTOR_FETCH*\n\nLookup the values of some vector property (e.g., / gene : is_marker will return a mask of the marker genes). This can be restricted to a subset of the vector using masks (e.g., / gene & is_marker : is_noisy will return a mask of the noisy genes out of the marker genes), and/or fetch the property value from indirect axes (e.g., / cell : batch => donor => age will return the age of the donor of the batch of each cell).\n\n\n\n\n\n","category":"constant"},{"location":"queries.html#Daf.Queries.VECTOR_LOOKUP","page":"Queries","title":"Daf.Queries.VECTOR_LOOKUP","text":"VECTOR_LOOKUP := Lookup IfMissing? 
( IfNot | AsAxis )?\n\nA Lookup of a vector property (e.g., / cell : type will return the type of each cell).\n\nBy default, it is an error if the property does not exist. However, if an IfMissing is provided, then this value is used instead (e.g., / cell : type || Unknown will return a vector of Unknown types if there is no type property for the cell axis).\n\nIf the IfNot suffix is provided, it controls how to modify \"false-ish\" (empty string, zero numeric value, or false Boolean value) entries (e.g., / cell : type ?? will return a vector of the type of each cell that has a non-empty type, while / cell : type ?? Outlier will return a vector of the type of each cell, where cells with an empty type are given the type Outlier).\n\nOnly when the vector property is used for CountBy or for GroupBy, providing the AsAxis suffix indicates that the property is associated with an axis (similar to an indirect axis in Fetch), and the set of groups is forced to be the values of that axis; in this case, empty string values are always ignored (e.g., / cell : age @ type ! %> Mean || 0 will return a vector of the mean age of the cells of each type, with a value of zero for types which have no cells, and ignoring cells which have an empty type; similarly, / cell : batch => donor ! * type ! 
will return a matrix whose rows are donors and columns are types, counting the number of cells of each type that were sampled from each donor, ignoring cells which have an empty type or whose batch has an empty donor).\n\n\n\n\n\n","category":"constant"},{"location":"queries.html#Daf.Queries.MATRIX_ROW","page":"Queries","title":"Daf.Queries.MATRIX_ROW","text":"MATRIX_ROW := Axis IsEqual Axis AXIS_MASK* Lookup\n\nLookup the values of a single row of a matrix property, eliminating the rows axis (e.g., / gene = FOX1 / cell : UMIs will evaluate to a vector of the UMIs of the FOX1 gene of all the cells).\n\n\n\n\n\n","category":"constant"},{"location":"queries.html#Daf.Queries.MATRIX_COLUMN","page":"Queries","title":"Daf.Queries.MATRIX_COLUMN","text":"MATRIX_COLUMN := Axis AXIS_MASK* Axis IsEqual Lookup\n\nLookup the values of a single column of a matrix property, eliminating the columns axis (e.g., / gene / cell = ATGC : UMIs will evaluate to a vector of the UMIs of all the genes of the ATGC cell).\n\n\n\n\n\n","category":"constant"},{"location":"queries.html#Daf.Queries.REDUCE_MATRIX","page":"Queries","title":"Daf.Queries.REDUCE_MATRIX","text":"REDUCE_MATRIX := MATRIX_QUERY ReductionOperation\n\nPerform an arbitrary matrix query, and reduce the result into a vector by converting each column into a single value, eliminating the rows axis (e.g., / gene / cell : UMIs %> Sum will evaluate to a vector of the total UMIs of each cell).\n\n\n\n\n\n","category":"constant"},{"location":"queries.html#Daf.Queries.MATRIX_QUERY","page":"Queries","title":"Daf.Queries.MATRIX_QUERY","text":"MATRIX_QUERY := ( MATRIX_LOOKUP | COUNTS_MATRIX ) POST_PROCESS*\n\nA query returning a matrix can be one of:\n\nLooking up the value of a matrix property (e.g., / gene / cell : UMIs will return the matrix of UMIs for each gene and cell).\nCounting the number of times each combination of two vector properties occurs in the data (e.g., / cell : batch => donor => age * type will return a matrix whose 
rows are ages and columns are types, where each entry contains the number of cells which have the specific type and age).\n\nEither way, this can be followed by a series of EltwiseOperation to modify the results (e.g., / gene / cell : UMIs % Log base 2 eps 1 will compute the log base 2 of 1 plus the UMIs of each gene in each cell).\n\n\n\n\n\n","category":"constant"},{"location":"queries.html#Daf.Queries.MATRIX_LOOKUP","page":"Queries","title":"Daf.Queries.MATRIX_LOOKUP","text":"MATRIX_LOOKUP := Axis AXIS_MASK* Axis AXIS_MASK* Lookup\n\nLookup the values of some matrix property (e.g., / gene / cell : UMIs will return the matrix of UMIs of each gene in each cell). This can be restricted to a subset of the vector using masks (e.g., / gene & is_marker / cell & type = LMPP : UMIs will return a matrix of the UMIs of each marker gene in cells whose type is LMPP).\n\n\n\n\n\n","category":"constant"},{"location":"queries.html#Daf.Queries.COUNTS_MATRIX","page":"Queries","title":"Daf.Queries.COUNTS_MATRIX","text":"COUNTS_MATRIX := VECTOR_QUERY CountBy VECTOR_FETCH*\n\nCompute a matrix of counts of each combination of values given two vectors (e.g., / cell : batch => donor => age * batch => donor => sex will return a matrix whose rows are ages and columns are sexes, where each entry contains the number of cells which have the specific age and sex).\n\n\n\n\n\n","category":"constant"},{"location":"queries.html#Daf.Queries.POST_PROCESS","page":"Queries","title":"Daf.Queries.POST_PROCESS","text":"POST_PROCESS := EltwiseOperation | GROUP_BY\n\nA vector or a matrix result may be processed by one of:\n\nApplying an EltwiseOperation operation to each value (e.g., / donor : age % Log base 2 will compute the log base 2 of the ages of all donors, and / gene / cell : UMIs % Log base 2 eps 1 will compute the log base 2 of 1 plus the UMIs count of each gene in each cell).\nReducing each group of vector entries or matrix rows into a single value (e.g., / cell : batch => donor => age @ type 
%> Mean will compute a vector of the mean age of the cells of each type, and / cell / gene : UMIs @ type %> Mean will compute a matrix of the mean UMIs of each gene for the cells of each type).\n\n\n\n\n\n","category":"constant"},{"location":"queries.html#Daf.Queries.GROUP_BY","page":"Queries","title":"Daf.Queries.GROUP_BY","text":"GROUP_BY := GroupBy VECTOR_FETCH* ReductionOperation IfMissing\n\nThe entries of a vector or the rows of a matrix result may be grouped, where all the values that have the same group value are reduced to a single value using a ReductionOperation (e.g., / cell : batch => donor => age @ type %> Mean will compute the mean age of all the cells of each type, and / cell / gene : UMIs @ type %> Mean will compute a matrix of the mean UMIs of each gene for the cells of each type).\n\nIf the group property is suffixed by AsAxis, then the result will have a value for each entry of the axis (e.g., / cell : age @ type ! %> Mean will compute the mean age of the cells of each type). In this case, some groups may have no values at all, which by default, is an error. Providing an IfMissing suffix will use the specified value for such empty groups instead (e.g., / cell : age @ type ! %> Mean || 0 will compute the mean age for the cells of each type, with a zero value for types for which there are no cells).\n\n\n\n\n\n","category":"constant"},{"location":"queries.html#Daf.Queries.AXIS_MASK","page":"Queries","title":"Daf.Queries.AXIS_MASK","text":"AXIS_MASK := MASK_OPERATION ( VECTOR_FETCH )* ( ComparisonOperation )?\n\nRestrict the set of entries of an axis to lookup results for (e.g., / gene & is_marker). 
If the mask is based on a non-Bool property, it is converted to a Boolean by comparing with the empty string or a zero value (depending on its data type); alternatively, you can explicitly compare it with a value (e.g., / cell & batch => donor => age > 1).\n\n\n\n\n\n","category":"constant"},{"location":"queries.html#Daf.Queries.MASK_OPERATION","page":"Queries","title":"Daf.Queries.MASK_OPERATION","text":"MASK_OPERATION := And | AndNot | Or | OrNot | Xor | XorNot\n\nA query operation for restricting the set of entries of an Axis. The mask operations are applied to the current mask, so if several operations are applied, they are applied in order from left to right (e.g., / gene & is_marker | is_noisy &! is_lateral will first restrict the set of genes to marker genes, then expand it to include noisy genes as well, then remove all the lateral genes; this would be different from / gene & is_marker &! is_lateral | is_noisy, which will include all noisy genes even if they are lateral).\n\n\n\n\n\n","category":"constant"},{"location":"queries.html#Daf.Queries.VECTOR_FETCH","page":"Queries","title":"Daf.Queries.VECTOR_FETCH","text":"VECTOR_FETCH := AsAxis? Fetch IfMissing? ( IfNot | AsAxis )?\n\nFetch the value of a property of an indirect axis. That is, there is a common pattern where one axis (e.g., cell) has a property (e.g., type) which has the same name as an axis, and whose values are (string) entry names of that axis. In this case, we often want to lookup a property of the other axis (e.g., / cell : type => color will evaluate to a vector of the color of the type of each cell). Sometimes one walks a chain of such properties (e.g., / cell : batch => donor => age).\n\nSometimes it is needed to store several alternate properties that refer to the same indirect axis. In this case, the name of the property can begin with the axis name, followed by . 
and a suffix (e.g., / cell : type.manual => color will fetch the color of the manual type of each cell, still using the type axis).\n\nIf the property does not follow this convention, it is possible to manually specify the name of the axis using an AsAxis prefix (e.g., / cell : manual ! type => color will assume the value of the manual property is a vector of names of entries of the type axis).\n\nAs usual, if the property does not exist, this is an error, unless an IfMissing suffix is provided (e.g., / cell : type || red => color will assign all cells the color red if the type property does not exist).\n\nIf the value of the property is the empty string for some vector entries, by default this is again an error (as the empty string is not one of the values of the indirect axis). If an IfNot suffix is provided, such entries can be removed from the result (e.g., / cell : type ?? => color will return a vector of the colors of the cells which have a non-empty type), or can be given a specific value (e.g., / cell : type ?? red => color will return a vector of a color for each cell, giving the red color to cells with an empty type).\n\nWhen using IfMissing and/or IfNot, the default value provided is always of the final value (e.g., / cell : batch || -1 ?? -2 => donor || -3 ?? -4 => age || -5 ?? -6 will compute a vector of age per cell; if there's no batch property, all cells will get the age -1). If there is such a property, then cells with an empty batch will get the age -2. For cells with a non-empty batch, if there's no donor property, they will get the value -3. If there is such a property, cells with an empty donor will get the value -4. Finally, for cells with a batch and donor, if there is no age property, they will be given an age of -5. 
Otherwise, if their age is zero, it will be changed to -6.\n\n\n\n\n\n","category":"constant"},{"location":"queries.html#Daf.Queries.ComparisonOperation","page":"Queries","title":"Daf.Queries.ComparisonOperation","text":"ComparisonOperation := ( IsLess | IsLessEqual | IsEqual | IsNotEqual | IsGreater | IsGreaterEqual | IsMatch | IsNotMatch )\n\nA query operation computing a mask by comparing the values of a vector with some constant (e.g., / cell & age > 0). In addition, the IsEqual operation can be used to slice an entry from a vector (e.g., / gene = FOX1 : is_marker) or a matrix (e.g., / cell / gene = FOX1 : UMIs, / cell = ATGC / gene = FOX1 : UMIs).\n\n\n\n\n\n","category":"type"},{"location":"queries.html#Daf.Queries.guess_typed_value","page":"Queries","title":"Daf.Queries.guess_typed_value","text":"guess_typed_value(value::AbstractString)::StorageScalar\n\nGiven a string value, guess the typed value it represents:\n\ntrue and false are assumed to be Bool.\nIntegers are assumed to be Int64.\nFloating point numbers are assumed to be Float64, as are e and pi.\nAnything else is assumed to be a string.\n\nThis doesn't have to be 100% accurate; it is intended to allow omitting the data type in most cases when specifying an IfMissing value. 
If it guesses wrong, just specify an explicit type (e.g., @ version || 1.0 String).\n\n\n\n\n\n","category":"function"},{"location":"queries.html#Query-Operators","page":"Queries","title":"Query Operators","text":"","category":"section"},{"location":"queries.html","page":"Queries","title":"Queries","text":"Daf.Queries.QuerySequence\n\nDaf.Queries.And\nDaf.Queries.AndNot\nDaf.Queries.AsAxis\nDaf.Queries.Axis\nDaf.Queries.CountBy\nDaf.Queries.Fetch\nDaf.Queries.GroupBy\nDaf.Queries.IfMissing\nDaf.Queries.IfNot\nDaf.Queries.IsEqual\nDaf.Queries.IsGreater\nDaf.Queries.IsGreaterEqual\nDaf.Queries.IsLess\nDaf.Queries.IsLessEqual\nDaf.Queries.IsMatch\nDaf.Queries.IsNotEqual\nDaf.Queries.IsNotMatch\nDaf.Queries.Lookup\nDaf.Queries.Names\nDaf.Queries.Or\nDaf.Queries.OrNot\nDaf.Queries.Xor\nDaf.Queries.XorNot","category":"page"},{"location":"queries.html#Daf.Queries.QuerySequence","page":"Queries","title":"Daf.Queries.QuerySequence","text":"struct QuerySequence{N} <: Query where {N}\n\nA sequence of N QueryOperations.\n\n\n\n\n\n","category":"type"},{"location":"queries.html#Daf.Queries.And","page":"Queries","title":"Daf.Queries.And","text":"And(property::AbstractString) <: QueryOperation\n\nA query operation for restricting the set of entries of an Axis. In a string Query, this is specified using the & operator, followed by the name of an axis property to look up to compute the mask.\n\nThe mask may be just the fetched property (e.g., / gene & is_marker will restrict the result vector to only marker genes). If the value of the property is not Boolean, it is automatically compared to 0 or the empty string, depending on its type (e.g., / cell & type will restrict the result vector to only cells which were given a non-empty-string type annotation). 
It is also possible to fetch properties from other axes, and use an explicit ComparisonOperation to compute the Boolean mask (e.g., / cell & batch => age > 1 will restrict the result vector to cells whose batch has an age larger than 1).\n\n\n\n\n\n","category":"type"},{"location":"queries.html#Daf.Queries.AndNot","page":"Queries","title":"Daf.Queries.AndNot","text":"AndNot(property::AbstractString) <: QueryOperation\n\nSame as And but use the inverse of the mask. In a string Query, this is specified using the &! operator, followed by the name of an axis property to look up to compute the mask.\n\n\n\n\n\n","category":"type"},{"location":"queries.html#Daf.Queries.AsAxis","page":"Queries","title":"Daf.Queries.AsAxis","text":"AsAxis([axis::AbstractString = nothing]) <: QueryOperation\n\nThere are three cases where we may want to take a vector property and consider each value to be the name of an entry of some axis: Fetch, CountBy and GroupBy. In a string Query, this is indicated by the ! operators, optionally followed by the name of the axis to use.\n\nWhen using Fetch, we always lookup in some axis, so AsAxis is implied (e.g., / cell : type => color is identical to / cell : type ! => color). In contrast, when using CountBy and GroupBy, one has to explicitly specify AsAxis to force using all the entries of the axis for the counting or grouping (e.g., / cell : age @ type %> Mean will return a vector of the mean age of every type that has cells associated with it, while / cell : age @ type ! %> Mean will return a vector of the mean age of each and every value of the type axis; similarly, / cell : type * age will generate a counts matrix whose rows are types that have cells associated with them, while / cell : type ! 
* age will generate a counts matrix whose rows are exactly the entries of the type axis).\n\nSince the set of values is fixed by the axis matching the vector property, it is possible that, when using this for GroupBy, some groups would have no values, causing an error. This can be avoided by providing an IfMissing suffix to the reduction (e.g., / cell : age @ type ! %> Mean will fail if some type has no cells associated with it, while / cell : age @ type ! %> Mean || 0 will give such types a zero mean age).\n\nTypically, the name of the base property is identical to the name of the axis. In this case, there is no need to specify the name of the axis (as in the examples above). Sometimes it is useful to be able to store several vector properties which all map to the same axis. To support this, we support a naming convention where the property name begins with the axis name followed by a .suffix. (e.g., both / cell : type => color and / cell : type.manual => color will look up the color of the type of some property of the cell axis - either \"the\" type of each cell, or the alternate type.manual of each cell).\n\nIf the property name does not follow the above conventions, then it is possible to explicitly specify the name of the axis (e.g., / cell : manual ! type => color will consider each value of the manual property as the name of an entry of the type axis and look up the matching color property value of this axis).\n\n\n\n\n\n","category":"type"},{"location":"queries.html#Daf.Queries.Axis","page":"Queries","title":"Daf.Queries.Axis","text":"Axis(axis::AbstractString) <: QueryOperation\n\nA query operation for specifying a result axis. In a string Query, this is specified using the / operator followed by the axis name.\n\nThis needs to be specified at least once for a vector query (e.g., / cell : batch), and twice for a matrix (e.g., / cell / gene : UMIs). 
Axes can be filtered using Boolean masks using And, AndNot, Or, OrNot, Xor and XorNot (e.g., / gene & is_marker : is_noisy). Alternatively, a single entry can be selected from the axis using IsEqual (e.g., / gene = FOX1 : is_noisy, / cell / gene = FOX1 : UMIs, / cell = C1 / gene = FOX1 : UMIs). Finally, a matrix can be reduced into a vector, and a vector to a scalar, using ReductionOperation (e.g., / gene / cell : UMIs %> Sum %> Mean).\n\nnote: Note\nThis, Names and Lookup are the only QueryOperations that also works as a complete Query.\n\n\n\n\n\n","category":"type"},{"location":"queries.html#Daf.Queries.CountBy","page":"Queries","title":"Daf.Queries.CountBy","text":"CountBy(property::AbstractString) <: QueryOperation\n\nA query operation that generates a matrix of counts of combinations of pairs of values for the same entries of an axis. That is, it follows fetching some vector property, and is followed by fetching a second vector property of the same axis. The result is a matrix whose rows are the values of the 1st property and the columns are the values of the 2nd property, and the values are the number of times the combination of values appears. In a string Query, this is specified using the * operator, followed by the property name to look up (e.g., / cell : type * batch will generate a matrix whose rows correspond to cell types, whose columns correspond to cell batches, and whose values are the number of cells of each combination of batch and type).\n\nBy default, the rows and/or columns only contain actually seen values and are ordered alphabetically. However, it is common that one or both of the properties correspond to an axis. In this case, you can use an AsAxis suffix to force the rows and/or columns of the matrix to be exactly the entries of the specific axis (e.g., / cell : type ! * batch will generate a matrix whose rows are exactly the entries of the type axis, even if there is a type without any cells). 
This is especially useful when both properties are axes, as the result can be stored as a matrix property (e.g., / cell : type ! * batch ! will generate a matrix whose rows are the entries of the type axis, and whose columns are the entries of the batch axis, so it can be given to set_matrix!(daf, \"type\", \"batch\", ...)).\n\nThe raw counts matrix can be post-processed like any other matrix (using ReductionOperation or an EltwiseOperation). This allows computing useful aggregate properties (e.g., / cell : type * batch % Fractions will generate a matrix whose columns correspond to batches and whose rows are the fraction of the cells from each type within each batch).\n\n\n\n\n\n","category":"type"},{"location":"queries.html#Daf.Queries.Fetch","page":"Queries","title":"Daf.Queries.Fetch","text":"Fetch(property::AbstractString) <: QueryOperation\n\nA query operation for fetching the value of a property from another axis, based on a vector property whose values are entry names of the axis. In a string Query, this is specified using the => operator, followed by the name to look up.\n\nThat is, if you query for the values of a vector property (e.g., batch for each cell), and the name of this property is identical to some axis name, then we assume each value is the name of an entry of this axis. We use this to fetch the value of some other property (e.g., age) of that axis (e.g., / cell : batch => age).\n\nIt is useful to be able to store several vector properties which all map to the same axis. To support this, we support a naming convention where the property name begins with the axis name followed by a .suffix. 
(e.g., both / cell : type => color and / cell : type.manual => color will look up the color of the type of some property of the cell axis - either \"the\" type of each cell, or the alternate type.manual of each cell).\n\nFetching can be chained (e.g., / cell : batch => donor => age will fetch the age of the donor of the batch of each cell).\n\nIf the property does not exist, this is an error, unless this is followed by IfMissing (e.g., / cell : type => color || red). If the property contains an empty value, this is also an error, unless it is followed by an IfNot (e.g., / cell : type ? => color will compute a vector of the colors of the type of the cells that have a non-empty type, and / cell : batch ? 0 => donor => age will assign a zero age for cells which have an empty batch).\n\n\n\n\n\n","category":"type"},{"location":"queries.html#Daf.Queries.GroupBy","page":"Queries","title":"Daf.Queries.GroupBy","text":"GroupBy(property::AbstractString) <: QueryOperation\n\nA query operation that uses a (following) ReductionOperation to aggregate the values of each group of values. Will fetch the specified property_name (possibly followed by additional Fetch operations) and use the resulting vector for the name of the group of each value.\n\nIf applied to a vector, the result is a vector with one entry per group (e.g., / cell : age @ type %> Mean will generate a vector with an entry per cell type and whose values are the mean age of the cells of each type). If applied to a matrix, the result is a matrix with one row per group (e.g., / cell / gene : UMIs @ type %> Max will generate a matrix with one row per type and one column per gene, whose values are the maximal UMIs count of the gene in the cells of each type).\n\nBy default, the result uses only group values we actually observe, in sorted order. 
However, if the operation is followed by an AsAxis suffix, then the fetched property must correspond to an existing axis (similar to when using Fetch), and the result will use the entries of the axis, even if we do not observe them in the data (and will ignore vector entries with an empty value). In this case, the reduction operation will fail if there are no values for some group, unless it is followed by an IfMissing suffix (e.g., / cell : age @ type ! %> Mean will generate a vector whose entries are all the entries of the type axis, and will ignore cells with an empty type; this will fail if there are types which are not associated with any cell. In contrast, / cell : age @ type ! %> Mean || 0 will succeed, assigning a value of zero for types which have no cells associated with them).\n\n\n\n\n\n","category":"type"},{"location":"queries.html#Daf.Queries.IfMissing","page":"Queries","title":"Daf.Queries.IfMissing","text":"IfMissing(value::StorageScalar; dtype::Maybe{Type} = nothing) <: QueryOperation\n\nA query operation providing a value to use if the data is missing some property. In a string Query, this is specified using the || operator, followed by the value to use, and optionally followed by the data type of the value (e.g., : score || 1 Float32).\n\nIf the data type is not specified, and the value isa AbstractString, then the data type is deduced using guess_typed_value of the value.\n\n\n\n\n\n","category":"type"},{"location":"queries.html#Daf.Queries.IfNot","page":"Queries","title":"Daf.Queries.IfNot","text":"IfNot(value::Maybe{StorageScalar} = nothing) <: QueryOperation\n\nA query operation providing a value to use for \"false-ish\" values in a vector (empty strings, zero numeric values, or false Boolean values). In a string Query, this is indicated using the ?? operator, optionally followed by a value to use.\n\nIf the value is nothing (the default), then these entries are dropped (masked out) of the result (e.g., / cell : type ? 
behaves the same as / cell & type : type, that is, returns the type of the cells which have a non-empty type). Otherwise, this value is used instead of the \"false-ish\" value (e.g., / cell : type ? Outlier will return a vector of the type of each cell, with the value Outlier for cells with an empty type). When fetching properties, this is the final value (e.g., / cell : type ? red => color will return a vector of the color of the type of each cell, with a red color for the cells with an empty type).\n\nIf the value isa AbstractString, then it is automatically converted to the data type of the elements of the results vector.\n\n\n\n\n\n","category":"type"},{"location":"queries.html#Daf.Queries.IsEqual","page":"Queries","title":"Daf.Queries.IsEqual","text":"IsEqual(value::StorageScalar) <: QueryOperation\n\nEquality is used for two purposes:\n\nAs a comparison operator, similar to IsLess except that uses = instead of < for the comparison.\nTo select a single entry from a vector. This allows a query to select a single scalar from a vector (e.g., / gene = FOX1 : is_marker) or from a matrix (e.g., / cell = ATGC / gene = FOX1 : UMIs); or to slice a single vector from a matrix (e.g., / cell = ATGC / gene : UMIs or / cell / gene = FOX1 : UMIs).\n\n\n\n\n\n","category":"type"},{"location":"queries.html#Daf.Queries.IsGreater","page":"Queries","title":"Daf.Queries.IsGreater","text":"IsGreater(value::StorageScalar) <: QueryOperation\n\nSimilar to IsLess except that uses > instead of < for the comparison.\n\n\n\n\n\n","category":"type"},{"location":"queries.html#Daf.Queries.IsGreaterEqual","page":"Queries","title":"Daf.Queries.IsGreaterEqual","text":"IsGreaterEqual(value::StorageScalar) <: QueryOperation\n\nSimilar to IsLess except that uses >= instead of < for the comparison.\n\n\n\n\n\n","category":"type"},{"location":"queries.html#Daf.Queries.IsLess","page":"Queries","title":"Daf.Queries.IsLess","text":"IsLess(value::StorageScalar) <: QueryOperation\n\nA query operation for 
converting a vector value to a Boolean mask by comparing it to some value. In a string Query, this is specified using the < operator, followed by the value to compare with.\n\nA string value is automatically converted into the same type as the vector values (e.g., / cell & probability < 0.5 will restrict the result vector only to cells whose probability is less than half).\n\n\n\n\n\n","category":"type"},{"location":"queries.html#Daf.Queries.IsLessEqual","page":"Queries","title":"Daf.Queries.IsLessEqual","text":"IsLessEqual(value::StorageScalar) <: QueryOperation\n\nSimilar to IsLess except that uses <= instead of < for the comparison.\n\n\n\n\n\n","category":"type"},{"location":"queries.html#Daf.Queries.IsMatch","page":"Queries","title":"Daf.Queries.IsMatch","text":"IsMatch(value::Union{AbstractString, Regex}) <: QueryOperation\n\nSimilar to IsLess except that the compared values must be strings, and the mask is of the values that match the given regular expression.\n\n\n\n\n\n","category":"type"},{"location":"queries.html#Daf.Queries.IsNotEqual","page":"Queries","title":"Daf.Queries.IsNotEqual","text":"IsNotEqual(value::StorageScalar) <: QueryOperation\n\nSimilar to IsLess except that uses != instead of < for the comparison.\n\n\n\n\n\n","category":"type"},{"location":"queries.html#Daf.Queries.IsNotMatch","page":"Queries","title":"Daf.Queries.IsNotMatch","text":"IsNotMatch(value::Union{AbstractString, Regex}) <: QueryOperation\n\nSimilar to IsMatch except that looks for entries that do not match the pattern.\n\n\n\n\n\n","category":"type"},{"location":"queries.html#Daf.Queries.Lookup","page":"Queries","title":"Daf.Queries.Lookup","text":"Lookup(property::AbstractString) <: Query\n\nA query operation for looking up the value of a property with some name. 
In a string Query, this is specified using the : operator, followed by the property name to look up.\n\nIf the query state is empty, this looks up the value of a scalar property (e.g., : version).\nIf the query state contains a single axis, this looks up the value of a vector property (e.g., / cell : batch).\nIf the query state contains two axes, this looks up the value of a matrix property (e.g., / cell / gene : UMIs).\n\nIf the property does not exist, this is an error, unless this is followed by IfMissing (e.g., : version || 1.0).\n\nIf any of the axes has a single entry selected using IsEqual, this will reduce the dimension of the result (e.g., / cell / gene = FOX1 : UMIs is a vector, and both / cell = C1 / gene = FOX1 : UMIs and / gene = FOX1 : is_marker are scalars).\n\nnote: Note\nThis, Names and Axis are the only QueryOperations that also works as a complete Query.\n\n\n\n\n\n","category":"type"},{"location":"queries.html#Daf.Queries.Names","page":"Queries","title":"Daf.Queries.Names","text":"Names(kind::Maybe{AbstractString} = nothing) <: Query\n\nA query operation for looking up a set of names. In a string Query, this is specified using the ? operator, optionally followed by the kind of objects to name.\n\nIf the query state is empty, a kind must be specified, one of scalars or axes, and the result is the set of their names (? scalars, ? 
axes).\nIf the query state contains a single axis (without any masks), the kind must not be specified, and the result is the set of names of vector properties of the axis (e.g., / cell ?).\nIf the query state contains two axes (without any masks), the kind must not be specified, and the result is the set of names of matrix properties of the axes (e.g., / cell / gene ?).\n\nnote: Note\nThis, Lookup and Axis are the only QueryOperations that also works as a complete Query.\n\n\n\n\n\n","category":"type"},{"location":"queries.html#Daf.Queries.Or","page":"Queries","title":"Daf.Queries.Or","text":"Or(property::AbstractString) <: QueryOperation\n\nA query operation for expanding the set of entries of an Axis. In a string Query, this is specified using the | operator, followed by the name of an axis property to look up to compute the mask.\n\nThis works similarly to And, except that it adds to the mask (e.g., / gene & is_marker | is_noisy will restrict the result vector to either marker or noisy genes).\n\n\n\n\n\n","category":"type"},{"location":"queries.html#Daf.Queries.OrNot","page":"Queries","title":"Daf.Queries.OrNot","text":"OrNot(property::AbstractString) <: QueryOperation\n\nSame as Or but use the inverse of the mask. In a string Query, this is specified using the |! operator, followed by the name of an axis property to look up to compute the mask.\n\n\n\n\n\n","category":"type"},{"location":"queries.html#Daf.Queries.Xor","page":"Queries","title":"Daf.Queries.Xor","text":"Xor(property::AbstractString) <: QueryOperation\n\nA query operation for flipping the set of entries of an Axis. 
In a string Query, this is specified using the ^ operator, followed by the name of an axis property to look up to compute the mask.\n\nThis works similarly to Or, except that it flips entries in the mask (e.g., / gene & is_marker ^ is_noisy will restrict the result vector to either marker or noisy genes, but not both).\n\n\n\n\n\n","category":"type"},{"location":"queries.html#Daf.Queries.XorNot","page":"Queries","title":"Daf.Queries.XorNot","text":"XorNot(property::AbstractString) <: QueryOperation\n\nSame as Xor but use the inverse of the mask. In a string Query, this is specified using the ^! operator, followed by the name of an axis property to look up to compute the mask.\n\n\n\n\n\n","category":"type"},{"location":"queries.html#Index","page":"Queries","title":"Index","text":"","category":"section"},{"location":"queries.html","page":"Queries","title":"Queries","text":"Pages = [\"queries.md\"]","category":"page"},{"location":"index.html#Daf","page":"Daf","title":"Daf","text":"","category":"section"},{"location":"index.html","page":"Daf","title":"Daf","text":"Daf.Daf","category":"page"},{"location":"index.html#Daf.Daf","page":"Daf","title":"Daf.Daf","text":"The Daf.jl package provides a uniform generic interface for accessing 1D and 2D data arranged along some set of axes. This is a much-needed generalization of the AnnData functionality. 
The key features are:\n\nThe data model StorageTypes include (1) some axes with named entries, (2) vector data indexed by a single axis, (3) matrix data indexed by a pair of axes, and also (4) scalar data (anything not tied to some axis).\nExplicit control over 2D data MatrixLayouts (row or column major), with support for both dense and sparse matrices, both of which are crucial for performance.\nOut of the box, allow storing the data in memory (using MemoryDaf), directly inside HDF5 files (using H5df), or as a collection of simple files in a directory (using FilesDaf), which works nicely with tools like make for automating computation pipelines.\nImport and export to/from AnnDataFormat for interoperability with non-Daf tools.\nImplementation with a focus on memory-mapping to allow for efficient processing of large data sets (in theory, larger than the system's memory). In particular, merely opening a data set is a fast operation (almost) regardless of its size.\nWell-defined interfaces for implementing additional storage Formats.\nCreating Chains of data sets, allowing zero-copy reuse of common data between multiple computation pipelines.\nConcat multiple data sets into a single data set along one or more axes.\nA Query language for accessing the data, providing features such as slicing, aggregation and filtering, and making Views and Copies based on these queries.\nSelf documenting Computations with an explicit Contracts describing and enforcing the inputs and outputs, and Adapters for applying the computation to data of a different format.\n\nThe top-level Daf module re-exports all(most) everything from the sub-modules, so you can directly access any exported symbol by using Daf (or import Daf: MemoryDaf), instead of having to import or use qualified names (such as Daf.MemoryFormat.MemoryDaf).\n\nThe Daf data sets type hierarchy looks like this:\n\nDafReader (abstract type)\n├─ DafReadOnly (abstract type)\n│ ├─ DafReadOnlyWrapper (created by read_only)\n│ ├─ 
DafView (created by viewer)\n│ └─ DafChainReader (created by chain_reader)\n└─ DafWriter (abstract type)\n ├─ DafChainWriter (created by chain_writer)\n ├─ MemoryDaf\n ├─ FilesDaf\n └─ H5df\n\n\n\n\n\n","category":"module"},{"location":"index.html#Index","page":"Daf","title":"Index","text":"","category":"section"},{"location":"index.html","page":"Daf","title":"Daf","text":"","category":"page"},{"location":"adapters.html#Adapters","page":"Adapters","title":"Adapters","text":"","category":"section"},{"location":"adapters.html","page":"Adapters","title":"Adapters","text":"Daf.Adapters\nDaf.Adapters.adapter","category":"page"},{"location":"adapters.html#Daf.Adapters","page":"Adapters","title":"Daf.Adapters","text":"Adapt Daf data to a @computation.\n\n\n\n\n\n","category":"module"},{"location":"adapters.html#Daf.Adapters.adapter","page":"Adapters","title":"Daf.Adapters.adapter","text":"adapter(\n computation::Function,\n view::Union{DafWriter, DafReadOnly},\n [name::Maybe{AbstractString} = nothing,\n capture=MemoryDaf,\n axes::Maybe{ViewAxes} = nothing,\n data::Maybe{ViewData} = nothing,\n empty::Maybe{EmptyData} = nothing,\n relayout::Bool = true,\n overwrite::Bool = false]\n)::Any\n\nInvoke a computation on a view data set and return the result; copy a viewer of the updated data set into the base Daf data of the view. If specified, the name is used as a prefix for all the names; otherwise, the view name is used as the prefix.\n\nIf you have some Daf data you wish to run a computation on, you need to deal with name mismatches. That is, the names of the input and output data properties of the computation may be different from these used in your data. 
In addition, you might be interested only in a subset of the computed data properties, to avoid polluting your data set with irrelevant properties.\n\nTo address these issues, the common idiom for applying computations to Daf data is to use the adapter as follows:\n\nCreate a (read-only) view of your data which presents the data properties under the names expected by the computation, using viewer. If the computation was annotated by @computation, then its Contract will be explicitly documented so you will know exactly what to provide.\nPass this view to adapter, which will invoke the computation with a (writable) adapted version of the data (created using chain_writer and a new DafWriter to capture the output; by default, this will be a MemoryDaf), but it can be any function that takes a name (named) parameter and returns a DafWriter.\nOnce the computation is done, create a new view of the output, which presents the subset of the output data properties you are interested in, with the names you would like to store them as. Again, if the computation was annotated by @computation, then its Contract will be explicitly documented so you will know exactly what to expect.\nCopy this output view data into the base Daf data of the view (using copy_all!, empty, relayout (default: true) and overwrite (default: false)).\n\nnote: Note\nIf the names of the properties in the input already match the contract of the computation, you can pass the data set directly as the view. The call to adapter may still be needed to filter or rename the computation's output. If the outputs can also be used as-is, then there's no need to invoke adapter; directly apply the computation to the data and be done.\n\nTypically the code would look something like this:\n\ndaf = ... 
# Some input `Daf` data we wish to compute on.\n\n# Here `daf` contains the inputs for the computation, but possibly\n# under a different name.\n\nresult = adapter(\n viewer(daf; ...), # How to view the input in the way expected by the computation.\n name = \"example\", # A name to use to generate the temporary `Daf` data names.\n axes = ..., data = ..., # How and what to view from the output for copying back into `daf`.\n empty = ..., # If the input view specifies a subset of some axes.\n) do adapted # The writable adapted data we can pass to the computation.\n computation(adapted, ...) # Actually do the computation.\n return ... # An additional result outside `daf`.\nend\n\n# Here `daf` will contain the specific renamed outputs specified in `adapter`,\n# and you can also access the additional non-`daf` data `result`.\n\nThis idiom allows @computation functions to use clear generic names for their inputs and outputs, and still apply them to arbitrary data sets using more specific names. One can even invoke the same computation with different parameter values, and store the different results in the same data set under different names.\n\n\n\n\n\n","category":"function"},{"location":"adapters.html#Index","page":"Adapters","title":"Index","text":"","category":"section"},{"location":"adapters.html","page":"Adapters","title":"Adapters","text":"Pages = [\"adapter.md\"]","category":"page"},{"location":"h5df_format.html#H5DF-Format","page":"H5DF Format","title":"H5DF Format","text":"","category":"section"},{"location":"h5df_format.html","page":"H5DF Format","title":"H5DF Format","text":"Daf.H5dfFormat\nDaf.H5dfFormat.MAJOR_VERSION\nDaf.H5dfFormat.MINOR_VERSION\nDaf.H5dfFormat.H5df","category":"page"},{"location":"h5df_format.html#Daf.H5dfFormat","page":"H5DF Format","title":"Daf.H5dfFormat","text":"A Daf storage format in an HDF5 disk file. 
This is the \"native\" way to store Daf data in HDF5 files, which can be used to contain \"anything\", as HDF5 is essentially \"a filesystem inside a file\", with \"groups\" instead of directories and \"datasets\" instead of files. Therefore HDF5 is very generic, and there are various specific formats which use specific internal structure to hold some data in it - for example, h5ad files have a specific internal structure for representing AnnData objects. To represent Daf data in HDF5 storage, we use the following internal structure (which is not compatible with h5ad):\n\nThe HDF5 file may contain Daf data directly in the root group, in which case, it is restricted to holding just a single Daf data set. When using such a file, you automatically access the single Daf data set contained in it. By convention such files are given a .h5df suffix.\nAlternatively, the HDF5 file may contain Daf data inside some arbitrary group, in which case, there's no restriction on the content of other groups in the file. Such groups may contain other Daf data (allowing for multiple Daf data sets in a single file), and/or non-Daf data. When using such a file, you need to specify the name of the group that contains the Daf data set you are interested in. By convention, at least if such files contain \"mostly\" (or only) Daf data sets, they are given a .h5dfs suffix, and are accompanied by some documentation describing the top-level groups in the file.\nUnder the Daf data group, there are 4 sub-groups: scalars, axes, vectors and matrices and a daf dataset.\nTo future-proof the format, the daf dataset will contain a vector of two integers, the first acting as the major version number and the second as the minor version number, using semantic versioning. This makes it easy to test whether some group in an HDF5 file does/n't contain Daf data, and which version of the internal structure it is using. 
Currently the only defined version is [1,0].\nThe scalars group contains scalar properties, each as its own \"dataset\". The only supported scalar data types are these included in StorageScalar. If you really need something else, serialize it to JSON and store the result as a string scalar. This should be extremely rare.\nThe axes group contains a \"dataset\" per axis, which contains a vector of strings (the names of the axis entries).\nThe vectors group contains a sub-group for each axis. Each such sub-group contains vector properties. If the vector is dense, it is stored directly as a \"dataset\". Otherwise, it is stored as a group containing two vector \"datasets\": nzind is containing the indices of the non-zero values, and nzval containing the actual values. See Julia's SparseVector implementation for details. The only supported vector element types are these included in StorageScalar, same as StorageVector.\nThe matrices group contains a sub-group for each rows axis, which contains a sub-group for each columns axis. Each such sub-sub group contains matrix properties. If the matrix is dense, it is stored directly as a \"dataset\" (in column-major layout). Otherwise, it is stored as a group containing three vector \"datasets\": colptr containing the indices of the rows of each column in rowval, rowval containing the indices of the non-zero rows of the columns, and nzval containing the non-zero matrix entry values. See Julia's SparseMatrixCSC implementation for details. The only supported matrix element types are these included in StorageNumber - this explicitly excludes matrices of strings, same as StorageMatrix.\nAll vectors and matrices are stored in a contiguous way in the file, which allows us to efficiently memory-map them.\n\nThat's all there is to it. 
Due to the above restrictions on types and layout, the metadata provided by HDF5 for each \"dataset\" is sufficient to fully describe the data, and one should be able to directly access it using any HDF5 API in any programming language, if needed. Typically, however, it is easiest to simply use the Julia Daf package to access the data.\n\nExample HDF5 structure:\n\nexample-daf-dataset-root-group/\n├─ daf\n├─ scalars/\n│ └─ version\n├─ axes/\n│ ├─ cell\n│ └─ gene\n├─ vectors/\n│ ├─ cell/\n│ │ └─ batch\n│ └─ gene/\n│ └─ is_marker\n└─ matrices/\n ├─ cell/\n │ ├─ cell/\n │ └─ gene/\n │ └─ UMIs/\n │ ├─ colptr\n │ ├─ rowval\n │ └─ nzval\n └─ gene/\n ├─ cell/\n └─ gene/\n\nnote: Note\nWhen creating an HDF5 file to contain Daf data, you should specify ;fapl=HDF5.FileAccessProperties(;alignment=(1,8)). This ensures all the memory buffers are properly aligned for efficient access. Otherwise, memory mapping will be much less efficient. A warning is therefore generated whenever you try to access Daf data stored in an HDF5 file which does not enforce proper alignment.\n\nnote: Note\nDeleting data from an HDF5 file does not reuse the abandoned storage. In general if you want to reclaim that storage, you will need to repack the file, which will invalidate any memory-mapped buffers created for it. Therefore, if you delete data (e.g. using delete_vector!), you should eventually abandon the H5df object, repack the HDF5 file, then create a new H5df object to access the repacked data.\n\nnote: Note\nThe code here assumes the HDF5 data obeys all the above conventions and restrictions (that said, code will be able to access vectors and matrices stored in unaligned, chunked and/or compressed formats, but this will be much less efficient). As long as you only create and access Daf data in HDF5 files using H5df, then the code will work as expected (assuming no bugs). 
However, if you do this in some other way (e.g., directly using some HDF5 API in some arbitrary programming language), and the result is invalid, then the code here may fail with \"less than friendly\" error messages.\n\n\n\n\n\n","category":"module"},{"location":"h5df_format.html#Daf.H5dfFormat.MAJOR_VERSION","page":"H5DF Format","title":"Daf.H5dfFormat.MAJOR_VERSION","text":"The specific major version of the H5df format that is supported by this code (1). The code will refuse to access data that is stored in a different major format.\n\n\n\n\n\n","category":"constant"},{"location":"h5df_format.html#Daf.H5dfFormat.MINOR_VERSION","page":"H5DF Format","title":"Daf.H5dfFormat.MINOR_VERSION","text":"The maximal minor version of the H5df format that is supported by this code (0). The code will refuse to access data that is stored with the expected major version (1), but that uses a higher minor version.\n\nnote: Note\nModifying data that is stored with a lower minor version number may increase its minor version number.\n\n\n\n\n\n","category":"constant"},{"location":"h5df_format.html#Daf.H5dfFormat.H5df","page":"H5DF Format","title":"Daf.H5dfFormat.H5df","text":"H5df(\n root::Union{AbstractString, HDF5.File, HDF5.Group},\n mode::AbstractString = \"r\";\n [name::Maybe{AbstractString} = nothing]\n)\n\nStorage in an HDF5 file.\n\nThe root can be the path of an HDF5 file, which will be opened with the specified mode, or an opened HDF5 file, in which case the Daf data set will be stored directly in the root of the file (by convention, using a .h5df file name suffix). Alternatively, the root can be a group inside an HDF5 file, which allows to store multiple Daf data sets inside the same HDF5 file (by convention, using a .h5dfs file name suffix).\n\nWhen opening an existing data set, if name is not specified, and there exists a \"name\" scalar property, it is used as the name. 
Otherwise, the path of the HDF5 file will be used as the name, followed by the internal path of the group (if any).\n\nThe valid mode values are as follows (the default mode is r):\n\nMode Allow modifications? Create if does not exist? Truncate if exists? Returned type\nr No No No DafReadOnly\nr+ Yes No No H5df\nw+ Yes Yes No H5df\nw Yes Yes Yes H5df\n\nnote: Note\nIf specifying a path (string) root, when calling h5open, the file alignment of created files is set to (1, 8) to maximize efficiency of mapped vectors and matrices, and the w+ mode is converted to cw.\n\n\n\n\n\n","category":"type"},{"location":"h5df_format.html#Index","page":"H5DF Format","title":"Index","text":"","category":"section"},{"location":"h5df_format.html","page":"H5DF Format","title":"H5DF Format","text":"Pages = [\"h5df_format.md\"]","category":"page"}] } diff --git a/src/chains.jl b/src/chains.jl index 281ce65..b4c3691 100644 --- a/src/chains.jl +++ b/src/chains.jl @@ -18,6 +18,7 @@ using Daf.StorageTypes using Daf.Writers using SparseArrays +import Daf.Formats.FormatReader import Daf.Formats.Internal import Daf.Formats.as_read_only_array import Daf.Messages @@ -176,7 +177,7 @@ function Formats.format_has_scalar(chain::AnyChain, name::AbstractString)::Bool end function Formats.format_set_scalar!(chain::WriteChain, name::AbstractString, value::StorageScalar)::Nothing - Formats.format_set_scalar!(chain.daf, name, value) + set_scalar!(chain.daf, name, value) return nothing end @@ -195,7 +196,7 @@ function Formats.format_delete_scalar!(chain::WriteChain, name::AbstractString; end end end - Formats.format_delete_scalar!(chain.daf, name; for_set = for_set) + delete_scalar!(chain.daf, name; must_exist = false, _for_set = for_set) return nothing end @@ -203,7 +204,7 @@ function Formats.format_get_scalar(chain::AnyChain, name::AbstractString)::Stora for daf in reverse(chain.dafs) value = Formats.with_read_lock(daf) do if Formats.format_has_scalar(daf, name) - return Formats.format_get_scalar(daf, 
name) + return Formats.get_scalar_through_cache(daf, name) else return nothing end @@ -216,11 +217,16 @@ function Formats.format_get_scalar(chain::AnyChain, name::AbstractString)::Stora end function Formats.format_scalar_names(chain::AnyChain)::AbstractStringSet - return reduce(union, [ - Formats.with_read_lock(daf) do - return Formats.format_scalar_names(daf) - end for daf in chain.dafs - ]) + return reduce( + union, + [ + Formats.with_read_lock(daf) do + return Formats.get_through_cache(daf, Formats.scalar_names_cache_key(), AbstractStringSet) do + return Formats.format_scalar_names(daf) + end + end for daf in chain.dafs + ], + ) end function Formats.format_has_axis(chain::AnyChain, axis::AbstractString; for_change::Bool)::Bool @@ -237,7 +243,7 @@ function Formats.format_has_axis(chain::AnyChain, axis::AbstractString; for_chan end function Formats.format_add_axis!(chain::WriteChain, axis::AbstractString, entries::AbstractStringVector)::Nothing - Formats.format_add_axis!(chain.daf, axis, entries) + add_axis!(chain.daf, axis, entries) return nothing end @@ -254,14 +260,14 @@ function Formats.format_delete_axis!(chain::WriteChain, axis::AbstractString)::N end end end - Formats.format_delete_axis!(chain.daf, axis) + delete_axis!(chain.daf, axis) return nothing end function Formats.format_axis_names(chain::AnyChain)::AbstractStringSet return reduce(union, [ Formats.with_read_lock(daf) do - return Formats.format_axis_names(daf) + return Formats.get_axis_names_through_cache(daf) end for daf in chain.dafs ]) end @@ -270,7 +276,7 @@ function Formats.format_get_axis(chain::AnyChain, axis::AbstractString)::Abstrac for daf in reverse(chain.dafs) axis_entries = Formats.with_read_lock(daf) do if Formats.format_has_axis(daf, axis; for_change = false) - return Formats.format_get_axis(daf, axis) + return Formats.get_axis_through_cache(daf, axis) else return nothing end @@ -317,25 +323,25 @@ function Formats.format_set_vector!( vector::Union{StorageScalar, StorageVector}, 
)::Nothing if !Formats.format_has_axis(chain.daf, axis; for_change = false) - Formats.format_add_axis!(chain.daf, axis, Formats.format_get_axis(chain, axis)) + add_axis!(chain.daf, axis, Formats.get_axis_through_cache(chain, axis)) end - Formats.format_set_vector!(chain.daf, axis, name, vector) + set_vector!(chain.daf, axis, name, vector) return nothing end -function Formats.format_empty_dense_vector!( +function Formats.format_get_empty_dense_vector!( chain::WriteChain, axis::AbstractString, name::AbstractString, eltype::Type{T}, )::AbstractVector{T} where {T <: StorageNumber} if !Formats.format_has_axis(chain.daf, axis; for_change = false) - Formats.format_add_axis!(chain.daf, axis, Formats.format_get_axis(chain, axis)) + add_axis!(chain.daf, axis, Formats.get_axis_through_cache(chain, axis)) end - return Formats.format_empty_dense_vector!(chain.daf, axis, name, eltype) + return get_empty_dense_vector!(chain.daf, axis, name, eltype; overwrite = true) end -function Formats.format_empty_sparse_vector!( +function Formats.format_get_empty_sparse_vector!( chain::WriteChain, axis::AbstractString, name::AbstractString, @@ -344,19 +350,19 @@ function Formats.format_empty_sparse_vector!( indtype::Type{I}, )::Tuple{AbstractVector{I}, AbstractVector{T}, Any} where {T <: StorageNumber, I <: StorageInteger} if !Formats.format_has_axis(chain.daf, axis; for_change = false) - Formats.format_add_axis!(chain.daf, axis, Formats.format_get_axis(chain, axis)) + add_axis!(chain.daf, axis, Formats.get_axis_through_cache(chain, axis)) end - return Formats.format_empty_sparse_vector!(chain.daf, axis, name, eltype, nnz, indtype) + return get_empty_sparse_vector!(chain.daf, axis, name, eltype, nnz, indtype) end -function Formats.format_filled_sparse_vector!( +function Formats.format_filled_empty_sparse_vector!( chain::WriteChain, axis::AbstractString, name::AbstractString, extra::Any, filled::SparseVector{T, I}, )::Nothing where {T <: StorageNumber, I <: StorageInteger} - 
Formats.format_filled_sparse_vector!(chain.daf, axis, name, extra, filled) + Formats.format_filled_empty_sparse_vector!(chain.daf, axis, name, extra, filled) return nothing end @@ -382,7 +388,7 @@ function Formats.format_delete_vector!( end end if Formats.format_has_axis(chain.daf, axis; for_change = false) && Formats.format_has_vector(chain.daf, axis, name) - Formats.format_delete_vector!(chain.daf, axis, name; for_set = for_set) + delete_vector!(chain.daf, axis, name; _for_set = for_set) end return nothing end @@ -402,7 +408,7 @@ function Formats.format_get_vector(chain::AnyChain, axis::AbstractString, name:: for daf in reverse(chain.dafs) vector = Formats.with_read_lock(daf) do if Formats.format_has_axis(daf, axis; for_change = false) && Formats.format_has_vector(daf, axis, name) - return as_read_only_array(Formats.format_get_vector(daf, axis, name)) + return as_read_only_array(Formats.get_vector_through_cache(daf, axis, name)) else return nothing end @@ -449,14 +455,14 @@ function Formats.format_set_matrix!( )::Nothing for axis in (rows_axis, columns_axis) if !Formats.format_has_axis(chain.daf, axis; for_change = false) - Formats.format_add_axis!(chain.daf, axis, Formats.format_get_axis(chain, axis)) + add_axis!(chain.daf, axis, Formats.get_axis_through_cache(chain, axis)) end end - Formats.format_set_matrix!(chain.daf, rows_axis, columns_axis, name, matrix) + set_matrix!(chain.daf, rows_axis, columns_axis, name, matrix; relayout = false) return nothing end -function Formats.format_empty_dense_matrix!( +function Formats.format_get_empty_dense_matrix!( chain::WriteChain, rows_axis::AbstractString, columns_axis::AbstractString, @@ -465,13 +471,13 @@ function Formats.format_empty_dense_matrix!( )::AbstractMatrix{T} where {T <: StorageNumber} for axis in (rows_axis, columns_axis) if !Formats.format_has_axis(chain.daf, axis; for_change = false) - Formats.format_add_axis!(chain.daf, axis, Formats.format_get_axis(chain, axis)) + add_axis!(chain.daf, axis, 
Formats.get_axis_through_cache(chain, axis)) end end - return Formats.format_empty_dense_matrix!(chain.daf, rows_axis, columns_axis, name, eltype) + return get_empty_dense_matrix!(chain.daf, rows_axis, columns_axis, name, eltype) end -function Formats.format_empty_sparse_matrix!( +function Formats.format_get_empty_sparse_matrix!( chain::WriteChain, rows_axis::AbstractString, columns_axis::AbstractString, @@ -482,13 +488,13 @@ function Formats.format_empty_sparse_matrix!( )::Tuple{AbstractVector{I}, AbstractVector{I}, AbstractVector{T}, Any} where {T <: StorageNumber, I <: StorageInteger} for axis in (rows_axis, columns_axis) if !Formats.format_has_axis(chain.daf, axis; for_change = false) - Formats.format_add_axis!(chain.daf, axis, Formats.format_get_axis(chain, axis)) + add_axis!(chain.daf, axis, Formats.get_axis_through_cache(chain, axis)) end end - return Formats.format_empty_sparse_matrix!(chain.daf, rows_axis, columns_axis, name, eltype, nnz, indtype) + return get_empty_sparse_matrix!(chain.daf, rows_axis, columns_axis, name, eltype, nnz, indtype) end -function Formats.format_filled_sparse_matrix!( +function Formats.format_filled_empty_sparse_matrix!( chain::WriteChain, rows_axis::AbstractString, columns_axis::AbstractString, @@ -496,7 +502,7 @@ function Formats.format_filled_sparse_matrix!( extra::Any, filled::SparseMatrixCSC{T, I}, )::Nothing where {T <: StorageNumber, I <: StorageInteger} - Formats.format_filled_sparse_matrix!(chain.daf, rows_axis, columns_axis, name, extra, filled) + Formats.format_filled_empty_sparse_matrix!(chain.daf, rows_axis, columns_axis, name, extra, filled) return nothing end @@ -506,7 +512,7 @@ function Formats.format_relayout_matrix!( columns_axis::AbstractString, name::AbstractString, )::Nothing - Formats.format_relayout_matrix!(chain.daf, rows_axis, columns_axis, name) + relayout_matrix!(chain.daf, rows_axis, columns_axis, name) return nothing end @@ -538,7 +544,7 @@ function Formats.format_delete_matrix!( if 
Formats.format_has_axis(chain.daf, rows_axis; for_change = false) && Formats.format_has_axis(chain.daf, columns_axis; for_change = false) && Formats.format_has_matrix(chain.daf, rows_axis, columns_axis, name) - Formats.format_delete_matrix!(chain.daf, rows_axis, columns_axis, name; for_set = for_set) + delete_matrix!(chain.daf, rows_axis, columns_axis, name; relayout = false, _for_set = for_set) end return nothing end @@ -551,7 +557,7 @@ function Formats.format_matrix_names( return reduce( union, [ - Formats.format_matrix_names(daf, rows_axis, columns_axis) for + Formats.get_matrix_names_through_cache(daf, rows_axis, columns_axis) for daf in chain.dafs if Formats.format_has_axis(daf, rows_axis; for_change = false) && Formats.format_has_axis(daf, columns_axis; for_change = false) ], @@ -569,7 +575,7 @@ function Formats.format_get_matrix( if Formats.format_has_axis(daf, rows_axis; for_change = false) && Formats.format_has_axis(daf, columns_axis; for_change = false) && Formats.format_has_matrix(daf, rows_axis, columns_axis, name) - return as_read_only_array(Formats.format_get_matrix(daf, rows_axis, columns_axis, name)) + return as_read_only_array(Formats.get_matrix_through_cache(daf, rows_axis, columns_axis, name)) else return nothing end diff --git a/src/files_format.jl b/src/files_format.jl index c7a98f4..cd3af41 100644 --- a/src/files_format.jl +++ b/src/files_format.jl @@ -388,7 +388,7 @@ function Formats.format_set_vector!( return nothing end -function Formats.format_empty_dense_vector!( +function Formats.format_get_empty_dense_vector!( files::FilesDaf, axis::AbstractString, name::AbstractString, @@ -407,7 +407,7 @@ function Formats.format_empty_dense_vector!( return vector end -function Formats.format_empty_sparse_vector!( +function Formats.format_get_empty_sparse_vector!( files::FilesDaf, axis::AbstractString, name::AbstractString, @@ -430,7 +430,7 @@ function Formats.format_empty_sparse_vector!( return (nzind_vector, nzval_vector, nothing) end -function 
Formats.format_filled_sparse_vector!( +function Formats.format_filled_empty_sparse_vector!( files::FilesDaf, axis::AbstractString, name::AbstractString, @@ -550,7 +550,7 @@ function Formats.format_set_matrix!( return nothing end -function Formats.format_empty_dense_matrix!( +function Formats.format_get_empty_dense_matrix!( files::FilesDaf, rows_axis::AbstractString, columns_axis::AbstractString, @@ -569,7 +569,7 @@ function Formats.format_empty_dense_matrix!( return matrix end -function Formats.format_empty_sparse_matrix!( +function Formats.format_get_empty_sparse_matrix!( files::FilesDaf, rows_axis::AbstractString, columns_axis::AbstractString, @@ -601,7 +601,7 @@ function Formats.format_empty_sparse_matrix!( return (colptr_vector, rowval_vector, nzval_vector, nothing) end -function Formats.format_filled_sparse_matrix!( +function Formats.format_filled_empty_sparse_matrix!( files::FilesDaf, rows_axis::AbstractString, columns_axis::AbstractString, @@ -622,7 +622,7 @@ function Formats.format_relayout_matrix!( matrix = Formats.get_matrix_through_cache(files, rows_axis, columns_axis, name).array if matrix isa SparseMatrixCSC - colptr, rowval, nzval = Formats.format_empty_sparse_matrix!( + colptr, rowval, nzval = Formats.format_get_empty_sparse_matrix!( files, columns_axis, rows_axis, @@ -636,7 +636,7 @@ function Formats.format_relayout_matrix!( relayout_matrix = SparseMatrixCSC(axis_length(files, columns_axis), axis_length(files, rows_axis), colptr, rowval, nzval) else - relayout_matrix = Formats.format_empty_dense_matrix!(files, columns_axis, rows_axis, name, eltype(matrix)) + relayout_matrix = Formats.format_get_empty_dense_matrix!(files, columns_axis, rows_axis, name, eltype(matrix)) end relayout!(transpose(relayout_matrix), matrix) diff --git a/src/formats.jl b/src/formats.jl index 40067d7..2e0d0e2 100644 --- a/src/formats.jl +++ b/src/formats.jl @@ -325,7 +325,7 @@ isn't `"name"`, that it does not exist for the `axis`, and that the `vector` has function 
format_set_vector! end """ - format_empty_dense_vector!( + format_get_empty_dense_vector!( format::FormatWriter, axis::AbstractString, name::AbstractString, @@ -345,10 +345,29 @@ isn't `"name"`, and that it does not exist for the `axis`. `(1,)`, so that elements are consecutive in memory. However it need not be an actual `DenseVector` because of Julia's type system's limitations. """ -function format_empty_dense_vector! end +function format_get_empty_dense_vector! end """ - format_empty_sparse_vector!( + format_filled_empty_dense_vector!( + daf::DafWriter, + axis::AbstractString, + name::AbstractString, + filled_vector::AbstractVector{T}, + )::Nothing where {T <: StorageNumber} + +Allow the `format` to perform caching once the empty dense vector has been `filled`. By default this does nothing. +""" +function format_filled_empty_dense_vector!( + ::DafWriter, + ::AbstractString, + ::AbstractString, + ::AbstractVector{T}, +)::Nothing where {T <: StorageNumber} + return nothing +end + +""" + format_get_empty_sparse_vector!( format::FormatWriter, axis::AbstractString, name::AbstractString, @@ -359,15 +378,15 @@ function format_empty_dense_vector! end where {T <: StorageNumber, I <: StorageInteger} Implement creating an empty dense vector property with some `name` for some `rows_axis` and `columns_axis` in -`format`. The final tuple element is passed to [`format_filled_sparse_vector!`](@ref). +`format`. The final tuple element is passed to [`format_filled_empty_sparse_vector!`](@ref). This trusts we have a write lock on the data set, that the `axis` exists in `format` and that the vector property `name` isn't `"name"`, and that it does not exist for the `axis`. """ -function format_empty_sparse_vector! end +function format_get_empty_sparse_vector! end """ - format_filled_sparse_vector!( + format_filled_empty_sparse_vector!( format::FormatWriter, axis::AbstractString, name::AbstractString, @@ -377,7 +396,7 @@ function format_empty_sparse_vector! 
end Allow the `format` to perform caching once the empty sparse vector has been `filled`. By default this does nothing. """ -function format_filled_sparse_vector!( # untested +function format_filled_empty_sparse_vector!( # untested ::FormatWriter, ::AbstractString, ::AbstractString, @@ -457,7 +476,7 @@ This trusts we have a write lock on the data set, that the `rows_axis` and `colu function format_set_matrix! end """ - format_empty_dense_matrix!( + format_get_empty_dense_matrix!( format::FormatWriter, rows_axis::AbstractString, columns_axis::AbstractString, @@ -476,10 +495,31 @@ This trusts we have a write lock on the data set, that the `rows_axis` and `colu `(1,nrows)`, so that elements are consecutive in memory. However it need not be an actual `DenseMatrix` because of Julia's type system's limitations. """ -function format_empty_dense_matrix! end +function format_get_empty_dense_matrix! end """ - format_empty_sparse_matrix!( + format_filled_empty_dense_matrix!( + daf::DafWriter, + rows_axis::AbstractString, + columns_axis::AbstractString, + name::AbstractString, + filled_matrix::AbstractVector{T}, + )::Nothing where {T <: StorageNumber} + +Allow the `format` to perform caching once the empty dense matrix has been `filled`. By default this does nothing. +""" +function format_filled_empty_dense_matrix!( + ::DafWriter, + ::AbstractString, + ::AbstractString, + ::AbstractString, + ::AbstractMatrix{T}, +)::Nothing where {T <: StorageNumber} + return nothing +end + +""" + format_get_empty_sparse_matrix!( format::FormatWriter, rows_axis::AbstractString, columns_axis::AbstractString, @@ -491,15 +531,15 @@ function format_empty_dense_matrix! end where {T <: StorageNumber, I <: StorageInteger} Implement creating an empty sparse matrix property with some `name` for some `rows_axis` and `columns_axis` in `format`. -The final tuple element is passed to [`format_filled_sparse_matrix!`](@ref). +The final tuple element is passed to [`format_filled_empty_sparse_matrix!`](@ref). 
This trusts we have a write lock on the data set, that the `rows_axis` and `columns_axis` exist in `format` and that the `name` matrix property does not exist for them. """ -function format_empty_sparse_matrix! end +function format_get_empty_sparse_matrix! end """ - format_filled_dense_matrix!( + format_filled_empty_dense_matrix!( format::FormatWriter, rows_axis::AbstractString, columns_axis::AbstractString, @@ -510,7 +550,7 @@ function format_empty_sparse_matrix! end Allow the `format` to perform caching once the empty sparse matrix has been `filled`. By default this does nothing. """ -function format_filled_sparse_matrix!( # untested +function format_filled_empty_sparse_matrix!( # untested ::FormatWriter, ::AbstractString, ::AbstractString, @@ -664,7 +704,7 @@ function get_axis_through_cache(format::FormatReader, axis::AbstractString)::Abs end end -function get_vector_through_cache(format::FormatReader, axis::AbstractString, name::AbstractString)::StorageVector +function get_vector_through_cache(format::FormatReader, axis::AbstractString, name::AbstractString)::NamedArray return get_through_cache(format, vector_cache_key(axis, name), StorageVector) do vector = format_get_vector(format, axis, name) return as_named_vector(format, axis, vector) @@ -900,11 +940,14 @@ function store_cached_dependency_key!( end function invalidate_cached!(format::FormatReader, cache_key::AbstractString)::Nothing + @debug "invalidate_cached! 
daf: $(depict(format)) cache_key: $(cache_key)" + @debug "- delete cache_key: $(cache_key)" delete!(format.internal.cache, cache_key) dependent_keys = pop!(format.internal.dependency_cache_keys, cache_key, nothing) if dependent_keys !== nothing for dependent_key in dependent_keys + @debug "- delete dependent_key: $(dependent_key)" delete!(format.internal.cache, dependent_key) end end @@ -947,7 +990,7 @@ end function with_write_lock(action::Function, format::FormatReader)::Any thread_id = threadid() if format.internal.writer_thread[1] == thread_id - return action() # untested + return action() end lock(format.internal.lock) diff --git a/src/formats.md b/src/formats.md index 89a1e6d..f344807 100644 --- a/src/formats.md +++ b/src/formats.md @@ -99,12 +99,14 @@ Daf.Formats.format_delete_matrix! ### Creating properties ```@docs -Daf.Formats.format_empty_dense_vector! -Daf.Formats.format_empty_sparse_vector! -Daf.Formats.format_filled_sparse_vector! -Daf.Formats.format_empty_dense_matrix! -Daf.Formats.format_empty_sparse_matrix! -Daf.Formats.format_filled_sparse_matrix! +Daf.Formats.format_get_empty_dense_vector! +Daf.Formats.format_filled_empty_dense_vector! +Daf.Formats.format_get_empty_sparse_vector! +Daf.Formats.format_filled_empty_sparse_vector! +Daf.Formats.format_get_empty_dense_matrix! +Daf.Formats.format_filled_empty_dense_matrix! +Daf.Formats.format_get_empty_sparse_matrix! +Daf.Formats.format_filled_empty_sparse_matrix! 
``` ## Index diff --git a/src/h5df_format.jl b/src/h5df_format.jl index 7901f47..6853a2c 100644 --- a/src/h5df_format.jl +++ b/src/h5df_format.jl @@ -341,7 +341,9 @@ function Formats.format_add_axis!(h5df::H5df, axis::AbstractString, entries::Abs matrices_group = h5df.root["matrices"] @assert matrices_group isa HDF5.Group - axes = Formats.get_axis_names_through_cache(h5df) + axes = Set(keys(axes_group)) + Formats.cache_axis_names!(h5df, axes, MemoryData) + @assert axis in axes axis_matrices_group = create_group(matrices_group, axis) @@ -462,7 +464,7 @@ function Formats.format_set_vector!( return nothing end -function Formats.format_empty_dense_vector!( +function Formats.format_get_empty_dense_vector!( h5df::H5df, axis::AbstractString, name::AbstractString, @@ -485,7 +487,7 @@ function Formats.format_empty_dense_vector!( return vector end -function Formats.format_empty_sparse_vector!( +function Formats.format_get_empty_sparse_vector!( h5df::H5df, axis::AbstractString, name::AbstractString, @@ -520,7 +522,7 @@ function Formats.format_empty_sparse_vector!( return (nzind_vector, nzval_vector, cache_type) end -function Formats.format_filled_sparse_vector!( +function Formats.format_filled_empty_sparse_vector!( h5df::H5df, axis::AbstractString, name::AbstractString, @@ -645,7 +647,7 @@ function Formats.format_set_matrix!( return nothing end -function Formats.format_empty_dense_matrix!( +function Formats.format_get_empty_dense_matrix!( h5df::H5df, rows_axis::AbstractString, columns_axis::AbstractString, @@ -672,7 +674,7 @@ function Formats.format_empty_dense_matrix!( return matrix end -function Formats.format_empty_sparse_matrix!( +function Formats.format_get_empty_sparse_matrix!( h5df::H5df, rows_axis::AbstractString, columns_axis::AbstractString, @@ -720,7 +722,7 @@ function Formats.format_empty_sparse_matrix!( return (colptr_vector, rowval_vector, nzval_vector, cache_type) end -function Formats.format_filled_sparse_matrix!( +function 
Formats.format_filled_empty_sparse_matrix!( h5df::H5df, rows_axis::AbstractString, columns_axis::AbstractString, @@ -741,7 +743,7 @@ function Formats.format_relayout_matrix!( matrix = Formats.get_matrix_through_cache(h5df, rows_axis, columns_axis, name).array if matrix isa SparseMatrixCSC - colptr, rowval, nzval = Formats.format_empty_sparse_matrix!( + colptr, rowval, nzval = Formats.format_get_empty_sparse_matrix!( h5df, columns_axis, rows_axis, @@ -755,7 +757,7 @@ function Formats.format_relayout_matrix!( relayout_matrix = SparseMatrixCSC(axis_length(h5df, columns_axis), axis_length(h5df, rows_axis), colptr, rowval, nzval) else - relayout_matrix = Formats.format_empty_dense_matrix!(h5df, columns_axis, rows_axis, name, eltype(matrix)) + relayout_matrix = Formats.format_get_empty_dense_matrix!(h5df, columns_axis, rows_axis, name, eltype(matrix)) end relayout!(transpose(relayout_matrix), matrix) diff --git a/src/matrix_layouts.jl b/src/matrix_layouts.jl index 63a7734..1f215ad 100644 --- a/src/matrix_layouts.jl +++ b/src/matrix_layouts.jl @@ -401,7 +401,7 @@ function depict_matrix_size(matrix::AbstractMatrix, kind::AbstractString; transp end if transposed - return "$(size(matrix, 2)) x $(size(matrix, 1)) x $(eltype(matrix)) $(layout_suffix) (transposed $(kind))" + return "$(size(matrix, 2)) x $(size(matrix, 1)) x $(eltype(matrix)) $(layout_suffix) (Transpose $(kind))" else return "$(size(matrix, 1)) x $(size(matrix, 2)) x $(eltype(matrix)) $(layout_suffix) ($(kind))" end diff --git a/src/memory_format.jl b/src/memory_format.jl index a8a7bb9..6a4c811 100644 --- a/src/memory_format.jl +++ b/src/memory_format.jl @@ -134,7 +134,7 @@ function Formats.format_set_vector!( return nothing end -function Formats.format_empty_dense_vector!( +function Formats.format_get_empty_dense_vector!( memory::MemoryDaf, axis::AbstractString, name::AbstractString, @@ -146,7 +146,7 @@ function Formats.format_empty_dense_vector!( return vector end -function Formats.format_empty_sparse_vector!( 
+function Formats.format_get_empty_sparse_vector!( ::MemoryDaf, ::AbstractString, ::AbstractString, @@ -159,7 +159,7 @@ function Formats.format_empty_sparse_vector!( return (nzind, nzval, nothing) end -function Formats.format_filled_sparse_vector!( +function Formats.format_filled_empty_sparse_vector!( memory::MemoryDaf, axis::AbstractString, name::AbstractString, @@ -224,7 +224,7 @@ function Formats.format_set_matrix!( return nothing end -function Formats.format_empty_dense_matrix!( +function Formats.format_get_empty_dense_matrix!( memory::MemoryDaf, rows_axis::AbstractString, columns_axis::AbstractString, @@ -238,7 +238,7 @@ function Formats.format_empty_dense_matrix!( return matrix end -function Formats.format_empty_sparse_matrix!( +function Formats.format_get_empty_sparse_matrix!( memory::MemoryDaf, ::AbstractString, columns_axis::AbstractString, @@ -260,7 +260,7 @@ function Formats.format_empty_sparse_matrix!( return (colptr, rowval, nzval, nothing) end -function Formats.format_filled_sparse_matrix!( +function Formats.format_filled_empty_sparse_matrix!( memory::MemoryDaf, rows_axis::AbstractString, columns_axis::AbstractString, diff --git a/src/queries.jl b/src/queries.jl index c870d2a..9bc50a3 100644 --- a/src/queries.jl +++ b/src/queries.jl @@ -988,7 +988,11 @@ function mask_operator(::And)::String return "&" end -function update_axis_mask(axis_mask::Vector{Bool}, mask_vector::Union{Vector{Bool}, BitVector}, ::And)::Nothing +function update_axis_mask( + axis_mask::AbstractVector{Bool}, + mask_vector::Union{AbstractVector{Bool}, BitVector}, + ::And, +)::Nothing axis_mask .&= mask_vector return nothing end @@ -1007,7 +1011,11 @@ function mask_operator(::AndNot)::String return "&!" 
end -function update_axis_mask(axis_mask::Vector{Bool}, mask_vector::Union{Vector{Bool}, BitVector}, ::AndNot)::Nothing +function update_axis_mask( + axis_mask::AbstractVector{Bool}, + mask_vector::Union{AbstractVector{Bool}, BitVector}, + ::AndNot, +)::Nothing axis_mask .&= .!mask_vector return nothing end @@ -1029,7 +1037,11 @@ function mask_operator(::Or)::String return "|" end -function update_axis_mask(axis_mask::Vector{Bool}, mask_vector::Union{Vector{Bool}, BitVector}, ::Or)::Nothing +function update_axis_mask( + axis_mask::AbstractVector{Bool}, + mask_vector::Union{AbstractVector{Bool}, BitVector}, + ::Or, +)::Nothing axis_mask .|= mask_vector return nothing end @@ -1048,7 +1060,11 @@ function mask_operator(::OrNot)::String return "|!" end -function update_axis_mask(axis_mask::Vector{Bool}, mask_vector::Union{Vector{Bool}, BitVector}, ::OrNot)::Nothing +function update_axis_mask( + axis_mask::AbstractVector{Bool}, + mask_vector::Union{AbstractVector{Bool}, BitVector}, + ::OrNot, +)::Nothing axis_mask .|= .!mask_vector return nothing end @@ -1070,7 +1086,11 @@ function mask_operator(::Xor)::String return "^" end -function update_axis_mask(axis_mask::Vector{Bool}, mask_vector::Union{Vector{Bool}, BitVector}, ::Xor)::Nothing +function update_axis_mask( + axis_mask::AbstractVector{Bool}, + mask_vector::Union{AbstractVector{Bool}, BitVector}, + ::Xor, +)::Nothing axis_mask .= @. xor(axis_mask, mask_vector) return nothing end @@ -1089,7 +1109,11 @@ function mask_operator(::XorNot)::String return "^!" end -function update_axis_mask(axis_mask::Vector{Bool}, mask_vector::Union{Vector{Bool}, BitVector}, ::XorNot)::Nothing +function update_axis_mask( + axis_mask::AbstractVector{Bool}, + mask_vector::Union{AbstractVector{Bool}, BitVector}, + ::XorNot, +)::Nothing axis_mask .= @. 
xor(axis_mask, .!mask_vector) return nothing end @@ -1387,7 +1411,7 @@ mutable struct AxisState query_sequence::QuerySequence dependency_keys::Set{AbstractString} axis_name::AbstractString - axis_modifier::Maybe{Union{Vector{Bool}, Int}} + axis_modifier::Maybe{Union{AbstractVector{Bool}, Int}} end struct FakeAxisState @@ -1685,7 +1709,7 @@ function get_axis_result( if axis_modifier isa Int return axis_entries[axis_modifier], axis_state.dependency_keys else - if axis_modifier isa Vector{Bool} + if axis_modifier isa AbstractVector{Bool} axis_entries = axis_entries[axis_modifier] end return axis_entries, axis_state.dependency_keys @@ -1960,7 +1984,7 @@ function lookup_axes(query_state::QueryState, lookup::Lookup)::Nothing rows_axis_state, dependency_keys, ) - elseif columns_axis_modifier isa Vector{Bool} + elseif columns_axis_modifier isa AbstractVector{Bool} return lookup_matrix( query_state, named_matrix[:, columns_axis_modifier], @@ -1984,7 +2008,7 @@ function lookup_axes(query_state::QueryState, lookup::Lookup)::Nothing named_matrix[rows_axis_modifier, columns_axis_modifier], dependency_keys, ) - elseif columns_axis_modifier isa Vector{Bool} + elseif columns_axis_modifier isa AbstractVector{Bool} return lookup_matrix_slice( query_state, named_matrix[rows_axis_modifier, columns_axis_modifier], @@ -1993,7 +2017,7 @@ function lookup_axes(query_state::QueryState, lookup::Lookup)::Nothing ) end - elseif rows_axis_modifier isa Vector{Bool} + elseif rows_axis_modifier isa AbstractVector{Bool} if columns_axis_modifier === nothing return lookup_matrix( query_state, @@ -2009,7 +2033,7 @@ function lookup_axes(query_state::QueryState, lookup::Lookup)::Nothing rows_axis_state, dependency_keys, ) - elseif columns_axis_modifier isa Vector{Bool} + elseif columns_axis_modifier isa AbstractVector{Bool} return lookup_matrix( query_state, named_matrix[rows_axis_modifier, columns_axis_modifier], @@ -2464,7 +2488,7 @@ function fetch_first_named_vector( if axis_mask === nothing size = 
axis_length(query_state.daf, vector_fetch_state.common.axis_state.axis_name) else - @assert axis_mask isa Vector{Bool} + @assert axis_mask isa AbstractVector{Bool} size = sum(axis_mask) base_named_vector = base_named_vector[axis_mask] end @@ -2480,7 +2504,7 @@ function fetch_first_named_vector( if axis_mask === nothing vector_fetch_state.may_modify_named_vector = false else - @assert axis_mask isa Vector{Bool} + @assert axis_mask isa AbstractVector{Bool} base_named_vector = base_named_vector[axis_mask] # NOJET vector_fetch_state.may_modify_named_vector = true end @@ -2569,7 +2593,7 @@ function patch_fetched_values( if axis_mask === nothing axis_mask = fetched_mask else - @assert axis_mask isa Vector{Bool} + @assert axis_mask isa AbstractVector{Bool} if !vector_fetch_state.may_modify_axis_mask axis_mask = copy(axis_mask) end @@ -2597,7 +2621,7 @@ function patch_fetched_values( end end @assert masked_index == length(masked_fetched_values) - vector_fetch_state.if_not_values = if_not_values[fetched_mask] + vector_fetch_state.if_not_values = if_not_values[fetched_mask] # NOJET fetched_values = masked_fetched_values end end @@ -2641,7 +2665,7 @@ function ensure_if_not_values(vector_fetch_state::VectorFetchState, size::Int):: return if_not_values end -function ensure_fetched_mask(fetched_mask::Maybe{Vector{Bool}}, size::Int)::Vector{Bool} +function ensure_fetched_mask(fetched_mask::Maybe{AbstractVector{Bool}}, size::Int)::AbstractVector{Bool} if fetched_mask === nothing fetched_mask = ones(Bool, size) end @@ -2781,7 +2805,7 @@ function apply_mask_to_axis_state( elseif eltype(mask_vector) != Bool mask_vector = mask_vector .!= 0 end - @assert mask_vector isa Union{Vector{Bool}, BitVector} + @assert eltype(mask_vector) <: Bool axis_mask = axis_state.axis_modifier if axis_mask === nothing @@ -2789,7 +2813,7 @@ function apply_mask_to_axis_state( axis_state.axis_modifier = axis_mask end - @assert axis_mask isa Vector{Bool} + @assert axis_mask isa AbstractVector{Bool} 
update_axis_mask(axis_mask, mask_vector, mask_operation) axis_state.axis_modifier = axis_mask return nothing @@ -2913,7 +2937,7 @@ function apply_mask_to_base_vector_state(base_vector_state::VectorState, masked_ masked_axis_mask = masked_axis_state.axis_modifier if base_axis_mask != masked_axis_mask - @assert masked_axis_mask isa Vector{Bool} + @assert masked_axis_mask isa AbstractVector{Bool} @assert base_axis_mask === nothing || !any(masked_axis_mask .& .!base_axis_mask) # NOJET apply_mask_to_vector_state(base_vector_state, masked_axis_mask) end @@ -2929,7 +2953,7 @@ function apply_mask_to_base_matrix_state(base_matrix_state::MatrixState, masked_ masked_axis_mask = masked_axis_state.axis_modifier if base_axis_mask != masked_axis_mask - @assert masked_axis_mask isa Vector{Bool} + @assert masked_axis_mask isa AbstractVector{Bool} @assert base_axis_mask === nothing || !any(masked_axis_mask .& .!base_axis_mask) apply_mask_to_matrix_state_rows(base_matrix_state, masked_axis_mask) end @@ -2997,12 +3021,12 @@ function compute_count_by( return counts_matrix end -function apply_mask_to_vector_state(vector_state::VectorState, new_axis_mask::Vector{Bool})::Nothing +function apply_mask_to_vector_state(vector_state::VectorState, new_axis_mask::AbstractVector{Bool})::Nothing axis_state = vector_state.axis_state @assert axis_state !== nothing old_axis_mask = axis_state.axis_modifier if old_axis_mask === nothing - vector_state.named_vector = vector_state.named_vector[new_axis_mask] + vector_state.named_vector = vector_state.named_vector[new_axis_mask] # NOJET axis_state.axis_modifier = new_axis_mask else sub_axis_mask = new_axis_mask[old_axis_mask] @@ -3012,7 +3036,7 @@ function apply_mask_to_vector_state(vector_state::VectorState, new_axis_mask::Ve return nothing end -function apply_mask_to_matrix_state_rows(matrix_state::MatrixState, new_rows_mask::Vector{Bool})::Nothing +function apply_mask_to_matrix_state_rows(matrix_state::MatrixState, 
new_rows_mask::AbstractVector{Bool})::Nothing rows_axis_state = matrix_state.rows_axis_state @assert rows_axis_state !== nothing old_rows_mask = rows_axis_state.axis_modifier @@ -3112,7 +3136,7 @@ function fetch_group_by_matrix(query_state::QueryState, group_by::GroupBy)::Noth columns_names = get_axis(query_state.daf, columns_axis_state.axis_name) axis_mask = columns_axis_state.axis_modifier if axis_mask !== nothing - @assert axis_mask isa Vector{Bool} + @assert axis_mask isa AbstractVector{Bool} columns_names = columns_names[axis_mask] end diff --git a/src/writers.jl b/src/writers.jl index 42e1953..e1ed97e 100644 --- a/src/writers.jl +++ b/src/writers.jl @@ -81,15 +81,11 @@ function set_scalar!(daf::DafWriter, name::AbstractString, value::StorageScalar; if !overwrite require_no_scalar(daf, name) - end - - Formats.invalidate_cached!(daf, Formats.scalar_cache_key(name)) - if Formats.format_has_scalar(daf, name) - Formats.format_delete_scalar!(daf, name; for_set = true) else - Formats.invalidate_cached!(daf, Formats.scalar_names_cache_key()) + delete_scalar!(daf, name; must_exist = false, _for_set = true) end + Formats.invalidate_cached!(daf, Formats.scalar_names_cache_key()) Formats.format_set_scalar!(daf, name, value) return nothing @@ -107,7 +103,7 @@ Delete a scalar property with some `name` from `daf`. If `must_exist` (the default), this first verifies the `name` scalar property exists in `daf`. """ -function delete_scalar!(daf::DafWriter, name::AbstractString; must_exist::Bool = true)::Nothing +function delete_scalar!(daf::DafWriter, name::AbstractString; must_exist::Bool = true, _for_set = false)::Nothing return with_write_lock(daf) do @debug "delete_scalar! 
daf: $(depict(daf)) name: $(name) must exist: $(must_exist)" @@ -118,7 +114,7 @@ function delete_scalar!(daf::DafWriter, name::AbstractString; must_exist::Bool = if Formats.format_has_scalar(daf, name) Formats.invalidate_cached!(daf, Formats.scalar_cache_key(name)) Formats.invalidate_cached!(daf, Formats.scalar_names_cache_key()) - Formats.format_delete_scalar!(daf, name; for_set = false) + Formats.format_delete_scalar!(daf, name; for_set = _for_set) end return nothing @@ -272,6 +268,8 @@ function set_vector!( if !overwrite require_no_vector(daf, axis, name) + else + delete_vector!(daf, axis, name; must_exist = false, _for_set = true) end update_caches_before_set_vector(daf, axis, name) @@ -325,7 +323,7 @@ function get_empty_dense_vector!( axis::AbstractString, name::AbstractString, eltype::Type{T}; - overwrite::Bool, + overwrite::Bool = false, )::AbstractVector{T} where {T <: StorageNumber} @assert isbitstype(eltype) return begin_write_lock(daf) do @@ -338,19 +336,22 @@ function get_empty_dense_vector!( if !overwrite require_no_vector(daf, axis, name) + else + delete_vector!(daf, axis, name; must_exist = false, _for_set = true) end update_caches_before_set_vector(daf, axis, name) - return Formats.format_empty_dense_vector!(daf, axis, name, eltype) + return Formats.format_get_empty_dense_vector!(daf, axis, name, eltype) end end function filled_empty_dense_vector!( - ::DafWriter, - ::AbstractString, - ::AbstractString, + daf::DafWriter, + axis::AbstractString, + name::AbstractString, filled_vector::AbstractVector{T}, )::Nothing where {T <: StorageNumber} + Formats.format_filled_empty_dense_vector!(daf, axis, name, filled_vector) @debug "empty_dense_vector! 
filled vector: $(depict(filled_vector)) }" return nothing end @@ -424,7 +425,7 @@ function get_empty_sparse_vector!( eltype::Type{T}, nnz::StorageInteger, indtype::Type{I}; - overwrite::Bool, + overwrite::Bool = false, )::Tuple{AbstractVector{I}, AbstractVector{T}, Any} where {T <: StorageNumber, I <: StorageInteger} return begin_write_lock(daf) do @debug "empty_sparse_vector! daf: $(depict(daf)) axis: $(axis) name: $(name) eltype: $(eltype) nnz: $(nnz) indtype: $(indtype) overwrite: $(overwrite) {" @@ -436,10 +437,12 @@ function get_empty_sparse_vector!( if !overwrite require_no_vector(daf, axis, name) + else + delete_vector!(daf, axis, name; must_exist = false, _for_set = true) end update_caches_before_set_vector(daf, axis, name) - return Formats.format_empty_sparse_vector!(daf, axis, name, eltype, nnz, indtype) + return Formats.format_get_empty_sparse_vector!(daf, axis, name, eltype, nnz, indtype) end end @@ -452,7 +455,7 @@ function filled_empty_sparse_vector!( extra::Any, )::Nothing where {T <: StorageNumber, I <: StorageInteger} filled = SparseVector(axis_length(daf, axis), nzind, nzval) - Formats.format_filled_sparse_vector!(daf, axis, name, extra, filled) + Formats.format_filled_empty_sparse_vector!(daf, axis, name, extra, filled) @debug "empty_sparse_vector! filled vector: $(depict(filled)) }" return nothing end @@ -481,7 +484,13 @@ Delete a vector property with some `name` for some `axis` from `daf`. This first verifies the `axis` exists in `daf` and that the property name isn't `name`. If `must_exist` (the default), this also verifies the `name` vector exists for the `axis`. """ -function delete_vector!(daf::DafWriter, axis::AbstractString, name::AbstractString; must_exist::Bool = true)::Nothing +function delete_vector!( + daf::DafWriter, + axis::AbstractString, + name::AbstractString; + must_exist::Bool = true, + _for_set::Bool = false, +)::Nothing return with_write_lock(daf) do @debug "delete_vector! 
$daf: $(depict(daf)) axis: $(axis) name: $(name) must exist: $(must_exist)" @@ -495,7 +504,7 @@ function delete_vector!(daf::DafWriter, axis::AbstractString, name::AbstractStri if Formats.format_has_vector(daf, axis, name) Formats.invalidate_cached!(daf, Formats.vector_cache_key(axis, name)) Formats.invalidate_cached!(daf, Formats.vector_names_cache_key(axis)) - Formats.format_delete_vector!(daf, axis, name; for_set = false) + Formats.format_delete_vector!(daf, axis, name; for_set = _for_set) end return nothing @@ -568,6 +577,8 @@ function set_matrix!( if relayout require_no_matrix(daf, columns_axis, rows_axis, name; relayout = relayout) end + else + delete_matrix!(daf, columns_axis, rows_axis, name; relayout = relayout, must_exist = false, _for_set = true) end update_caches_before_set_matrix(daf, rows_axis, columns_axis, name) @@ -630,7 +641,7 @@ function get_empty_dense_matrix!( columns_axis::AbstractString, name::AbstractString, eltype::Type{T}; - overwrite::Bool, + overwrite::Bool = false, )::Any where {T <: StorageNumber} return begin_write_lock(daf) do @debug "empty_dense_matrix! 
daf: $(depict(daf)) rows_axis: $(rows_axis) columns_axis: $(columns_axis) name: $(name) eltype: $(eltype) overwrite: $(overwrite) {" @@ -639,20 +650,23 @@ function get_empty_dense_matrix!( if !overwrite require_no_matrix(daf, rows_axis, columns_axis, name; relayout = false) + else + delete_matrix!(daf, rows_axis, columns_axis, name; relayout = false, must_exist = false, _for_set = true) end update_caches_before_set_matrix(daf, rows_axis, columns_axis, name) - return Formats.format_empty_dense_matrix!(daf, rows_axis, columns_axis, name, eltype) + return Formats.format_get_empty_dense_matrix!(daf, rows_axis, columns_axis, name, eltype) end end function filled_empty_dense_matrix!( - ::DafWriter, - ::AbstractString, - ::AbstractString, - ::AbstractString, + daf::DafWriter, + rows_axis::AbstractString, + columns_axis::AbstractString, + name::AbstractString, filled_matrix::AbstractMatrix{T}, )::Nothing where {T <: StorageNumber} + Formats.format_filled_empty_dense_matrix!(daf, rows_axis, columns_axis, name, filled_matrix) @debug "empty_dense_matrix! filled matrix: $(depict(filled_matrix)) }" return nothing end @@ -734,7 +748,7 @@ function get_empty_sparse_matrix!( eltype::Type{T}, nnz::StorageInteger, indtype::Type{I}; - overwrite::Bool, + overwrite::Bool = false, )::Tuple{AbstractVector{I}, AbstractVector{I}, AbstractVector{T}, Any} where {T <: StorageNumber, I <: StorageInteger} return begin_write_lock(daf) do @debug "empty_sparse_matrix! 
daf: $(depict(daf)) rows_axis: $(rows_axis) columns_axis: $(columns_axis) name: $(name) eltype: $(eltype) overwrite: $(overwrite) {" @@ -743,10 +757,12 @@ function get_empty_sparse_matrix!( if !overwrite require_no_matrix(daf, rows_axis, columns_axis, name; relayout = false) + else + delete_matrix!(daf, rows_axis, columns_axis, name; relayout = false, must_exist = false, _for_set = true) end update_caches_before_set_matrix(daf, rows_axis, columns_axis, name) - return Formats.format_empty_sparse_matrix!(daf, rows_axis, columns_axis, name, eltype, nnz, indtype) + return Formats.format_get_empty_sparse_matrix!(daf, rows_axis, columns_axis, name, eltype, nnz, indtype) end end @@ -761,7 +777,7 @@ function filled_empty_sparse_matrix!( extra::Any, )::Nothing where {T <: StorageNumber, I <: StorageInteger} filled = SparseMatrixCSC(axis_length(daf, rows_axis), axis_length(daf, columns_axis), colptr, rowval, nzval) - Formats.format_filled_sparse_matrix!(daf, rows_axis, columns_axis, name, extra, filled) + Formats.format_filled_empty_sparse_matrix!(daf, rows_axis, columns_axis, name, extra, filled) @debug "empty_sparse_matrix! 
filled matrix: $(depict(filled)) }" return nothing end @@ -821,6 +837,8 @@ function relayout_matrix!( if !overwrite require_no_matrix(daf, columns_axis, rows_axis, name; relayout = false) + else + delete_matrix!(daf, columns_axis, rows_axis, name; relayout = false, must_exist = false, _for_set = true) end update_caches_before_set_matrix(daf, columns_axis, rows_axis, name) @@ -875,6 +893,7 @@ function delete_matrix!( name::AbstractString; must_exist::Bool = true, relayout::Bool = true, + _for_set::Bool = false, )::Nothing return with_write_lock(daf) do relayout = relayout && rows_axis != columns_axis @@ -888,11 +907,11 @@ function delete_matrix!( end if Formats.format_has_matrix(daf, rows_axis, columns_axis, name) - update_caches_and_delete_matrix(daf, rows_axis, columns_axis, name) + update_caches_and_delete_matrix(daf, rows_axis, columns_axis, name, _for_set) end if relayout && Formats.format_has_matrix(daf, columns_axis, rows_axis, name) - update_caches_and_delete_matrix(daf, columns_axis, rows_axis, name) + update_caches_and_delete_matrix(daf, columns_axis, rows_axis, name, _for_set) end return nothing @@ -904,10 +923,11 @@ function update_caches_and_delete_matrix( rows_axis::AbstractString, columns_axis::AbstractString, name::AbstractString, + for_set::Bool, )::Nothing Formats.invalidate_cached!(daf, Formats.matrix_names_cache_key(rows_axis, columns_axis)) Formats.invalidate_cached!(daf, Formats.matrix_cache_key(rows_axis, columns_axis, name)) - return Formats.format_delete_matrix!(daf, rows_axis, columns_axis, name; for_set = false) + return Formats.format_delete_matrix!(daf, rows_axis, columns_axis, name; for_set = for_set) end function require_no_matrix( diff --git a/test/data.jl b/test/data.jl index afa4159..0dcc27b 100644 --- a/test/data.jl +++ b/test/data.jl @@ -1215,7 +1215,7 @@ function test_missing_matrix(daf::DafReader, depth::Int)::Nothing end nested_test("transpose") do - @test_throws "type not in column-major layout: 3 x 4 x Int64 in Rows 
(transposed Dense)" get_matrix( + @test_throws "type not in column-major layout: 3 x 4 x Int64 in Rows (Transpose Dense)" get_matrix( daf, "cell", "gene", @@ -1486,7 +1486,7 @@ function test_missing_matrix(daf::DafReader, depth::Int)::Nothing end nested_test("transpose") do - @test_throws "type not in column-major layout: 3 x 4 x Int64 in Rows (transposed Dense)" set_matrix!( + @test_throws "type not in column-major layout: 3 x 4 x Int64 in Rows (Transpose Dense)" set_matrix!( daf, "cell", "gene", @@ -2697,24 +2697,6 @@ function test_existing_relayout_matrix(daf::DafReader, depth::Int)::Nothing test_existing_relayout_matrix(daf, depth + 1) return nothing end - end - - nested_test("false") do - @test set_matrix!( - daf, - "cell", - "gene", - "UMIs", - UMIS_BY_DEPTH[depth]; - overwrite = true, - relayout = false, - ) === nothing - - nested_test("delete") do - @test delete_matrix!(daf, "gene", "cell", "UMIs"; relayout = false) === nothing - test_existing_matrix(daf, depth + 1) - return nothing - end nested_test("relayout") do nested_test("overwrite") do @@ -2746,6 +2728,35 @@ function test_existing_relayout_matrix(daf::DafReader, depth::Int)::Nothing end end end + + nested_test("false") do + @test set_matrix!( + daf, + "cell", + "gene", + "UMIs", + UMIS_BY_DEPTH[depth]; + overwrite = true, + relayout = false, + ) === nothing + + nested_test("delete") do + @test_throws dedent(""" + missing matrix: UMIs + for the rows axis: gene + and the columns axis: cell + in the daf data: $(daf.name) + """) delete_matrix!(daf, "gene", "cell", "UMIs"; relayout = false) === nothing + test_existing_matrix(daf, depth + 1) + return nothing + end + + nested_test("relayout") do + @test relayout_matrix!(daf, "cell", "gene", "UMIs") === nothing + test_existing_relayout_matrix(daf, depth + 1) + return nothing + end + end end end end diff --git a/test/messages.jl b/test/messages.jl index 4511f19..0a4737b 100644 --- a/test/messages.jl +++ b/test/messages.jl @@ -88,7 +88,7 @@ 
nested_test("messages") do end nested_test("transpose") do - @test depict(transpose(matrix)) == "3 x 2 x Int64 in Rows (transposed Dense)" + @test depict(transpose(matrix)) == "3 x 2 x Int64 in Rows (Transpose Dense)" end nested_test("read_only") do @@ -108,7 +108,7 @@ nested_test("messages") do end nested_test("transpose") do - @test depict(transpose(matrix)) == "3 x 2 x Int64 in Rows (transposed Sparse UInt8 83%)" + @test depict(transpose(matrix)) == "3 x 2 x Int64 in Rows (Transpose Sparse UInt8 83%)" end nested_test("read_only") do