Skip to content

Commit

Permalink
Add concatenate.
Browse files Browse the repository at this point in the history
  • Loading branch information
orenbenkiki committed Mar 23, 2024
1 parent bc046d6 commit 10273a5
Show file tree
Hide file tree
Showing 34 changed files with 1,771 additions and 90 deletions.
1 change: 1 addition & 0 deletions daf/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,3 +59,4 @@
from .views import * # isort: skip
from .copies import * # isort: skip
from .adapters import * # isort: skip
from .concat import * # isort: skip
8 changes: 4 additions & 4 deletions daf/adapters.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,12 +27,12 @@
from .views import ViewData

__all__ = [
"daf_adapter",
"adapter",
]


@contextmanager
def daf_adapter(
def adapter(
view: DafWriter | DafReadOnly,
name: Optional[str] = None,
capture: Callable[..., DafWriter] = MemoryDaf,
Expand All @@ -43,9 +43,9 @@ def daf_adapter(
overwrite: bool = False,
) -> Iterator[DafWriter]:
"""
Invoke a computation on a ``view`` data set; copy a ``daf_view`` of the updated data set into the base ``Daf`` data
Invoke a computation on a ``view`` data set; copy a ``viewer`` of the updated data set into the base ``Daf`` data
set of the view. See the Julia
`documentation <https://tanaylab.github.io/Daf.jl/v0.1.0/adapters.html#Daf.Adapters.daf_adapter>`__ for details.
`documentation <https://tanaylab.github.io/Daf.jl/v0.1.0/adapters.html#Daf.Adapters.adapter>`__ for details.
"""
writer = capture(name=jl.Daf.Adapters.get_adapter_capture_name(view, name=name))
adapted = jl.Daf.Adapters.get_adapter_input(view, name=name, writer=writer)
Expand Down
80 changes: 80 additions & 0 deletions daf/concat.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
"""
Concatenate multiple ``Daf`` data sets along some axis. See the Julia
`documentation <https://tanaylab.github.io/Daf.jl/v0.1.0/concat.html>`__ for details.
"""

from typing import AbstractSet
from typing import Literal
from typing import Mapping
from typing import Optional
from typing import Sequence

import numpy as np

from .copies import EmptyData
from .data import DafReader
from .data import DafWriter
from .data import DataKey
from .julia_import import _to_julia_array
from .julia_import import jl

__all__ = [
"concatenate",
]

#: How to merge the values of a property from the concatenated data sets into the result data set; one of the strings
#: ``"SkipProperty"``, ``"LastValue"`` or ``"CollectAxis"``. See the Julia
#: `documentation <https://tanaylab.github.io/Daf.jl/v0.1.0/concat.html#Daf.Concats.MergeData>`__ for details.
MergeAction = Literal["SkipProperty", "LastValue", "CollectAxis"]

# Translate each Python ``MergeAction`` string into the Julia ``Daf`` value of the same name.
JL_MERGE_ACTION = {
    "SkipProperty": jl.Daf.SkipProperty,
    "LastValue": jl.Daf.LastValue,
    "CollectAxis": jl.Daf.CollectAxis,
}

#: A mapping from a ``DataKey`` to the ``MergeAction`` to apply to the matching properties. See the Julia
#: `documentation <https://tanaylab.github.io/Daf.jl/v0.1.0/concat.html#Daf.Concats.MergeData>`__ for details.
#:
#: Note that in Python this is a dictionary and not a vector. This allows using the ``key: value`` notation,
#: and preserves the order of the entries since in Python dictionaries are ordered by default.
MergeData = Mapping[DataKey, MergeAction]


def concatenate(
    destination: DafWriter,
    axis: str | Sequence[str],
    sources: Sequence[DafReader],
    names: Optional[Sequence[str]] = None,
    dataset_axis: Optional[str] = "dataset",
    dataset_property: bool = True,
    prefix: bool | Sequence[bool] = False,
    prefixed: Optional[AbstractSet[str] | Sequence[AbstractSet[str]]] = None,
    empty: Optional[EmptyData] = None,
    sparse_if_saves_storage_fraction: float = 0.1,
    merge: Optional[MergeData] = None,
    overwrite: bool = False,
) -> None:
    """
    Concatenate the data of the ``sources`` sequence of ``Daf`` data sets into the single ``destination`` data set,
    along one or more concatenation ``axis``. See the Julia
    `documentation <https://tanaylab.github.io/Daf.jl/v0.1.0/concat.html>`__ for the meaning of each parameter.

    This is a thin wrapper around the Julia ``Daf.concatenate`` function. The ``axis``, ``names``, ``prefix`` and
    ``prefixed`` values are converted using ``_to_julia_array``, the ``sources`` are converted to a Julia vector of
    ``DafReader``, and each action name in ``merge`` (if given) is replaced by the matching Julia value. All other
    parameters are passed to Julia as they are.

    Raises ``KeyError`` if ``merge`` contains a value which is not one of the ``MergeAction`` names.
    """
    # Translate the Python mapping into the vector of Julia ``key => action`` pairs (``None`` stays ``None``).
    merge_data = None
    if merge is not None:
        merge_items = []
        for data_key, action_name in merge.items():
            merge_items.append((data_key, JL_MERGE_ACTION[action_name]))
        merge_data = jl._pairify_merge(merge_items)

    # Convert the sequence-like arguments, in the same order in which they are passed below.
    jl_axis = _to_julia_array(axis)
    jl_sources = jl.pyconvert(jl._DafReadersVector, np.array(sources))
    jl_names = _to_julia_array(names)
    jl_prefix = _to_julia_array(prefix)
    jl_prefixed = _to_julia_array(prefixed)

    jl.Daf.concatenate(
        destination,
        jl_axis,
        jl_sources,
        names=jl_names,
        dataset_axis=dataset_axis,
        dataset_property=dataset_property,
        prefix=jl_prefix,
        prefixed=jl_prefixed,
        empty=empty,
        sparse_if_saves_storage_fraction=sparse_if_saves_storage_fraction,
        merge=merge_data,
        overwrite=overwrite,
    )
4 changes: 2 additions & 2 deletions daf/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -450,10 +450,10 @@ def get_pd_frame(
def read_only(self, *, name: Optional[str] = None) -> "DafReadOnly":
"""
Wrap the ``Daf`` data set with a ``DafReadOnlyWrapper`` to protect it against accidental modification. See the
Julia `documentation <https://tanaylab.github.io/Daf.jl/v0.1.0/data.html#Daf.ReadOnly.daf_read_only>`__ for
Julia `documentation <https://tanaylab.github.io/Daf.jl/v0.1.0/data.html#Daf.ReadOnly.read_only>`__ for
details.
"""
return DafReadOnly(jl.Daf.daf_read_only(self.jl_obj, name=name))
return DafReadOnly(jl.Daf.read_only(self.jl_obj, name=name))


class DafReadOnly(DafReader):
Expand Down
14 changes: 14 additions & 0 deletions daf/julia_import.py
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,8 @@ def _jl_pairs(mapping: Mapping | None) -> Sequence[Tuple[str, Any]] | None:
return list(mapping.items())


# Define a Julia-side alias for ``Vector{DafReader}``, used as the ``pyconvert`` target type when passing a Python
# sequence of readers to Julia. Presumably kept a plain (non-f) string because the braces are Julia syntax.
jl.seval("_DafReadersVector = Vector{DafReader}") # NOT F-STRING

jl.seval(
"""
function _pairify_columns(items::Maybe{AbstractVector})::Maybe{Daf.QueryColumns}
Expand Down Expand Up @@ -259,6 +261,18 @@ def _jl_pairs(mapping: Mapping | None) -> Sequence[Tuple[str, Any]] | None:
"""
)

# Define the Julia-side helper ``_pairify_merge``: given a vector of ``(key, action)`` tuples it returns a vector of
# ``key => action`` pairs (declared as ``Maybe{Daf.MergeData}``), and given ``nothing`` it returns ``nothing``.
# NOTE(review): the loop variable is named ``query`` but the Python caller passes merge actions in that position.
jl.seval(
"""
function _pairify_merge(items::Maybe{AbstractVector})::Maybe{Daf.MergeData}
if items == nothing
return nothing
else
return [key => query for (key, query) in items]
end
end
"""
)

jl.seval(
"""
function pyconvert_rule_jl_object(::Type{T}, x::Py) where {T}
Expand Down
16 changes: 8 additions & 8 deletions daf/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from .queries import Query

__all__ = [
"daf_view",
"viewer",
"ViewAxes",
"ViewData",
"ALL_SCALARS",
Expand All @@ -23,19 +23,19 @@
"ALL_MATRICES",
]

#: A key to use in the ``data`` parameter of ``daf_view`` to specify all the base data scalars. See the Julia
#: A key to use in the ``data`` parameter of ``viewer`` to specify all the base data scalars. See the Julia
#: `documentation <https://tanaylab.github.io/Daf.jl/v0.1.0/views.html#Daf.Views.ALL_SCALARS>`__ for details.
ALL_SCALARS = "*"

#: A pair to use in the ``axes`` parameter of ``daf_view`` to specify all the base data axes. See the Julia
#: A pair to use in the ``axes`` parameter of ``viewer`` to specify all the base data axes. See the Julia
#: `documentation <https://tanaylab.github.io/Daf.jl/v0.1.0/views.html#Daf.Views.ALL_AXES>`__ for details.
ALL_AXES = "*"

#: A key to use in the ``data`` parameter of ``daf_view`` to specify all the vectors of the exposed axes. See the Julia
#: A key to use in the ``data`` parameter of ``viewer`` to specify all the vectors of the exposed axes. See the Julia
#: `documentation <https://tanaylab.github.io/Daf.jl/v0.1.0/views.html#Daf.Views.ALL_VECTORS>`__ for details.
ALL_VECTORS = ("*", "*")

#: A key to use in the ``data`` parameter of ``daf_view`` to specify all the matrices of the exposed axes. See the Julia
#: A key to use in the ``data`` parameter of ``viewer`` to specify all the matrices of the exposed axes. See the Julia
#: `documentation <https://tanaylab.github.io/Daf.jl/v0.1.0/views.html#Daf.Views.ALL_MATRICES>`__ for details.
ALL_MATRICES = ("*", "*", "*")

Expand All @@ -54,17 +54,17 @@
ViewData = Mapping[DataKey, str | Query | None]


def daf_view(
def viewer(
dset: DafReader, *, name: Optional[str] = None, axes: Optional[ViewAxes] = None, data: Optional[ViewData] = None
) -> DafReadOnly:
"""
Wrap ``Daf`` data set with a read-only ``DafView``. See the Julia
`documentation <https://tanaylab.github.io/Daf.jl/v0.1.0/views.html#Daf.Views.daf_view>`__ for details.
`documentation <https://tanaylab.github.io/Daf.jl/v0.1.0/views.html#Daf.Views.viewer>`__ for details.
The order of the axes and data matters. Luckily, the default dictionary type is ordered in modern Python, so if you
write ``axes = {ALL_AXES: None, "cell": "obs"}`` you can trust that the ``cell`` axis will be exposed as ``obs``
(and similarly for ``data``).
"""
return DafReadOnly(
jl.Daf.daf_view(dset, name=name, axes=jl._pairify_axes(_jl_pairs(axes)), data=jl._pairify_data(_jl_pairs(data)))
jl.Daf.viewer(dset, name=name, axes=jl._pairify_axes(_jl_pairs(axes)), data=jl._pairify_data(_jl_pairs(data)))
)
1 change: 1 addition & 0 deletions docs/API.rst
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,5 @@ API
views
copies
adapters
concat
storage_types
5 changes: 5 additions & 0 deletions docs/concat.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
concat
======

.. automodule:: daf.concat
:members:
31 changes: 27 additions & 4 deletions docs/v0.1.0/html/API.html
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,11 @@
</a>
</li>

<li class="toctree-l2">
<a class="reference internal" href="concat.html">concat
</a>
</li>

<li class="toctree-l2">
<a class="reference internal" href="storage_types.html">storage_types
</a>
Expand Down Expand Up @@ -1394,9 +1399,9 @@ <h1>API
<ul>

<li class="toctree-l2">
<a class="reference internal" href="views.html#daf.views.daf_view">
<a class="reference internal" href="views.html#daf.views.viewer">
<code class="docutils literal notranslate">
<span class="pre">daf_view()
<span class="pre">viewer()
</span>
</code>
</a>
Expand Down Expand Up @@ -1529,9 +1534,27 @@ <h1>API
<ul>

<li class="toctree-l2">
<a class="reference internal" href="adapters.html#daf.adapters.daf_adapter">
<a class="reference internal" href="adapters.html#daf.adapters.adapter">
<code class="docutils literal notranslate">
<span class="pre">adapter()
</span>
</code>
</a>
</li>

</ul>

</li>

<li class="toctree-l1">
<a class="reference internal" href="concat.html">concat
</a>
<ul>

<li class="toctree-l2">
<a class="reference internal" href="concat.html#daf.concat.concatenate">
<code class="docutils literal notranslate">
<span class="pre">daf_adapter()
<span class="pre">concatenate()
</span>
</code>
</a>
Expand Down
12 changes: 6 additions & 6 deletions docs/v0.1.0/html/_modules/daf/adapters.html
Original file line number Diff line number Diff line change
Expand Up @@ -106,14 +106,14 @@ <h1>Source code for daf.adapters</h1><div class="highlight"><pre>
<span class="kn">from</span> <span class="nn">.views</span> <span class="kn">import</span> <span class="n">ViewData</span>

<span class="n">__all__</span> <span class="o">=</span> <span class="p">[</span>
<span class="s2">&quot;daf_adapter&quot;</span><span class="p">,</span>
<span class="s2">&quot;adapter&quot;</span><span class="p">,</span>
<span class="p">]</span>


<div class="viewcode-block" id="daf_adapter">
<a class="viewcode-back" href="../../adapters.html#daf.adapters.daf_adapter">[docs]</a>
<div class="viewcode-block" id="adapter">
<a class="viewcode-back" href="../../adapters.html#daf.adapters.adapter">[docs]</a>
<span class="nd">@contextmanager</span>
<span class="k">def</span> <span class="nf">daf_adapter</span><span class="p">(</span>
<span class="k">def</span> <span class="nf">adapter</span><span class="p">(</span>
<span class="n">view</span><span class="p">:</span> <span class="n">DafWriter</span> <span class="o">|</span> <span class="n">DafReadOnly</span><span class="p">,</span>
<span class="n">name</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">capture</span><span class="p">:</span> <span class="n">Callable</span><span class="p">[</span><span class="o">...</span><span class="p">,</span> <span class="n">DafWriter</span><span class="p">]</span> <span class="o">=</span> <span class="n">MemoryDaf</span><span class="p">,</span>
Expand All @@ -124,9 +124,9 @@ <h1>Source code for daf.adapters</h1><div class="highlight"><pre>
<span class="n">overwrite</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Iterator</span><span class="p">[</span><span class="n">DafWriter</span><span class="p">]:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Invoke a computation on a ``view`` data set; copy a ``daf_view`` of the updated data set into the base ``Daf`` data</span>
<span class="sd"> Invoke a computation on a ``view`` data set; copy a ``viewer`` of the updated data set into the base ``Daf`` data</span>
<span class="sd"> set of the view. See the Julia</span>
<span class="sd"> `documentation &lt;https://tanaylab.github.io/Daf.jl/v0.1.0/adapters.html#Daf.Adapters.daf_adapter&gt;`__ for details.</span>
<span class="sd"> `documentation &lt;https://tanaylab.github.io/Daf.jl/v0.1.0/adapters.html#Daf.Adapters.adapter&gt;`__ for details.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">writer</span> <span class="o">=</span> <span class="n">capture</span><span class="p">(</span><span class="n">name</span><span class="o">=</span><span class="n">jl</span><span class="o">.</span><span class="n">Daf</span><span class="o">.</span><span class="n">Adapters</span><span class="o">.</span><span class="n">get_adapter_capture_name</span><span class="p">(</span><span class="n">view</span><span class="p">,</span> <span class="n">name</span><span class="o">=</span><span class="n">name</span><span class="p">))</span>
<span class="n">adapted</span> <span class="o">=</span> <span class="n">jl</span><span class="o">.</span><span class="n">Daf</span><span class="o">.</span><span class="n">Adapters</span><span class="o">.</span><span class="n">get_adapter_input</span><span class="p">(</span><span class="n">view</span><span class="p">,</span> <span class="n">name</span><span class="o">=</span><span class="n">name</span><span class="p">,</span> <span class="n">writer</span><span class="o">=</span><span class="n">writer</span><span class="p">)</span>
Expand Down
Loading

0 comments on commit 10273a5

Please sign in to comment.