-
Notifications
You must be signed in to change notification settings - Fork 0
Types Terminology Mindmap
Thor Whalen edited this page Jul 13, 2023
·
1 revision
- Source: Specification of source input (not input itself)
- Reader: Example, a generator of source data
- Selector: Selects data
- Generator: Iterable component
- Buffer: Temporarily saves a limited amount of data
- Segment: data that occurred during a given time interval
- Segmenter: Provides a generator of "segments"
- Chunker: Segmenter outputting fixed-length segments
- Filter: Outputs less than its input
- Merger: Aggregates multiple inputs
- Featurizer: Computes features of input
- Learner: A component that has learnable params
- Model: (fitted learner)
- Iterable: Something you can iterate over and consume (e.g. arrays, streams, DB cursors, etc.)
- Sliceable: Sequence from which you can get the data for a given interval
- Learnable: Has parameters that can be "trained" by data
- Horizontal/Vertical: To denote dimensions of data. Horizontal would be the time (or other order) axis, vertical the "field" dimension
- Feed: Input, source, define input declaratively
- Select: Filter, extract, restructure, prepare, ...
- Do: Retrieve, transform, store, compute, ...
from functools import partial
from typing import Union, Protocol, Tuple, Any, Callable
from i2.wrapper import Wrap, wrap
# Types describing how a segment of a stream is addressed.
ChannelKey = Any  # whatever is used to look a stream up (e.g. 'plc' in this file)
TS = Union[int, float]  # timestamp
# The two bounds of a segment's interval (used as slice start/stop in this
# file) are both plain timestamps.
BT = TT = TS
SegmentKey = Tuple[ChannelKey, BT, TT]  # normalized format: (channel, bt, tt)
def make_key_1(bt, tt, ch: ChannelKey) -> SegmentKey:
    """Build a normalized segment key from ``(bt, tt, ch)`` arguments.

    This transformation is just a permutation: the channel, given last, is
    moved to the front.

    :param bt: First bound of the segment's interval.
    :param tt: Second bound of the segment's interval.
    :param ch: Key of the channel the segment is taken from.
    :return: The tuple ``(ch, bt, tt)``.
    """
    return ch, bt, tt


assert make_key_1(2, 5, 'plc') == ('plc', 2, 5)
def make_key_2(bttt_slice: slice, ch: ChannelKey) -> SegmentKey:
    """Build a normalized segment key from a slice and a channel.

    This one needs to extract the bt and tt from a slice: ``bt`` is taken
    from ``bttt_slice.start`` and ``tt`` from ``bttt_slice.stop``. The slice's
    ``step`` is not used.

    :param bttt_slice: Slice whose start and stop are the interval's bounds.
    :param ch: Key of the channel the segment is taken from.
    :return: The tuple ``(ch, bttt_slice.start, bttt_slice.stop)``.
    """
    bt, tt = bttt_slice.start, bttt_slice.stop
    return ch, bt, tt


assert make_key_2(slice(2, 5), 'plc') == ('plc', 2, 5)
def make_key_3(
    ch: ChannelKey,
    bt,
    bt2tt: Callable[[BT], TT] = lambda bt: bt + 3,  # to be partialized!
) -> SegmentKey:
    """Build a normalized segment key from a channel and ``bt`` only.

    This one doesn't even have tt in the input, but deduces it from bt by
    calling ``bt2tt(bt)``.

    :param ch: Key of the channel the segment is taken from.
    :param bt: First bound of the segment's interval.
    :param bt2tt: Function computing ``tt`` from ``bt``. The default returns
        ``bt + 3``; meant to be fixed to something else with ``partial``.
    :return: The tuple ``(ch, bt, bt2tt(bt))``.
    """
    tt = bt2tt(bt)
    return ch, bt, tt


assert make_key_3('plc', 2) == ('plc', 2, 5)
def _args_to_args_and_kwargs(args):
    """Pair a tuple of positional arguments with an empty kwargs dict."""
    return args, {}


# Calling ``args_only_to_proper_ingress(func)`` is ``wrap(func, egress=...)`` with
# the egress above. In this file the result is passed as the ``ingress`` of another
# ``wrap`` call, which presumably expects an ``(args, kwargs)`` pair -- confirm
# against the i2.wrapper documentation.
args_only_to_proper_ingress = partial(wrap, egress=_args_to_args_and_kwargs)
class SegmentKeyTrans(Protocol):
    """Structural type of the key-transforming callables (e.g. ``make_key_1``)."""

    def __call__(self, *args, **kwargs) -> SegmentKey:
        """A function that transforms a segment key expressed how the user
        wants it, to a normalized format."""
        ...
def base_slicer(ch, bt, tt, stream_map):
    """Return the ``[bt:tt]`` slice of the stream stored under ``ch``.

    :param ch: Key of the stream in ``stream_map``.
    :param bt: Slice start.
    :param tt: Slice stop (excluded, as in any Python slice).
    :param stream_map: Mapping from channel key to a sliceable object.
    :return: ``stream_map[ch][bt:tt]``; its type is whatever slicing the
        stream yields (e.g. a ``str`` for a string stream).
    :raises KeyError: If ``stream_map`` is a dict and ``ch`` is not in it.
    """
    stream = stream_map[ch]
    return stream[bt:tt]
# def df_slicer(ch, bt, tt, stream_map, time_col):
#     df = stream_map[ch]
#     # NOTE: the parentheses are required, since `&` binds tighter than `>=` and `<`
#     return df[(df[time_col] >= bt) & (df[time_col] < tt)]
# Example streams keyed by channel: a 26-character string and a 26-element range.
stream_map = dict(
    plc='abcdefghijklmnopqrstuvwxyz',
    audio=range(3, 3 + 26),
)
# Fix the stream map once, so that the slicer only needs a normalized (ch, bt, tt) key.
bound_slicer = partial(base_slicer, stream_map=stream_map)
assert bound_slicer('plc', 2, 5) == 'cde'

# Put make_key_1 in front of the bound slicer, so that it can be called with
# make_key_1's (bt, tt, ch) argument order instead.
_make_key_1_ingress = args_only_to_proper_ingress(make_key_1)
slicer_1 = wrap(bound_slicer, ingress=_make_key_1_ingress)
assert slicer_1(2, 5, 'plc') == 'cde'
def foo(stream_map, key_trans):
    """Make a slicer of ``stream_map`` that takes its key in the caller's own format.

    :param stream_map: Mapping from channel key to a sliceable stream; it is
        bound to ``base_slicer``.
    :param key_trans: Function that turns the caller's arguments into a
        normalized ``(ch, bt, tt)`` key (see ``SegmentKeyTrans``).
    :return: The bound slicer wrapped with ``key_trans`` as its ingress; it is
        called with the arguments that ``key_trans`` accepts.
    """
    slicer = partial(base_slicer, stream_map=stream_map)
    key_ingress = args_only_to_proper_ingress(key_trans)
    return wrap(slicer, ingress=key_ingress)


# With make_key_3, only the channel and bt are given; tt defaults to bt + 3.
slicer_3 = foo(stream_map, key_trans=make_key_3)
assert slicer_3('plc', 10) == 'klm'