diff --git a/sparrow-py/docs/source/_extensions/gallery_directive.py b/sparrow-py/docs/source/_extensions/gallery_directive.py
new file mode 100644
index 000000000..878c17c33
--- /dev/null
+++ b/sparrow-py/docs/source/_extensions/gallery_directive.py
@@ -0,0 +1,163 @@
+"""A directive to generate a gallery of images from structured data.
+
+Generating a gallery of images that are all the same size is a common
+pattern in documentation, and this can be cumbersome if the gallery is
+generated programmatically. This directive wraps this particular use-case
+in a helper-directive to generate it with a single YAML configuration file.
+
+It currently exists for maintainers of the pydata-sphinx-theme,
+but might be abstracted into a standalone package if it proves useful.
+"""
+from pathlib import Path
+from typing import Any, Dict, List
+
+from docutils import nodes
+from docutils.parsers.rst import directives
+from sphinx.application import Sphinx
+from sphinx.util import logging
+from sphinx.util.docutils import SphinxDirective
+from yaml import safe_load
+
+logger = logging.getLogger(__name__)
+
+
+TEMPLATE_GRID = """
+`````{{grid}} {columns}
+{options}
+
+{content}
+
+`````
+"""
+
+GRID_CARD = """
+````{{grid-item-card}} {title}
+{options}
+
+{content}
+````
+"""
+
+
+class GalleryGridDirective(SphinxDirective):
+    """A directive to show a gallery of images and links in a Bootstrap grid.
+
+    The grid can be generated from a YAML file that contains a list of items, or
+    from the content of the directive (also formatted in YAML). Use the parameter
+    "class-card" to add an additional CSS class to all cards. When specifying the grid
+    items, you can use all parameters from "grid-item-card" directive to customize
+    individual cards + ["image", "header", "content", "title"].
+
+    Danger:
+        This directive can only be used in the context of a Myst documentation page as
+        the templates use Markdown flavored formatting.
+    """
+
+    name = "gallery-grid"
+    has_content = True
+    required_arguments = 0
+    optional_arguments = 1
+    final_argument_whitespace = True
+    option_spec = {
+        # Column specification forwarded to the generated grid
+        "grid-columns": directives.unchanged,
+        # A class to be added to the resulting container
+        "class-container": directives.unchanged,
+        # A class to be added to every generated card
+        "class-card": directives.unchanged,
+    }
+
+    def run(self) -> List[nodes.Node]:
+        """Create the gallery grid.
+
+        The items are read from the YAML file named by the optional argument
+        (resolved relative to the directory of the document being parsed) or,
+        without an argument, from the directive content.
+
+        Returns:
+            The first node produced by parsing the generated grid markup, a
+            paragraph reporting a missing YAML file, or an empty list when
+            the YAML defines no items.
+        """
+        if self.arguments:
+            # If an argument is given, assume it's a path to a YAML file
+            # Parse it and load it into the directive content
+            path_data_rel = Path(self.arguments[0])
+            path_doc, _ = self.get_source_info()
+            path_doc = Path(path_doc).parent
+            path_data = (path_doc / path_data_rel).resolve()
+            if not path_data.exists():
+                # A directive has to return a list of nodes, so report the
+                # problem in the page rather than returning ``None``.
+                logger.warning(f"Could not find grid data at {path_data}.")
+                message = f"No grid data found at {path_data}."
+                return [nodes.paragraph(text=message)]
+            yaml_string = path_data.read_text()
+        else:
+            yaml_string = "\n".join(self.content)
+
+        # ``safe_load`` returns ``None`` for empty input; treat that as no items
+        items = safe_load(yaml_string) or []
+        if not items:
+            logger.warning("No items found for the gallery grid.")
+            return []
+
+        # Generate a card for each item: "title", "header", "image" and "content"
+        # build the card itself, every remaining key becomes a card option
+        grid_items = []
+        for item in items:
+            # remove parameters that are not needed for the card options
+            title = item.pop("title", "")
+
+            # build the content of the card using some extra parameters
+            header = f"{item.pop('header')} \n^^^ \n" if "header" in item else ""
+            image = f"![image]({item.pop('image')}) \n" if "image" in item else ""
+            content = f"{item.pop('content')} \n" if "content" in item else ""
+
+            # optional parameter that influence all cards
+            if "class-card" in self.options:
+                item["class-card"] = self.options["class-card"]
+
+            loc_options_str = "\n".join(f":{k}: {v}" for k, v in item.items()) + " \n"
+
+            card = GRID_CARD.format(
+                options=loc_options_str, content=header + image + content, title=title
+            )
+            grid_items.append(card)
+
+        # Parse the template with Sphinx Design to create an output container
+        # Prep the options for the template grid
+        class_ = "gallery-directive" + f' {self.options.get("class-container", "")}'
+        options = {"gutter": 2, "class-container": class_}
+        options_str = "\n".join(f":{k}: {v}" for k, v in options.items())
+
+        # Create the directive string for the grid
+        grid_directive = TEMPLATE_GRID.format(
+            columns=self.options.get("grid-columns", "1 2 3 4"),
+            options=options_str,
+            content="\n".join(grid_items),
+        )
+
+        # Parse content as a directive so Sphinx Design processes it
+        container = nodes.container()
+        self.state.nested_parse([grid_directive], 0, container)
+
+        # Sphinx Design outputs a container too, so just use that
+        return [container.children[0]]
+
+
+def setup(app: Sphinx) -> Dict[str, Any]:
+    """Add custom configuration to sphinx app.
+
+    Args:
+        app: the Sphinx application
+
+    Returns:
+        the 2 parallel parameters set to ``True``.
+    """
+    app.add_directive("gallery-grid", GalleryGridDirective)
+
+    return {
+        "parallel_read_safe": True,
+        "parallel_write_safe": True,
+    }
\ No newline at end of file
diff --git a/sparrow-py/docs/source/conf.py b/sparrow-py/docs/source/conf.py
index 1fd5bcb8c..01b7667d5 100644
--- a/sparrow-py/docs/source/conf.py
+++ b/sparrow-py/docs/source/conf.py
@@ -2,6 +2,10 @@
 from typing import Any
 from typing import Dict
 
+from pathlib import Path
+import sys
+sys.path.append(str(Path(".").resolve()))
+
 project = "sparrow-py"
 author = "Kaskada Contributors"
 copyright = "2023, Kaskada Contributors"
@@ -11,9 +15,11 @@
     "sphinx.ext.napoleon",
     "sphinx.ext.intersphinx",
     "sphinx.ext.todo",
+    "sphinx_design",
     # "myst_parser",
     "myst_nb",
     "sphinx_copybutton",
+    "_extensions.gallery_directive",
 ]
 autodoc_typehints = "description"
 language = "en"
@@ -28,6 +34,7 @@
     "use_repository_button": True,
     "use_source_button": True,
     "use_edit_page_button": True,
+    "home_page_in_toc": True,
     "use_issues_button": True,
     "repository_branch": "main",
     "path_to_docs": "sparrow-py/docs/source",
@@ -44,8 +51,7 @@
             "icon": "fa-brands fa-slack",
         },
     ],
-    "show_nav_level": 3,
-    "show_toc_level": 2,
+    "primary_sidebar_end": ["indices.html"],
 }
 
 templates_path = ["_templates"]
diff --git a/sparrow-py/docs/source/guide/introduction.md b/sparrow-py/docs/source/guide/introduction.md
new file mode 100644
index 000000000..8317214c2
--- /dev/null
+++ b/sparrow-py/docs/source/guide/introduction.md
@@ -0,0 +1,38 @@
+# Introduction
+
+Understanding and reacting to the world in real-time requires understanding what is happening _now_ in the context of what happened in the past.
+You need the ability to understand if what just happened is unusual, how it relates to what happened previously, and how it relates to other things that are happening at the same time.
+
+Kaskada processes events from streams and historic data sources to answer these questions in real-time.
+
+The power and convenience of Kaskada comes from a new abstraction: the Timestream.
+Timestreams provide a declarative API like dataframes over the complete temporal context.
+Easily combine multiple streams and reason about the complete sequence of events.
+Use time-travel to compute training examples from historic data and understand how results change over time.
+
+## What are "Timestreams"?
+
+A [Timestream](../reference/timestream/index) describes how a value changes over time. In the same way that SQL
+queries transform tables and graph queries transform nodes and edges,
+Kaskada queries transform Timestreams.
+
+In comparison to a timeseries which often contains simple values (e.g., numeric
+observations) defined at fixed, periodic times (e.g., every minute), a Timestream
+contains any kind of data (records or collections as well as primitives) and may
+be defined at arbitrary times corresponding to when the events occur.
+
+## Getting Started with Timestreams
+
+Getting started with Timestreams is as simple as `pip` installing the Python library, loading some data and running a query.
+
+```python
+import timestreams as t
+
+# Read data from a Parquet file.
+data = t.sources.Parquet.from_file( + "path_to_file.parquet", + time = "time", + key = "user") +# Get the count of events associated with each user over time, as a dataframe. +data.count().run().to_pandas() +``` \ No newline at end of file diff --git a/sparrow-py/docs/source/index.md b/sparrow-py/docs/source/index.md index c2431be87..9853e4aa1 100644 --- a/sparrow-py/docs/source/index.md +++ b/sparrow-py/docs/source/index.md @@ -1,39 +1,88 @@ --- hide-toc: true +html_theme.sidebar_secondary.remove: true +title: Kaskada Timestreams --- -# Kaskada Timestreams +
Real-time and historic event processing in Python. +
+