Skip to content

Commit

Permalink
ref: Make parameters to sources explicit (#707)
Browse files Browse the repository at this point in the history
This improves the documentation and code-completion experience.
  • Loading branch information
bjchambers authored Aug 28, 2023
1 parent 31bbccb commit 868a887
Show file tree
Hide file tree
Showing 54 changed files with 409 additions and 5,366 deletions.
4,870 changes: 0 additions & 4,870 deletions python/CHANGELOG.md

This file was deleted.

31 changes: 17 additions & 14 deletions python/docs/source/examples/time_centric.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@
"outputs": [],
"source": [
"import kaskada as kd\n",
"\n",
"kd.init_session()"
]
},
Expand Down Expand Up @@ -126,7 +127,7 @@
"source": [
"# For demo simplicity, instead of a CSV file, we read and then parse data from a\n",
"# CSV string.\n",
"event_data_string = '''\n",
"event_data_string = \"\"\"\n",
" event_id,event_at,entity_id,event_name,revenue\n",
" ev_00001,2022-01-01 22:01:00,user_001,login,0\n",
" ev_00002,2022-01-01 22:05:00,user_001,view_item,0\n",
Expand All @@ -148,11 +149,11 @@
" ev_00018,2022-01-01 22:17:00,user_002,view_item,0\n",
" ev_00019,2022-01-01 22:18:00,user_002,view_item,0\n",
" ev_00020,2022-01-01 22:20:00,user_002,view_item,0\n",
"'''\n",
"\"\"\"\n",
"\n",
"events = kd.sources.CsvString(event_data_string,\n",
" time_column_name='event_at',\n",
" key_column_name = 'entity_id')\n",
"events = kd.sources.CsvString(\n",
" event_data_string, time_column=\"event_at\", key_column=\"entity_id\"\n",
")\n",
"\n",
"# Inspect the event data\n",
"events.preview()"
Expand Down Expand Up @@ -214,14 +215,16 @@
"source": [
"purchases = events.filter(events.col(\"event_name\").eq(\"purchase\"))\n",
"\n",
"features = kd.record({\n",
" \"event_count_total\": events.count(),\n",
" #\"event_count_hourly\": events.count(window=Hourly()),\n",
" \"purchases_total_count\": purchases.count(),\n",
" #\"purchases_today\": purchases.count(window=Since(Daily()),\n",
" #\"revenue_today\": events.col(\"revenue\").sum(window=Since(Daily())),\n",
" \"revenue_total\": events.col(\"revenue\").sum(),\n",
"})\n",
"features = kd.record(\n",
" {\n",
" \"event_count_total\": events.count(),\n",
" # \"event_count_hourly\": events.count(window=Hourly()),\n",
" \"purchases_total_count\": purchases.count(),\n",
" # \"purchases_today\": purchases.count(window=Since(Daily()),\n",
" # \"revenue_today\": events.col(\"revenue\").sum(window=Since(Daily())),\n",
" \"revenue_total\": events.col(\"revenue\").sum(),\n",
" }\n",
")\n",
"features.preview()"
]
},
Expand Down Expand Up @@ -336,4 +339,4 @@
},
"nbformat": 4,
"nbformat_minor": 5
}
}
4 changes: 2 additions & 2 deletions python/docs/source/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,8 @@ kd.init_session()
messages = kd.sources.PyList(
rows = pyarrow.parquet.read_table("./messages.parquet")
.to_pylist(),
time_column_name = "ts",
key_column_name = "channel",
time_column = "ts",
key_column = "channel",
)

# Send each Slack message to Kaskada
Expand Down
2 changes: 1 addition & 1 deletion python/docs/source/quickstart.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,6 @@ content = "\n".join(
"1996-12-19T16:40:02,A,,",
]
)
source = kd.sources.CsvString(content, time_column_name="time", key_column_name="key")
source = kd.sources.CsvString(content, time_column="time", key_column="key")
source.run().to_pandas()
```
2 changes: 1 addition & 1 deletion python/docs/source/tour.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ single_entity = "\n".join(
"1996-12-24T16:40:02,A,,",
]
)
single_entity = kd.sources.CsvString(single_entity, time_column_name="time", key_column_name="key")
single_entity = kd.sources.CsvString(single_entity, time_column="time", key_column="key")
```

## Events and Aggregations
Expand Down
446 changes: 128 additions & 318 deletions python/poetry.lock

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion python/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ nox = "2023.4.22"
optional = true

[tool.poetry.group.lint.dependencies]
black = ">=21.10b0"
black = { version = ">=21.10b0", extras = ["jupyter"] }
darglint = ">=1.8.1"
flake8 = ">=4.0.1"
flake8-bugbear = ">=21.9.2"
Expand Down
6 changes: 3 additions & 3 deletions python/pysrc/kaskada/_ffi.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -46,10 +46,10 @@ class Table(Expr):
self,
session: Session,
name: str,
time_column_name: str,
key_column_name: str,
time_column: str,
key_column: str,
schema: pa.Schema,
subsort_column_name: Optional[str],
subsort_column: Optional[str],
grouping_name: Optional[str],
time_unit: Optional[str],
) -> None: ...
Expand Down
Loading

0 comments on commit 868a887

Please sign in to comment.