diff --git a/polars-missing-data/tutorial_code.ipynb b/polars-missing-data/tutorial_code.ipynb
index d4efb09e7d..b887c2918d 100644
--- a/polars-missing-data/tutorial_code.ipynb
+++ b/polars-missing-data/tutorial_code.ipynb
@@ -20,16 +20,92 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "id": "22c85bb2-8b10-4075-ab58-3b212f1ed050",
+ "execution_count": 2,
+ "id": "8a05aa96-ae34-41de-a7ef-1498e6d94cab",
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "
shape: (180, 7)record_id | total | tip | gender | smoker | day | time |
---|
i64 | f64 | f64 | str | bool | str | str |
1 | 28.97 | 3.0 | "Male" | true | "Fri" | "Dinner" |
2 | 22.49 | 3.5 | "Male" | false | "Fri" | "Dinner" |
3 | 5.75 | 1.0 | "Female" | true | "Fri" | null |
4 | null | null | "Male" | true | "Fri" | "Dinner" |
5 | 22.75 | 3.25 | "Female" | false | "Fri" | "Dinner" |
… | … | … | … | … | … | … |
176 | 40.55 | 3.0 | "Male" | true | "Sun" | "Dinner" |
177 | 20.69 | 5.0 | "Male" | false | "Sun" | "Dinner" |
178 | 20.9 | 3.5 | "Female" | true | "Sun" | "Dinner" |
179 | 30.46 | 2.0 | "Male" | true | "Sun" | "Dinner" |
180 | 18.15 | 3.5 | "Female" | true | "Sun" | "Dinner" |
"
+ ],
+ "text/plain": [
+ "shape: (180, 7)\n",
+ "┌───────────┬───────┬──────┬────────┬────────┬─────┬────────┐\n",
+ "│ record_id ┆ total ┆ tip ┆ gender ┆ smoker ┆ day ┆ time │\n",
+ "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n",
+ "│ i64 ┆ f64 ┆ f64 ┆ str ┆ bool ┆ str ┆ str │\n",
+ "╞═══════════╪═══════╪══════╪════════╪════════╪═════╪════════╡\n",
+ "│ 1 ┆ 28.97 ┆ 3.0 ┆ Male ┆ true ┆ Fri ┆ Dinner │\n",
+ "│ 2 ┆ 22.49 ┆ 3.5 ┆ Male ┆ false ┆ Fri ┆ Dinner │\n",
+ "│ 3 ┆ 5.75 ┆ 1.0 ┆ Female ┆ true ┆ Fri ┆ null │\n",
+ "│ 4 ┆ null ┆ null ┆ Male ┆ true ┆ Fri ┆ Dinner │\n",
+ "│ 5 ┆ 22.75 ┆ 3.25 ┆ Female ┆ false ┆ Fri ┆ Dinner │\n",
+ "│ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n",
+ "│ 176 ┆ 40.55 ┆ 3.0 ┆ Male ┆ true ┆ Sun ┆ Dinner │\n",
+ "│ 177 ┆ 20.69 ┆ 5.0 ┆ Male ┆ false ┆ Sun ┆ Dinner │\n",
+ "│ 178 ┆ 20.9 ┆ 3.5 ┆ Female ┆ true ┆ Sun ┆ Dinner │\n",
+ "│ 179 ┆ 30.46 ┆ 2.0 ┆ Male ┆ true ┆ Sun ┆ Dinner │\n",
+ "│ 180 ┆ 18.15 ┆ 3.5 ┆ Female ┆ true ┆ Sun ┆ Dinner │\n",
+ "└───────────┴───────┴──────┴────────┴────────┴─────┴────────┘"
+ ]
+ },
+ "execution_count": 2,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"import polars as pl\n",
"\n",
"tips = pl.scan_parquet(\"tips.parquet\")\n",
"\n",
- "tips.null_count().collect()"
+ "tips.collect()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "22c85bb2-8b10-4075-ab58-3b212f1ed050",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
shape: (1, 7)record_id | total | tip | gender | smoker | day | time |
---|
u32 | u32 | u32 | u32 | u32 | u32 | u32 |
0 | 2 | 4 | 0 | 0 | 0 | 2 |
"
+ ],
+ "text/plain": [
+ "shape: (1, 7)\n",
+ "┌───────────┬───────┬─────┬────────┬────────┬─────┬──────┐\n",
+ "│ record_id ┆ total ┆ tip ┆ gender ┆ smoker ┆ day ┆ time │\n",
+ "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n",
+ "│ u32 ┆ u32 ┆ u32 ┆ u32 ┆ u32 ┆ u32 ┆ u32 │\n",
+ "╞═══════════╪═══════╪═════╪════════╪════════╪═════╪══════╡\n",
+ "│ 0 ┆ 2 ┆ 4 ┆ 0 ┆ 0 ┆ 0 ┆ 2 │\n",
+ "└───────────┴───────┴─────┴────────┴────────┴─────┴──────┘"
+ ]
+ },
+ "execution_count": 3,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "(tips.null_count()).collect()"
]
},
{
@@ -42,27 +118,124 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 4,
"id": "11bc9817-6c80-492d-8846-48451e68fcb1",
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
shape: (2, 7)record_id | total | tip | gender | smoker | day | time |
---|
i64 | f64 | f64 | str | bool | str | str |
4 | null | null | "Male" | true | "Fri" | "Dinner" |
18 | null | null | "Female" | true | "Fri" | "Lunch" |
"
+ ],
+ "text/plain": [
+ "shape: (2, 7)\n",
+ "┌───────────┬───────┬──────┬────────┬────────┬─────┬────────┐\n",
+ "│ record_id ┆ total ┆ tip ┆ gender ┆ smoker ┆ day ┆ time │\n",
+ "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n",
+ "│ i64 ┆ f64 ┆ f64 ┆ str ┆ bool ┆ str ┆ str │\n",
+ "╞═══════════╪═══════╪══════╪════════╪════════╪═════╪════════╡\n",
+ "│ 4 ┆ null ┆ null ┆ Male ┆ true ┆ Fri ┆ Dinner │\n",
+ "│ 18 ┆ null ┆ null ┆ Female ┆ true ┆ Fri ┆ Lunch │\n",
+ "└───────────┴───────┴──────┴────────┴────────┴─────┴────────┘"
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"import polars as pl\n",
"\n",
"tips = pl.scan_parquet(\"tips.parquet\")\n",
"\n",
- "tips.filter(pl.col(\"total\").is_null() & pl.col(\"tip\").is_null()).collect()"
+ "(tips.filter(pl.col(\"total\").is_null() & pl.col(\"tip\").is_null())).collect()"
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 5,
+ "id": "d79f6c04-cfcd-45e5-aa36-4a097d6e2082",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
shape: (0, 7)record_id | total | tip | gender | smoker | day | time |
---|
i64 | f64 | f64 | str | bool | str | str |
"
+ ],
+ "text/plain": [
+ "shape: (0, 7)\n",
+ "┌───────────┬───────┬─────┬────────┬────────┬─────┬──────┐\n",
+ "│ record_id ┆ total ┆ tip ┆ gender ┆ smoker ┆ day ┆ time │\n",
+ "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n",
+ "│ i64 ┆ f64 ┆ f64 ┆ str ┆ bool ┆ str ┆ str │\n",
+ "╞═══════════╪═══════╪═════╪════════╪════════╪═════╪══════╡\n",
+ "└───────────┴───────┴─────┴────────┴────────┴─────┴──────┘"
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "(\n",
+ " tips.drop_nulls(pl.col(\"total\")).filter(\n",
+ " pl.col(\"total\").is_null() & pl.col(\"tip\").is_null()\n",
+ " )\n",
+ ").collect()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
"id": "8b7de256-b058-4b6d-b802-822019b0b7eb",
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
shape: (0, 7)record_id | total | tip | gender | smoker | day | time |
---|
i64 | f64 | f64 | str | bool | str | str |
"
+ ],
+ "text/plain": [
+ "shape: (0, 7)\n",
+ "┌───────────┬───────┬─────┬────────┬────────┬─────┬──────┐\n",
+ "│ record_id ┆ total ┆ tip ┆ gender ┆ smoker ┆ day ┆ time │\n",
+ "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n",
+ "│ i64 ┆ f64 ┆ f64 ┆ str ┆ bool ┆ str ┆ str │\n",
+ "╞═══════════╪═══════╪═════╪════════╪════════╪═════╪══════╡\n",
+ "└───────────┴───────┴─────┴────────┴────────┴─────┴──────┘"
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"(\n",
- " tips.drop_nulls(\"total\")\n",
+ " tips.drop_nulls(pl.col(\"total\"))\n",
" .with_columns(pl.col(\"tip\").fill_null(0))\n",
" .filter(pl.col(\"tip\").is_null())\n",
").collect()"
@@ -78,34 +251,125 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 8,
"id": "10fd34e7-e94e-47f1-b9da-533b0550c9b7",
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
shape: (2, 7)record_id | total | tip | gender | smoker | day | time |
---|
i64 | f64 | f64 | str | bool | str | str |
3 | 5.75 | 1.0 | "Female" | true | "Fri" | null |
15 | 8.58 | 1.92 | "Male" | true | "Fri" | null |
"
+ ],
+ "text/plain": [
+ "shape: (2, 7)\n",
+ "┌───────────┬───────┬──────┬────────┬────────┬─────┬──────┐\n",
+ "│ record_id ┆ total ┆ tip ┆ gender ┆ smoker ┆ day ┆ time │\n",
+ "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n",
+ "│ i64 ┆ f64 ┆ f64 ┆ str ┆ bool ┆ str ┆ str │\n",
+ "╞═══════════╪═══════╪══════╪════════╪════════╪═════╪══════╡\n",
+ "│ 3 ┆ 5.75 ┆ 1.0 ┆ Female ┆ true ┆ Fri ┆ null │\n",
+ "│ 15 ┆ 8.58 ┆ 1.92 ┆ Male ┆ true ┆ Fri ┆ null │\n",
+ "└───────────┴───────┴──────┴────────┴────────┴─────┴──────┘"
+ ]
+ },
+ "execution_count": 8,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"import polars as pl\n",
"\n",
"tips = pl.scan_parquet(\"tips.parquet\")\n",
"\n",
- "tips.filter(pl.col(\"time\").is_null()).collect()"
+ "(tips.filter(pl.col(\"time\").is_null())).collect()"
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 9,
"id": "a84196c9-5032-4650-83dd-176319b6eed5",
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
shape: (6, 7)record_id | total | tip | gender | smoker | day | time |
---|
i64 | f64 | f64 | str | bool | str | str |
2 | 22.49 | 3.5 | "Male" | false | "Fri" | "Dinner" |
3 | 5.75 | 1.0 | "Female" | true | "Fri" | null |
4 | null | null | "Male" | true | "Fri" | "Dinner" |
14 | 13.42 | 3.48 | "Female" | true | "Fri" | "Lunch" |
15 | 8.58 | 1.92 | "Male" | true | "Fri" | null |
16 | 15.98 | 3.0 | "Female" | false | "Fri" | "Lunch" |
"
+ ],
+ "text/plain": [
+ "shape: (6, 7)\n",
+ "┌───────────┬───────┬──────┬────────┬────────┬─────┬────────┐\n",
+ "│ record_id ┆ total ┆ tip ┆ gender ┆ smoker ┆ day ┆ time │\n",
+ "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n",
+ "│ i64 ┆ f64 ┆ f64 ┆ str ┆ bool ┆ str ┆ str │\n",
+ "╞═══════════╪═══════╪══════╪════════╪════════╪═════╪════════╡\n",
+ "│ 2 ┆ 22.49 ┆ 3.5 ┆ Male ┆ false ┆ Fri ┆ Dinner │\n",
+ "│ 3 ┆ 5.75 ┆ 1.0 ┆ Female ┆ true ┆ Fri ┆ null │\n",
+ "│ 4 ┆ null ┆ null ┆ Male ┆ true ┆ Fri ┆ Dinner │\n",
+ "│ 14 ┆ 13.42 ┆ 3.48 ┆ Female ┆ true ┆ Fri ┆ Lunch │\n",
+ "│ 15 ┆ 8.58 ┆ 1.92 ┆ Male ┆ true ┆ Fri ┆ null │\n",
+ "│ 16 ┆ 15.98 ┆ 3.0 ┆ Female ┆ false ┆ Fri ┆ Lunch │\n",
+ "└───────────┴───────┴──────┴────────┴────────┴─────┴────────┘"
+ ]
+ },
+ "execution_count": 9,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "tips.filter(pl.col(\"record_id\").is_in([2, 3, 4, 14, 15, 16])).collect()"
+ "(tips.filter(pl.col(\"record_id\").is_in([2, 3, 4, 14, 15, 16]))).collect()"
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 10,
"id": "acfdafa7-c9e0-49cc-8b1e-e4366ce2ac59",
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
shape: (2, 7)record_id | total | tip | gender | smoker | day | time |
---|
i64 | f64 | f64 | str | bool | str | str |
3 | 5.75 | 1.0 | "Female" | true | "Fri" | "Dinner" |
15 | 8.58 | 1.92 | "Male" | true | "Fri" | "Lunch" |
"
+ ],
+ "text/plain": [
+ "shape: (2, 7)\n",
+ "┌───────────┬───────┬──────┬────────┬────────┬─────┬────────┐\n",
+ "│ record_id ┆ total ┆ tip ┆ gender ┆ smoker ┆ day ┆ time │\n",
+ "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n",
+ "│ i64 ┆ f64 ┆ f64 ┆ str ┆ bool ┆ str ┆ str │\n",
+ "╞═══════════╪═══════╪══════╪════════╪════════╪═════╪════════╡\n",
+ "│ 3 ┆ 5.75 ┆ 1.0 ┆ Female ┆ true ┆ Fri ┆ Dinner │\n",
+ "│ 15 ┆ 8.58 ┆ 1.92 ┆ Male ┆ true ┆ Fri ┆ Lunch │\n",
+ "└───────────┴───────┴──────┴────────┴────────┴─────┴────────┘"
+ ]
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"(\n",
" tips.drop_nulls(\"total\")\n",
@@ -129,23 +393,42 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 11,
"id": "19504937-9a8b-48c9-b504-62db2bff178c",
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
shape: (2, 7)record_id | total | tip | gender | smoker | day | time |
---|
i64 | f64 | f64 | str | bool | str | str |
4 | null | null | "Male" | true | "Fri" | "Dinner" |
18 | null | null | "Female" | true | "Fri" | "Lunch" |
"
+ ],
+ "text/plain": [
+ "shape: (2, 7)\n",
+ "┌───────────┬───────┬──────┬────────┬────────┬─────┬────────┐\n",
+ "│ record_id ┆ total ┆ tip ┆ gender ┆ smoker ┆ day ┆ time │\n",
+ "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n",
+ "│ i64 ┆ f64 ┆ f64 ┆ str ┆ bool ┆ str ┆ str │\n",
+ "╞═══════════╪═══════╪══════╪════════╪════════╪═════╪════════╡\n",
+ "│ 4 ┆ null ┆ null ┆ Male ┆ true ┆ Fri ┆ Dinner │\n",
+ "│ 18 ┆ null ┆ null ┆ Female ┆ true ┆ Fri ┆ Lunch │\n",
+ "└───────────┴───────┴──────┴────────┴────────┴─────┴────────┘"
+ ]
+ },
+ "execution_count": 11,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "tips = pl.scan_parquet(\"tips.parquet\")\n",
+ "import polars as pl\n",
"\n",
- "(tips.filter(pl.all_horizontal(pl.col(\"total\", \"tip\").is_null()))).collect()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "91b280c0-f7f7-4874-86b6-df349b8b6927",
- "metadata": {},
- "outputs": [],
- "source": [
"tips = pl.scan_parquet(\"tips.parquet\")\n",
"\n",
"(tips.filter(pl.all_horizontal(pl.col(\"total\", \"tip\").is_null()))).collect()"
@@ -153,20 +436,88 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 12,
"id": "0d5ba705-e675-4935-8aab-958a539bd66a",
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
shape: (178, 7)record_id | total | tip | gender | smoker | day | time |
---|
i64 | f64 | f64 | str | bool | str | str |
1 | 28.97 | 3.0 | "Male" | true | "Fri" | "Dinner" |
2 | 22.49 | 3.5 | "Male" | false | "Fri" | "Dinner" |
3 | 5.75 | 1.0 | "Female" | true | "Fri" | null |
5 | 22.75 | 3.25 | "Female" | false | "Fri" | "Dinner" |
6 | 40.17 | 4.73 | "Male" | true | "Fri" | "Dinner" |
… | … | … | … | … | … | … |
176 | 40.55 | 3.0 | "Male" | true | "Sun" | "Dinner" |
177 | 20.69 | 5.0 | "Male" | false | "Sun" | "Dinner" |
178 | 20.9 | 3.5 | "Female" | true | "Sun" | "Dinner" |
179 | 30.46 | 2.0 | "Male" | true | "Sun" | "Dinner" |
180 | 18.15 | 3.5 | "Female" | true | "Sun" | "Dinner" |
"
+ ],
+ "text/plain": [
+ "shape: (178, 7)\n",
+ "┌───────────┬───────┬──────┬────────┬────────┬─────┬────────┐\n",
+ "│ record_id ┆ total ┆ tip ┆ gender ┆ smoker ┆ day ┆ time │\n",
+ "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n",
+ "│ i64 ┆ f64 ┆ f64 ┆ str ┆ bool ┆ str ┆ str │\n",
+ "╞═══════════╪═══════╪══════╪════════╪════════╪═════╪════════╡\n",
+ "│ 1 ┆ 28.97 ┆ 3.0 ┆ Male ┆ true ┆ Fri ┆ Dinner │\n",
+ "│ 2 ┆ 22.49 ┆ 3.5 ┆ Male ┆ false ┆ Fri ┆ Dinner │\n",
+ "│ 3 ┆ 5.75 ┆ 1.0 ┆ Female ┆ true ┆ Fri ┆ null │\n",
+ "│ 5 ┆ 22.75 ┆ 3.25 ┆ Female ┆ false ┆ Fri ┆ Dinner │\n",
+ "│ 6 ┆ 40.17 ┆ 4.73 ┆ Male ┆ true ┆ Fri ┆ Dinner │\n",
+ "│ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n",
+ "│ 176 ┆ 40.55 ┆ 3.0 ┆ Male ┆ true ┆ Sun ┆ Dinner │\n",
+ "│ 177 ┆ 20.69 ┆ 5.0 ┆ Male ┆ false ┆ Sun ┆ Dinner │\n",
+ "│ 178 ┆ 20.9 ┆ 3.5 ┆ Female ┆ true ┆ Sun ┆ Dinner │\n",
+ "│ 179 ┆ 30.46 ┆ 2.0 ┆ Male ┆ true ┆ Sun ┆ Dinner │\n",
+ "│ 180 ┆ 18.15 ┆ 3.5 ┆ Female ┆ true ┆ Sun ┆ Dinner │\n",
+ "└───────────┴───────┴──────┴────────┴────────┴─────┴────────┘"
+ ]
+ },
+ "execution_count": 12,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
+ "tips = pl.scan_parquet(\"tips.parquet\")\n",
+ "\n",
"(tips.filter(~pl.all_horizontal(pl.col(\"total\", \"tip\").is_null()))).collect()"
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 13,
"id": "29a6aab6-edb5-42cc-998b-7bd82f45ce8c",
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
shape: (1, 7)record_id | total | tip | gender | smoker | day | time |
---|
u32 | u32 | u32 | u32 | u32 | u32 | u32 |
0 | 0 | 0 | 0 | 0 | 0 | 0 |
"
+ ],
+ "text/plain": [
+ "shape: (1, 7)\n",
+ "┌───────────┬───────┬─────┬────────┬────────┬─────┬──────┐\n",
+ "│ record_id ┆ total ┆ tip ┆ gender ┆ smoker ┆ day ┆ time │\n",
+ "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n",
+ "│ u32 ┆ u32 ┆ u32 ┆ u32 ┆ u32 ┆ u32 ┆ u32 │\n",
+ "╞═══════════╪═══════╪═════╪════════╪════════╪═════╪══════╡\n",
+ "│ 0 ┆ 0 ┆ 0 ┆ 0 ┆ 0 ┆ 0 ┆ 0 │\n",
+ "└───────────┴───────┴─────┴────────┴────────┴─────┴──────┘"
+ ]
+ },
+ "execution_count": 13,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"import polars as pl\n",
"\n",
@@ -175,11 +526,7 @@
"(\n",
" tips.filter(~pl.all_horizontal(pl.col(\"total\", \"tip\").is_null()))\n",
" .with_columns(pl.col(\"tip\").fill_null(0))\n",
- " .with_columns(\n",
- " pl.when(pl.col(\"record_id\") == 2)\n",
- " .then(pl.col(\"time\").fill_null(strategy=\"forward\"))\n",
- " .otherwise(pl.col(\"time\").fill_null(strategy=\"backward\"))\n",
- " )\n",
+ " .with_columns(pl.col(\"time\").fill_null(strategy=\"forward\"))\n",
").null_count().collect()"
]
},
@@ -193,10 +540,42 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 14,
"id": "2e29d50f-b9f8-4545-b954-040490e6f15c",
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
shape: (5, 5)scientist_id | first_name | last_name | birth_year | death_year |
---|
i64 | str | str | i64 | i64 |
1 | "Isaac" | null | 1642 | 1726 |
2 | "Louis" | "Pasteur" | 1822 | 1895 |
3 | null | "Einstein" | null | 1955 |
4 | "Charles" | "Darwin" | 1809 | null |
5 | "Marie" | "Curie" | 1867 | 1934 |
"
+ ],
+ "text/plain": [
+ "shape: (5, 5)\n",
+ "┌──────────────┬────────────┬───────────┬────────────┬────────────┐\n",
+ "│ scientist_id ┆ first_name ┆ last_name ┆ birth_year ┆ death_year │\n",
+ "│ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n",
+ "│ i64 ┆ str ┆ str ┆ i64 ┆ i64 │\n",
+ "╞══════════════╪════════════╪═══════════╪════════════╪════════════╡\n",
+ "│ 1 ┆ Isaac ┆ null ┆ 1642 ┆ 1726 │\n",
+ "│ 2 ┆ Louis ┆ Pasteur ┆ 1822 ┆ 1895 │\n",
+ "│ 3 ┆ null ┆ Einstein ┆ null ┆ 1955 │\n",
+ "│ 4 ┆ Charles ┆ Darwin ┆ 1809 ┆ null │\n",
+ "│ 5 ┆ Marie ┆ Curie ┆ 1867 ┆ 1934 │\n",
+ "└──────────────┴────────────┴───────────┴────────────┴────────────┘"
+ ]
+ },
+ "execution_count": 14,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"import polars as pl\n",
"\n",
@@ -215,10 +594,42 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 15,
"id": "a6a5a990-d2cf-4dd2-8021-1a59e27c64d2",
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
shape: (5, 5)scientist_id | first_name | last_name | birth_year | death_year |
---|
i64 | str | str | i64 | i64 |
1 | "Isaac" | "Unknown" | 1642 | 1726 |
2 | "Louis" | "Pasteur" | 1822 | 1895 |
3 | "Unknown" | "Einstein" | 0 | 1955 |
4 | "Charles" | "Darwin" | 1809 | 0 |
5 | "Marie" | "Curie" | 1867 | 1934 |
"
+ ],
+ "text/plain": [
+ "shape: (5, 5)\n",
+ "┌──────────────┬────────────┬───────────┬────────────┬────────────┐\n",
+ "│ scientist_id ┆ first_name ┆ last_name ┆ birth_year ┆ death_year │\n",
+ "│ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n",
+ "│ i64 ┆ str ┆ str ┆ i64 ┆ i64 │\n",
+ "╞══════════════╪════════════╪═══════════╪════════════╪════════════╡\n",
+ "│ 1 ┆ Isaac ┆ Unknown ┆ 1642 ┆ 1726 │\n",
+ "│ 2 ┆ Louis ┆ Pasteur ┆ 1822 ┆ 1895 │\n",
+ "│ 3 ┆ Unknown ┆ Einstein ┆ 0 ┆ 1955 │\n",
+ "│ 4 ┆ Charles ┆ Darwin ┆ 1809 ┆ 0 │\n",
+ "│ 5 ┆ Marie ┆ Curie ┆ 1867 ┆ 1934 │\n",
+ "└──────────────┴────────────┴───────────┴────────────┴────────────┘"
+ ]
+ },
+ "execution_count": 15,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"import polars.selectors as cs\n",
"\n",
@@ -239,10 +650,42 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 16,
"id": "8b706a22-cc6a-49c9-858c-69bb3f72cb48",
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
shape: (5, 4)product | last_year | current_year | next_year |
---|
str | i64 | i64 | f64 |
"A" | 17 | 19 | 29.0 |
"B" | 35 | 35 | NaN |
"C" | 21 | 19 | null |
"D" | 42 | 50 | -inf |
"E" | 23 | 25 | inf |
"
+ ],
+ "text/plain": [
+ "shape: (5, 4)\n",
+ "┌─────────┬───────────┬──────────────┬───────────┐\n",
+ "│ product ┆ last_year ┆ current_year ┆ next_year │\n",
+ "│ --- ┆ --- ┆ --- ┆ --- │\n",
+ "│ str ┆ i64 ┆ i64 ┆ f64 │\n",
+ "╞═════════╪═══════════╪══════════════╪═══════════╡\n",
+ "│ A ┆ 17 ┆ 19 ┆ 29.0 │\n",
+ "│ B ┆ 35 ┆ 35 ┆ NaN │\n",
+ "│ C ┆ 21 ┆ 19 ┆ null │\n",
+ "│ D ┆ 42 ┆ 50 ┆ -inf │\n",
+ "│ E ┆ 23 ┆ 25 ┆ inf │\n",
+ "└─────────┴───────────┴──────────────┴───────────┘"
+ ]
+ },
+ "execution_count": 16,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"import polars as pl\n",
"\n",
@@ -253,10 +696,42 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 17,
"id": "5cde06c9-1a4c-45da-991d-cda5cd27542c",
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
shape: (5, 4)product | last_year | current_year | next_year |
---|
str | i64 | i64 | f64 |
"A" | 17 | 19 | 29.0 |
"B" | 35 | 35 | null |
"C" | 21 | 19 | null |
"D" | 42 | 50 | null |
"E" | 23 | 25 | null |
"
+ ],
+ "text/plain": [
+ "shape: (5, 4)\n",
+ "┌─────────┬───────────┬──────────────┬───────────┐\n",
+ "│ product ┆ last_year ┆ current_year ┆ next_year │\n",
+ "│ --- ┆ --- ┆ --- ┆ --- │\n",
+ "│ str ┆ i64 ┆ i64 ┆ f64 │\n",
+ "╞═════════╪═══════════╪══════════════╪═══════════╡\n",
+ "│ A ┆ 17 ┆ 19 ┆ 29.0 │\n",
+ "│ B ┆ 35 ┆ 35 ┆ null │\n",
+ "│ C ┆ 21 ┆ 19 ┆ null │\n",
+ "│ D ┆ 42 ┆ 50 ┆ null │\n",
+ "│ E ┆ 23 ┆ 25 ┆ null │\n",
+ "└─────────┴───────────┴──────────────┴───────────┘"
+ ]
+ },
+ "execution_count": 17,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"(\n",
" sales_trends.with_columns(\n",
@@ -269,10 +744,42 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 18,
"id": "babf6ca8-101f-40f8-8224-426eeece5a81",
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
shape: (5, 4)product | last_year | current_year | next_year |
---|
str | i64 | i64 | f64 |
"A" | 17 | 19 | 29.0 |
"B" | 35 | 35 | 35.0 |
"C" | 21 | 19 | 17.0 |
"D" | 42 | 50 | 58.0 |
"E" | 23 | 25 | 27.0 |
"
+ ],
+ "text/plain": [
+ "shape: (5, 4)\n",
+ "┌─────────┬───────────┬──────────────┬───────────┐\n",
+ "│ product ┆ last_year ┆ current_year ┆ next_year │\n",
+ "│ --- ┆ --- ┆ --- ┆ --- │\n",
+ "│ str ┆ i64 ┆ i64 ┆ f64 │\n",
+ "╞═════════╪═══════════╪══════════════╪═══════════╡\n",
+ "│ A ┆ 17 ┆ 19 ┆ 29.0 │\n",
+ "│ B ┆ 35 ┆ 35 ┆ 35.0 │\n",
+ "│ C ┆ 21 ┆ 19 ┆ 17.0 │\n",
+ "│ D ┆ 42 ┆ 50 ┆ 58.0 │\n",
+ "│ E ┆ 23 ┆ 25 ┆ 27.0 │\n",
+ "└─────────┴───────────┴──────────────┴───────────┘"
+ ]
+ },
+ "execution_count": 18,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"(\n",
" sales_trends.with_columns(\n",
@@ -298,10 +805,38 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 36,
"id": "d564123d-42da-462b-a52a-c6a815e59b0d",
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
shape: (1, 4)episode | series | title | original_date |
---|
u32 | u32 | u32 | u32 |
0 | 2 | 2 | 1 |
"
+ ],
+ "text/plain": [
+ "shape: (1, 4)\n",
+ "┌─────────┬────────┬───────┬───────────────┐\n",
+ "│ episode ┆ series ┆ title ┆ original_date │\n",
+ "│ --- ┆ --- ┆ --- ┆ --- │\n",
+ "│ u32 ┆ u32 ┆ u32 ┆ u32 │\n",
+ "╞═════════╪════════╪═══════╪═══════════════╡\n",
+ "│ 0 ┆ 2 ┆ 2 ┆ 1 │\n",
+ "└─────────┴────────┴───────┴───────────────┘"
+ ]
+ },
+ "execution_count": 36,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"import polars as pl\n",
"\n",
@@ -312,25 +847,55 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 19,
"id": "000b53ba-c5d3-4a75-89d7-86c36881a078",
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
shape: (1, 4)episode | series | title | original_date |
---|
u32 | u32 | u32 | u32 |
0 | 0 | 0 | 0 |
"
+ ],
+ "text/plain": [
+ "shape: (1, 4)\n",
+ "┌─────────┬────────┬───────┬───────────────┐\n",
+ "│ episode ┆ series ┆ title ┆ original_date │\n",
+ "│ --- ┆ --- ┆ --- ┆ --- │\n",
+ "│ u32 ┆ u32 ┆ u32 ┆ u32 │\n",
+ "╞═════════╪════════╪═══════╪═══════════════╡\n",
+ "│ 0 ┆ 0 ┆ 0 ┆ 0 │\n",
+ "└─────────┴────────┴───────┴───────────────┘"
+ ]
+ },
+ "execution_count": 19,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"import polars as pl\n",
"\n",
"episodes = pl.scan_parquet(\"ft_exercise.parquet\")\n",
"\n",
- "episodes.with_columns(\n",
- " pl.when(pl.col(\"episode\") == 6)\n",
- " .then(pl.col(\"series\").fill_null(strategy=\"forward\"))\n",
- " .otherwise(pl.col(\"series\").fill_null(strategy=\"backward\"))\n",
- ").with_columns(\n",
- " pl.when(pl.col(\"episode\") == 4)\n",
- " .then(pl.col(\"title\").fill_null(\"The Hotel Inspectors\"))\n",
- " .otherwise(pl.col(\"title\").fill_null(\"Waldorf Salad\"))\n",
- ").with_columns(\n",
- " pl.col(\"original_date\").interpolate()\n",
+ "(\n",
+ " episodes.with_columns(\n",
+ " pl.when(pl.col(\"episode\") == 6)\n",
+ " .then(pl.col(\"series\").fill_null(strategy=\"forward\"))\n",
+ " .otherwise(pl.col(\"series\").fill_null(strategy=\"backward\"))\n",
+ " )\n",
+ " .with_columns(\n",
+ " pl.when(pl.col(\"episode\") == 4)\n",
+ " .then(pl.col(\"title\").fill_null(\"The Hotel Inspectors\"))\n",
+ " .otherwise(pl.col(\"title\").fill_null(\"Waldorf Salad\"))\n",
+ " )\n",
+ " .with_columns(pl.col(\"original_date\").interpolate())\n",
").null_count().collect()"
]
}