diff --git a/polars-missing-data/tutorial_code.ipynb b/polars-missing-data/tutorial_code.ipynb index d4efb09e7d..b887c2918d 100644 --- a/polars-missing-data/tutorial_code.ipynb +++ b/polars-missing-data/tutorial_code.ipynb @@ -20,16 +20,92 @@ }, { "cell_type": "code", - "execution_count": null, - "id": "22c85bb2-8b10-4075-ab58-3b212f1ed050", + "execution_count": 2, + "id": "8a05aa96-ae34-41de-a7ef-1498e6d94cab", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (180, 7)
record_idtotaltipgendersmokerdaytime
i64f64f64strboolstrstr
128.973.0"Male"true"Fri""Dinner"
222.493.5"Male"false"Fri""Dinner"
35.751.0"Female"true"Fri"null
4nullnull"Male"true"Fri""Dinner"
522.753.25"Female"false"Fri""Dinner"
17640.553.0"Male"true"Sun""Dinner"
17720.695.0"Male"false"Sun""Dinner"
17820.93.5"Female"true"Sun""Dinner"
17930.462.0"Male"true"Sun""Dinner"
18018.153.5"Female"true"Sun""Dinner"
" + ], + "text/plain": [ + "shape: (180, 7)\n", + "┌───────────┬───────┬──────┬────────┬────────┬─────┬────────┐\n", + "│ record_id ┆ total ┆ tip ┆ gender ┆ smoker ┆ day ┆ time │\n", + "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", + "│ i64 ┆ f64 ┆ f64 ┆ str ┆ bool ┆ str ┆ str │\n", + "╞═══════════╪═══════╪══════╪════════╪════════╪═════╪════════╡\n", + "│ 1 ┆ 28.97 ┆ 3.0 ┆ Male ┆ true ┆ Fri ┆ Dinner │\n", + "│ 2 ┆ 22.49 ┆ 3.5 ┆ Male ┆ false ┆ Fri ┆ Dinner │\n", + "│ 3 ┆ 5.75 ┆ 1.0 ┆ Female ┆ true ┆ Fri ┆ null │\n", + "│ 4 ┆ null ┆ null ┆ Male ┆ true ┆ Fri ┆ Dinner │\n", + "│ 5 ┆ 22.75 ┆ 3.25 ┆ Female ┆ false ┆ Fri ┆ Dinner │\n", + "│ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n", + "│ 176 ┆ 40.55 ┆ 3.0 ┆ Male ┆ true ┆ Sun ┆ Dinner │\n", + "│ 177 ┆ 20.69 ┆ 5.0 ┆ Male ┆ false ┆ Sun ┆ Dinner │\n", + "│ 178 ┆ 20.9 ┆ 3.5 ┆ Female ┆ true ┆ Sun ┆ Dinner │\n", + "│ 179 ┆ 30.46 ┆ 2.0 ┆ Male ┆ true ┆ Sun ┆ Dinner │\n", + "│ 180 ┆ 18.15 ┆ 3.5 ┆ Female ┆ true ┆ Sun ┆ Dinner │\n", + "└───────────┴───────┴──────┴────────┴────────┴─────┴────────┘" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "import polars as pl\n", "\n", "tips = pl.scan_parquet(\"tips.parquet\")\n", "\n", - "tips.null_count().collect()" + "tips.collect()" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "22c85bb2-8b10-4075-ab58-3b212f1ed050", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (1, 7)
record_idtotaltipgendersmokerdaytime
u32u32u32u32u32u32u32
0240002
" + ], + "text/plain": [ + "shape: (1, 7)\n", + "┌───────────┬───────┬─────┬────────┬────────┬─────┬──────┐\n", + "│ record_id ┆ total ┆ tip ┆ gender ┆ smoker ┆ day ┆ time │\n", + "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", + "│ u32 ┆ u32 ┆ u32 ┆ u32 ┆ u32 ┆ u32 ┆ u32 │\n", + "╞═══════════╪═══════╪═════╪════════╪════════╪═════╪══════╡\n", + "│ 0 ┆ 2 ┆ 4 ┆ 0 ┆ 0 ┆ 0 ┆ 2 │\n", + "└───────────┴───────┴─────┴────────┴────────┴─────┴──────┘" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "(tips.null_count()).collect()" ] }, { @@ -42,27 +118,124 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "id": "11bc9817-6c80-492d-8846-48451e68fcb1", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (2, 7)
record_idtotaltipgendersmokerdaytime
i64f64f64strboolstrstr
4nullnull"Male"true"Fri""Dinner"
18nullnull"Female"true"Fri""Lunch"
" + ], + "text/plain": [ + "shape: (2, 7)\n", + "┌───────────┬───────┬──────┬────────┬────────┬─────┬────────┐\n", + "│ record_id ┆ total ┆ tip ┆ gender ┆ smoker ┆ day ┆ time │\n", + "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", + "│ i64 ┆ f64 ┆ f64 ┆ str ┆ bool ┆ str ┆ str │\n", + "╞═══════════╪═══════╪══════╪════════╪════════╪═════╪════════╡\n", + "│ 4 ┆ null ┆ null ┆ Male ┆ true ┆ Fri ┆ Dinner │\n", + "│ 18 ┆ null ┆ null ┆ Female ┆ true ┆ Fri ┆ Lunch │\n", + "└───────────┴───────┴──────┴────────┴────────┴─────┴────────┘" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "import polars as pl\n", "\n", "tips = pl.scan_parquet(\"tips.parquet\")\n", "\n", - "tips.filter(pl.col(\"total\").is_null() & pl.col(\"tip\").is_null()).collect()" + "(tips.filter(pl.col(\"total\").is_null() & pl.col(\"tip\").is_null())).collect()" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, + "id": "d79f6c04-cfcd-45e5-aa36-4a097d6e2082", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (0, 7)
record_idtotaltipgendersmokerdaytime
i64f64f64strboolstrstr
" + ], + "text/plain": [ + "shape: (0, 7)\n", + "┌───────────┬───────┬─────┬────────┬────────┬─────┬──────┐\n", + "│ record_id ┆ total ┆ tip ┆ gender ┆ smoker ┆ day ┆ time │\n", + "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", + "│ i64 ┆ f64 ┆ f64 ┆ str ┆ bool ┆ str ┆ str │\n", + "╞═══════════╪═══════╪═════╪════════╪════════╪═════╪══════╡\n", + "└───────────┴───────┴─────┴────────┴────────┴─────┴──────┘" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "(\n", + " tips.drop_nulls(pl.col(\"total\")).filter(\n", + " pl.col(\"total\").is_null() & pl.col(\"tip\").is_null()\n", + " )\n", + ").collect()" + ] + }, + { + "cell_type": "code", + "execution_count": 6, "id": "8b7de256-b058-4b6d-b802-822019b0b7eb", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (0, 7)
record_idtotaltipgendersmokerdaytime
i64f64f64strboolstrstr
" + ], + "text/plain": [ + "shape: (0, 7)\n", + "┌───────────┬───────┬─────┬────────┬────────┬─────┬──────┐\n", + "│ record_id ┆ total ┆ tip ┆ gender ┆ smoker ┆ day ┆ time │\n", + "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", + "│ i64 ┆ f64 ┆ f64 ┆ str ┆ bool ┆ str ┆ str │\n", + "╞═══════════╪═══════╪═════╪════════╪════════╪═════╪══════╡\n", + "└───────────┴───────┴─────┴────────┴────────┴─────┴──────┘" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "(\n", - " tips.drop_nulls(\"total\")\n", + " tips.drop_nulls(pl.col(\"total\"))\n", " .with_columns(pl.col(\"tip\").fill_null(0))\n", " .filter(pl.col(\"tip\").is_null())\n", ").collect()" @@ -78,34 +251,125 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "id": "10fd34e7-e94e-47f1-b9da-533b0550c9b7", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (2, 7)
record_idtotaltipgendersmokerdaytime
i64f64f64strboolstrstr
35.751.0"Female"true"Fri"null
158.581.92"Male"true"Fri"null
" + ], + "text/plain": [ + "shape: (2, 7)\n", + "┌───────────┬───────┬──────┬────────┬────────┬─────┬──────┐\n", + "│ record_id ┆ total ┆ tip ┆ gender ┆ smoker ┆ day ┆ time │\n", + "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", + "│ i64 ┆ f64 ┆ f64 ┆ str ┆ bool ┆ str ┆ str │\n", + "╞═══════════╪═══════╪══════╪════════╪════════╪═════╪══════╡\n", + "│ 3 ┆ 5.75 ┆ 1.0 ┆ Female ┆ true ┆ Fri ┆ null │\n", + "│ 15 ┆ 8.58 ┆ 1.92 ┆ Male ┆ true ┆ Fri ┆ null │\n", + "└───────────┴───────┴──────┴────────┴────────┴─────┴──────┘" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "import polars as pl\n", "\n", "tips = pl.scan_parquet(\"tips.parquet\")\n", "\n", - "tips.filter(pl.col(\"time\").is_null()).collect()" + "(tips.filter(pl.col(\"time\").is_null())).collect()" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "id": "a84196c9-5032-4650-83dd-176319b6eed5", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (6, 7)
record_idtotaltipgendersmokerdaytime
i64f64f64strboolstrstr
222.493.5"Male"false"Fri""Dinner"
35.751.0"Female"true"Fri"null
4nullnull"Male"true"Fri""Dinner"
1413.423.48"Female"true"Fri""Lunch"
158.581.92"Male"true"Fri"null
1615.983.0"Female"false"Fri""Lunch"
" + ], + "text/plain": [ + "shape: (6, 7)\n", + "┌───────────┬───────┬──────┬────────┬────────┬─────┬────────┐\n", + "│ record_id ┆ total ┆ tip ┆ gender ┆ smoker ┆ day ┆ time │\n", + "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", + "│ i64 ┆ f64 ┆ f64 ┆ str ┆ bool ┆ str ┆ str │\n", + "╞═══════════╪═══════╪══════╪════════╪════════╪═════╪════════╡\n", + "│ 2 ┆ 22.49 ┆ 3.5 ┆ Male ┆ false ┆ Fri ┆ Dinner │\n", + "│ 3 ┆ 5.75 ┆ 1.0 ┆ Female ┆ true ┆ Fri ┆ null │\n", + "│ 4 ┆ null ┆ null ┆ Male ┆ true ┆ Fri ┆ Dinner │\n", + "│ 14 ┆ 13.42 ┆ 3.48 ┆ Female ┆ true ┆ Fri ┆ Lunch │\n", + "│ 15 ┆ 8.58 ┆ 1.92 ┆ Male ┆ true ┆ Fri ┆ null │\n", + "│ 16 ┆ 15.98 ┆ 3.0 ┆ Female ┆ false ┆ Fri ┆ Lunch │\n", + "└───────────┴───────┴──────┴────────┴────────┴─────┴────────┘" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "tips.filter(pl.col(\"record_id\").is_in([2, 3, 4, 14, 15, 16])).collect()" + "(tips.filter(pl.col(\"record_id\").is_in([2, 3, 4, 14, 15, 16]))).collect()" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "id": "acfdafa7-c9e0-49cc-8b1e-e4366ce2ac59", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (2, 7)
record_idtotaltipgendersmokerdaytime
i64f64f64strboolstrstr
35.751.0"Female"true"Fri""Dinner"
158.581.92"Male"true"Fri""Lunch"
" + ], + "text/plain": [ + "shape: (2, 7)\n", + "┌───────────┬───────┬──────┬────────┬────────┬─────┬────────┐\n", + "│ record_id ┆ total ┆ tip ┆ gender ┆ smoker ┆ day ┆ time │\n", + "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", + "│ i64 ┆ f64 ┆ f64 ┆ str ┆ bool ┆ str ┆ str │\n", + "╞═══════════╪═══════╪══════╪════════╪════════╪═════╪════════╡\n", + "│ 3 ┆ 5.75 ┆ 1.0 ┆ Female ┆ true ┆ Fri ┆ Dinner │\n", + "│ 15 ┆ 8.58 ┆ 1.92 ┆ Male ┆ true ┆ Fri ┆ Lunch │\n", + "└───────────┴───────┴──────┴────────┴────────┴─────┴────────┘" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "(\n", " tips.drop_nulls(\"total\")\n", @@ -129,23 +393,42 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "id": "19504937-9a8b-48c9-b504-62db2bff178c", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (2, 7)
record_idtotaltipgendersmokerdaytime
i64f64f64strboolstrstr
4nullnull"Male"true"Fri""Dinner"
18nullnull"Female"true"Fri""Lunch"
" + ], + "text/plain": [ + "shape: (2, 7)\n", + "┌───────────┬───────┬──────┬────────┬────────┬─────┬────────┐\n", + "│ record_id ┆ total ┆ tip ┆ gender ┆ smoker ┆ day ┆ time │\n", + "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", + "│ i64 ┆ f64 ┆ f64 ┆ str ┆ bool ┆ str ┆ str │\n", + "╞═══════════╪═══════╪══════╪════════╪════════╪═════╪════════╡\n", + "│ 4 ┆ null ┆ null ┆ Male ┆ true ┆ Fri ┆ Dinner │\n", + "│ 18 ┆ null ┆ null ┆ Female ┆ true ┆ Fri ┆ Lunch │\n", + "└───────────┴───────┴──────┴────────┴────────┴─────┴────────┘" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "tips = pl.scan_parquet(\"tips.parquet\")\n", + "import polars as pl\n", "\n", - "(tips.filter(pl.all_horizontal(pl.col(\"total\", \"tip\").is_null()))).collect()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "91b280c0-f7f7-4874-86b6-df349b8b6927", - "metadata": {}, - "outputs": [], - "source": [ "tips = pl.scan_parquet(\"tips.parquet\")\n", "\n", "(tips.filter(pl.all_horizontal(pl.col(\"total\", \"tip\").is_null()))).collect()" @@ -153,20 +436,88 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "id": "0d5ba705-e675-4935-8aab-958a539bd66a", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (178, 7)
record_idtotaltipgendersmokerdaytime
i64f64f64strboolstrstr
128.973.0"Male"true"Fri""Dinner"
222.493.5"Male"false"Fri""Dinner"
35.751.0"Female"true"Fri"null
522.753.25"Female"false"Fri""Dinner"
640.174.73"Male"true"Fri""Dinner"
17640.553.0"Male"true"Sun""Dinner"
17720.695.0"Male"false"Sun""Dinner"
17820.93.5"Female"true"Sun""Dinner"
17930.462.0"Male"true"Sun""Dinner"
18018.153.5"Female"true"Sun""Dinner"
" + ], + "text/plain": [ + "shape: (178, 7)\n", + "┌───────────┬───────┬──────┬────────┬────────┬─────┬────────┐\n", + "│ record_id ┆ total ┆ tip ┆ gender ┆ smoker ┆ day ┆ time │\n", + "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", + "│ i64 ┆ f64 ┆ f64 ┆ str ┆ bool ┆ str ┆ str │\n", + "╞═══════════╪═══════╪══════╪════════╪════════╪═════╪════════╡\n", + "│ 1 ┆ 28.97 ┆ 3.0 ┆ Male ┆ true ┆ Fri ┆ Dinner │\n", + "│ 2 ┆ 22.49 ┆ 3.5 ┆ Male ┆ false ┆ Fri ┆ Dinner │\n", + "│ 3 ┆ 5.75 ┆ 1.0 ┆ Female ┆ true ┆ Fri ┆ null │\n", + "│ 5 ┆ 22.75 ┆ 3.25 ┆ Female ┆ false ┆ Fri ┆ Dinner │\n", + "│ 6 ┆ 40.17 ┆ 4.73 ┆ Male ┆ true ┆ Fri ┆ Dinner │\n", + "│ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n", + "│ 176 ┆ 40.55 ┆ 3.0 ┆ Male ┆ true ┆ Sun ┆ Dinner │\n", + "│ 177 ┆ 20.69 ┆ 5.0 ┆ Male ┆ false ┆ Sun ┆ Dinner │\n", + "│ 178 ┆ 20.9 ┆ 3.5 ┆ Female ┆ true ┆ Sun ┆ Dinner │\n", + "│ 179 ┆ 30.46 ┆ 2.0 ┆ Male ┆ true ┆ Sun ┆ Dinner │\n", + "│ 180 ┆ 18.15 ┆ 3.5 ┆ Female ┆ true ┆ Sun ┆ Dinner │\n", + "└───────────┴───────┴──────┴────────┴────────┴─────┴────────┘" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ + "tips = pl.scan_parquet(\"tips.parquet\")\n", + "\n", "(tips.filter(~pl.all_horizontal(pl.col(\"total\", \"tip\").is_null()))).collect()" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, "id": "29a6aab6-edb5-42cc-998b-7bd82f45ce8c", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (1, 7)
record_idtotaltipgendersmokerdaytime
u32u32u32u32u32u32u32
0000000
" + ], + "text/plain": [ + "shape: (1, 7)\n", + "┌───────────┬───────┬─────┬────────┬────────┬─────┬──────┐\n", + "│ record_id ┆ total ┆ tip ┆ gender ┆ smoker ┆ day ┆ time │\n", + "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", + "│ u32 ┆ u32 ┆ u32 ┆ u32 ┆ u32 ┆ u32 ┆ u32 │\n", + "╞═══════════╪═══════╪═════╪════════╪════════╪═════╪══════╡\n", + "│ 0 ┆ 0 ┆ 0 ┆ 0 ┆ 0 ┆ 0 ┆ 0 │\n", + "└───────────┴───────┴─────┴────────┴────────┴─────┴──────┘" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "import polars as pl\n", "\n", @@ -175,11 +526,7 @@ "(\n", " tips.filter(~pl.all_horizontal(pl.col(\"total\", \"tip\").is_null()))\n", " .with_columns(pl.col(\"tip\").fill_null(0))\n", - " .with_columns(\n", - " pl.when(pl.col(\"record_id\") == 2)\n", - " .then(pl.col(\"time\").fill_null(strategy=\"forward\"))\n", - " .otherwise(pl.col(\"time\").fill_null(strategy=\"backward\"))\n", - " )\n", + " .with_columns(pl.col(\"time\").fill_null(strategy=\"forward\"))\n", ").null_count().collect()" ] }, @@ -193,10 +540,42 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 14, "id": "2e29d50f-b9f8-4545-b954-040490e6f15c", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (5, 5)
scientist_idfirst_namelast_namebirth_yeardeath_year
i64strstri64i64
1"Isaac"null16421726
2"Louis""Pasteur"18221895
3null"Einstein"null1955
4"Charles""Darwin"1809null
5"Marie""Curie"18671934
" + ], + "text/plain": [ + "shape: (5, 5)\n", + "┌──────────────┬────────────┬───────────┬────────────┬────────────┐\n", + "│ scientist_id ┆ first_name ┆ last_name ┆ birth_year ┆ death_year │\n", + "│ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", + "│ i64 ┆ str ┆ str ┆ i64 ┆ i64 │\n", + "╞══════════════╪════════════╪═══════════╪════════════╪════════════╡\n", + "│ 1 ┆ Isaac ┆ null ┆ 1642 ┆ 1726 │\n", + "│ 2 ┆ Louis ┆ Pasteur ┆ 1822 ┆ 1895 │\n", + "│ 3 ┆ null ┆ Einstein ┆ null ┆ 1955 │\n", + "│ 4 ┆ Charles ┆ Darwin ┆ 1809 ┆ null │\n", + "│ 5 ┆ Marie ┆ Curie ┆ 1867 ┆ 1934 │\n", + "└──────────────┴────────────┴───────────┴────────────┴────────────┘" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "import polars as pl\n", "\n", @@ -215,10 +594,42 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 15, "id": "a6a5a990-d2cf-4dd2-8021-1a59e27c64d2", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (5, 5)
scientist_idfirst_namelast_namebirth_yeardeath_year
i64strstri64i64
1"Isaac""Unknown"16421726
2"Louis""Pasteur"18221895
3"Unknown""Einstein"01955
4"Charles""Darwin"18090
5"Marie""Curie"18671934
" + ], + "text/plain": [ + "shape: (5, 5)\n", + "┌──────────────┬────────────┬───────────┬────────────┬────────────┐\n", + "│ scientist_id ┆ first_name ┆ last_name ┆ birth_year ┆ death_year │\n", + "│ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", + "│ i64 ┆ str ┆ str ┆ i64 ┆ i64 │\n", + "╞══════════════╪════════════╪═══════════╪════════════╪════════════╡\n", + "│ 1 ┆ Isaac ┆ Unknown ┆ 1642 ┆ 1726 │\n", + "│ 2 ┆ Louis ┆ Pasteur ┆ 1822 ┆ 1895 │\n", + "│ 3 ┆ Unknown ┆ Einstein ┆ 0 ┆ 1955 │\n", + "│ 4 ┆ Charles ┆ Darwin ┆ 1809 ┆ 0 │\n", + "│ 5 ┆ Marie ┆ Curie ┆ 1867 ┆ 1934 │\n", + "└──────────────┴────────────┴───────────┴────────────┴────────────┘" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "import polars.selectors as cs\n", "\n", @@ -239,10 +650,42 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 16, "id": "8b706a22-cc6a-49c9-858c-69bb3f72cb48", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (5, 4)
productlast_yearcurrent_yearnext_year
stri64i64f64
"A"171929.0
"B"3535NaN
"C"2119null
"D"4250-inf
"E"2325inf
" + ], + "text/plain": [ + "shape: (5, 4)\n", + "┌─────────┬───────────┬──────────────┬───────────┐\n", + "│ product ┆ last_year ┆ current_year ┆ next_year │\n", + "│ --- ┆ --- ┆ --- ┆ --- │\n", + "│ str ┆ i64 ┆ i64 ┆ f64 │\n", + "╞═════════╪═══════════╪══════════════╪═══════════╡\n", + "│ A ┆ 17 ┆ 19 ┆ 29.0 │\n", + "│ B ┆ 35 ┆ 35 ┆ NaN │\n", + "│ C ┆ 21 ┆ 19 ┆ null │\n", + "│ D ┆ 42 ┆ 50 ┆ -inf │\n", + "│ E ┆ 23 ┆ 25 ┆ inf │\n", + "└─────────┴───────────┴──────────────┴───────────┘" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "import polars as pl\n", "\n", @@ -253,10 +696,42 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 17, "id": "5cde06c9-1a4c-45da-991d-cda5cd27542c", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (5, 4)
productlast_yearcurrent_yearnext_year
stri64i64f64
"A"171929.0
"B"3535null
"C"2119null
"D"4250null
"E"2325null
" + ], + "text/plain": [ + "shape: (5, 4)\n", + "┌─────────┬───────────┬──────────────┬───────────┐\n", + "│ product ┆ last_year ┆ current_year ┆ next_year │\n", + "│ --- ┆ --- ┆ --- ┆ --- │\n", + "│ str ┆ i64 ┆ i64 ┆ f64 │\n", + "╞═════════╪═══════════╪══════════════╪═══════════╡\n", + "│ A ┆ 17 ┆ 19 ┆ 29.0 │\n", + "│ B ┆ 35 ┆ 35 ┆ null │\n", + "│ C ┆ 21 ┆ 19 ┆ null │\n", + "│ D ┆ 42 ┆ 50 ┆ null │\n", + "│ E ┆ 23 ┆ 25 ┆ null │\n", + "└─────────┴───────────┴──────────────┴───────────┘" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "(\n", " sales_trends.with_columns(\n", @@ -269,10 +744,42 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 18, "id": "babf6ca8-101f-40f8-8224-426eeece5a81", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (5, 4)
productlast_yearcurrent_yearnext_year
stri64i64f64
"A"171929.0
"B"353535.0
"C"211917.0
"D"425058.0
"E"232527.0
" + ], + "text/plain": [ + "shape: (5, 4)\n", + "┌─────────┬───────────┬──────────────┬───────────┐\n", + "│ product ┆ last_year ┆ current_year ┆ next_year │\n", + "│ --- ┆ --- ┆ --- ┆ --- │\n", + "│ str ┆ i64 ┆ i64 ┆ f64 │\n", + "╞═════════╪═══════════╪══════════════╪═══════════╡\n", + "│ A ┆ 17 ┆ 19 ┆ 29.0 │\n", + "│ B ┆ 35 ┆ 35 ┆ 35.0 │\n", + "│ C ┆ 21 ┆ 19 ┆ 17.0 │\n", + "│ D ┆ 42 ┆ 50 ┆ 58.0 │\n", + "│ E ┆ 23 ┆ 25 ┆ 27.0 │\n", + "└─────────┴───────────┴──────────────┴───────────┘" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "(\n", " sales_trends.with_columns(\n", @@ -298,10 +805,38 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 36, "id": "d564123d-42da-462b-a52a-c6a815e59b0d", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (1, 4)
episodeseriestitleoriginal_date
u32u32u32u32
0221
" + ], + "text/plain": [ + "shape: (1, 4)\n", + "┌─────────┬────────┬───────┬───────────────┐\n", + "│ episode ┆ series ┆ title ┆ original_date │\n", + "│ --- ┆ --- ┆ --- ┆ --- │\n", + "│ u32 ┆ u32 ┆ u32 ┆ u32 │\n", + "╞═════════╪════════╪═══════╪═══════════════╡\n", + "│ 0 ┆ 2 ┆ 2 ┆ 1 │\n", + "└─────────┴────────┴───────┴───────────────┘" + ] + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "import polars as pl\n", "\n", @@ -312,25 +847,55 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 19, "id": "000b53ba-c5d3-4a75-89d7-86c36881a078", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (1, 4)
episodeseriestitleoriginal_date
u32u32u32u32
0000
" + ], + "text/plain": [ + "shape: (1, 4)\n", + "┌─────────┬────────┬───────┬───────────────┐\n", + "│ episode ┆ series ┆ title ┆ original_date │\n", + "│ --- ┆ --- ┆ --- ┆ --- │\n", + "│ u32 ┆ u32 ┆ u32 ┆ u32 │\n", + "╞═════════╪════════╪═══════╪═══════════════╡\n", + "│ 0 ┆ 0 ┆ 0 ┆ 0 │\n", + "└─────────┴────────┴───────┴───────────────┘" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "import polars as pl\n", "\n", "episodes = pl.scan_parquet(\"ft_exercise.parquet\")\n", "\n", - "episodes.with_columns(\n", - " pl.when(pl.col(\"episode\") == 6)\n", - " .then(pl.col(\"series\").fill_null(strategy=\"forward\"))\n", - " .otherwise(pl.col(\"series\").fill_null(strategy=\"backward\"))\n", - ").with_columns(\n", - " pl.when(pl.col(\"episode\") == 4)\n", - " .then(pl.col(\"title\").fill_null(\"The Hotel Inspectors\"))\n", - " .otherwise(pl.col(\"title\").fill_null(\"Waldorf Salad\"))\n", - ").with_columns(\n", - " pl.col(\"original_date\").interpolate()\n", + "(\n", + " episodes.with_columns(\n", + " pl.when(pl.col(\"episode\") == 6)\n", + " .then(pl.col(\"series\").fill_null(strategy=\"forward\"))\n", + " .otherwise(pl.col(\"series\").fill_null(strategy=\"backward\"))\n", + " )\n", + " .with_columns(\n", + " pl.when(pl.col(\"episode\") == 4)\n", + " .then(pl.col(\"title\").fill_null(\"The Hotel Inspectors\"))\n", + " .otherwise(pl.col(\"title\").fill_null(\"Waldorf Salad\"))\n", + " )\n", + " .with_columns(pl.col(\"original_date\").interpolate())\n", ").null_count().collect()" ] }