diff --git a/PyData/bookmarks.txt b/PyData/bookmarks.txt
index 425a989..55e9bb9 100644
--- a/PyData/bookmarks.txt
+++ b/PyData/bookmarks.txt
@@ -11,6 +11,7 @@ https://hakibenita.com/postgresql-unknown-features
# PySpark
https://jacobcelestine.com/knowledge_repo/colab_and_pyspark/
https://sparkbyexamples.com/
+chaining: https://mungingdata.com/pyspark/chaining-dataframe-transformations/
https://phoenixnap.com/kb/install-spark-on-ubuntu
https://www.how2shout.com/linux/installing-apache-spark-on-ubuntu-20-04-or-18-04/
installing: https://computingforgeeks.com/how-to-install-apache-spark-on-ubuntu-debian/
diff --git a/ibis-framework/Basic_Examples.ipynb b/ibis-framework/Basic_Examples.ipynb
new file mode 100644
index 0000000..e602d94
--- /dev/null
+++ b/ibis-framework/Basic_Examples.ipynb
@@ -0,0 +1,610 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "bf4d09b7-468e-41b4-af86-b1fed4f474ec",
+ "metadata": {},
+ "source": [
+ "# ibis examples"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "9a13d6a6-c2c1-43c1-873e-9e8d870a85ca",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import ibis\n",
+ "import pandas as pd\n",
+ "pd.options.display.max_rows = 20\n",
+ "ibis.options.interactive = True"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "c5fa1300-c01c-44ea-ae22-a9205134b8e4",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df = pd.read_csv(\"https://raw.githubusercontent.com/mwaskom/seaborn-data/master/tips.csv\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "2748f73f-8d1d-4349-9d3a-05ba042db5aa",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(244, 7)"
+ ]
+ },
+ "execution_count": 3,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.shape"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "2855fab5-e21e-42a7-addb-d8b3079dfeae",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " total_bill | \n",
+ " tip | \n",
+ " sex | \n",
+ " smoker | \n",
+ " day | \n",
+ " time | \n",
+ " size | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 16.99 | \n",
+ " 1.01 | \n",
+ " Female | \n",
+ " No | \n",
+ " Sun | \n",
+ " Dinner | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 10.34 | \n",
+ " 1.66 | \n",
+ " Male | \n",
+ " No | \n",
+ " Sun | \n",
+ " Dinner | \n",
+ " 3 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 21.01 | \n",
+ " 3.50 | \n",
+ " Male | \n",
+ " No | \n",
+ " Sun | \n",
+ " Dinner | \n",
+ " 3 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 23.68 | \n",
+ " 3.31 | \n",
+ " Male | \n",
+ " No | \n",
+ " Sun | \n",
+ " Dinner | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 24.59 | \n",
+ " 3.61 | \n",
+ " Female | \n",
+ " No | \n",
+ " Sun | \n",
+ " Dinner | \n",
+ " 4 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " total_bill tip sex smoker day time size\n",
+ "0 16.99 1.01 Female No Sun Dinner 2\n",
+ "1 10.34 1.66 Male No Sun Dinner 3\n",
+ "2 21.01 3.50 Male No Sun Dinner 3\n",
+ "3 23.68 3.31 Male No Sun Dinner 2\n",
+ "4 24.59 3.61 Female No Sun Dinner 4"
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "id": "fec4a567-4ce8-4997-a030-389715f00bbc",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " total_bill | \n",
+ " tip | \n",
+ " size | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " count | \n",
+ " 244.000000 | \n",
+ " 244.000000 | \n",
+ " 244.000000 | \n",
+ "
\n",
+ " \n",
+ " mean | \n",
+ " 19.785943 | \n",
+ " 2.998279 | \n",
+ " 2.569672 | \n",
+ "
\n",
+ " \n",
+ " std | \n",
+ " 8.902412 | \n",
+ " 1.383638 | \n",
+ " 0.951100 | \n",
+ "
\n",
+ " \n",
+ " min | \n",
+ " 3.070000 | \n",
+ " 1.000000 | \n",
+ " 1.000000 | \n",
+ "
\n",
+ " \n",
+ " 25% | \n",
+ " 13.347500 | \n",
+ " 2.000000 | \n",
+ " 2.000000 | \n",
+ "
\n",
+ " \n",
+ " 50% | \n",
+ " 17.795000 | \n",
+ " 2.900000 | \n",
+ " 2.000000 | \n",
+ "
\n",
+ " \n",
+ " 75% | \n",
+ " 24.127500 | \n",
+ " 3.562500 | \n",
+ " 3.000000 | \n",
+ "
\n",
+ " \n",
+ " max | \n",
+ " 50.810000 | \n",
+ " 10.000000 | \n",
+ " 6.000000 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " total_bill tip size\n",
+ "count 244.000000 244.000000 244.000000\n",
+ "mean 19.785943 2.998279 2.569672\n",
+ "std 8.902412 1.383638 0.951100\n",
+ "min 3.070000 1.000000 1.000000\n",
+ "25% 13.347500 2.000000 2.000000\n",
+ "50% 17.795000 2.900000 2.000000\n",
+ "75% 24.127500 3.562500 3.000000\n",
+ "max 50.810000 10.000000 6.000000"
+ ]
+ },
+ "execution_count": 19,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.describe()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "2dde52d4-31b8-4917-8dbe-f9f49aff236a",
+ "metadata": {},
+ "source": [
+ "## Convert Pandas DataFrame to ibis table"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "534a7253-df69-401d-853b-5e58f885c11b",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "tips = ibis.pandas.connect(\n",
+ " {\n",
+ " 'tips': df,\n",
+ " }\n",
+ ").table('tips')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "id": "7286bc75-0c8b-417e-820d-0ec1cbe07d44",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "['total_bill', 'tip', 'sex', 'smoker', 'day', 'time', 'size']"
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "tips.columns"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "c6b84479-669f-4ee3-bc7a-26f8ec8dfd4e",
+ "metadata": {},
+ "source": [
+ "## Look Up Schema / Data Type Info"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "id": "05a3a457-b960-490f-a641-257930643789",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "ibis.Schema { \n",
+ " total_bill float64\n",
+ " tip float64\n",
+ " sex string\n",
+ " smoker string\n",
+ " day string\n",
+ " time string\n",
+ " size int64\n",
+ "}"
+ ]
+ },
+ "execution_count": 18,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "tips.schema()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "8592cd3c-9354-469b-b1ec-f6cda3dc09d2",
+ "metadata": {},
+ "source": [
+ "## Selecting certain columns"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "90b1900f-db27-4a1e-981f-daf3a937ed1d",
+ "metadata": {},
+ "source": [
+ "```\n",
+ "SELECT\n",
+ " total_bill,\n",
+ " tip,\n",
+ " day,\n",
+ " time\n",
+ "FROM\n",
+ " tips as tips\n",
+ "```"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "0247c328-eb05-4d82-b3ef-377df65df836",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ " total_bill tip day time\n",
+ "0 16.99 1.01 Sun Dinner\n",
+ "1 10.34 1.66 Sun Dinner\n",
+ "2 21.01 3.50 Sun Dinner\n",
+ "3 23.68 3.31 Sun Dinner\n",
+ "4 24.59 3.61 Sun Dinner\n",
+ ".. ... ... ... ...\n",
+ "239 29.03 5.92 Sat Dinner\n",
+ "240 27.18 2.00 Sat Dinner\n",
+ "241 22.67 2.00 Sat Dinner\n",
+ "242 17.82 1.75 Sat Dinner\n",
+ "243 18.78 3.00 Thur Dinner\n",
+ "\n",
+ "[244 rows x 4 columns]"
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "tips['total_bill','tip','day','time']"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "1f22ef41-eea4-4373-9e64-852a99016480",
+ "metadata": {},
+ "source": [
+ "## Getting Row Count"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "id": "4d1a97cf-e185-47af-8f7f-8d9bec37e182",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "244"
+ ]
+ },
+ "execution_count": 8,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "tips.count()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "d0ed6587-2250-4375-a254-2e2c9822bfdc",
+ "metadata": {},
+ "source": [
+ "## Limiting the Number of Rows"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "id": "ed82bff2-5680-4cb8-8f7d-91317ca97966",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ " total_bill tip day time\n",
+ "0 16.99 1.01 Sun Dinner\n",
+ "1 10.34 1.66 Sun Dinner\n",
+ "2 21.01 3.50 Sun Dinner\n",
+ "3 23.68 3.31 Sun Dinner\n",
+ "4 24.59 3.61 Sun Dinner"
+ ]
+ },
+ "execution_count": 9,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "tips['total_bill','tip','day','time'].limit(5)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "0a71c523-054a-4375-a2cf-2f804fa0103d",
+ "metadata": {},
+ "source": [
+ "## Filtering"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "599e1703-599a-4af8-9ec2-b6ddee77ca5b",
+ "metadata": {},
+ "source": [
+ "```\n",
+ "SELECT\n",
+ " total_bill,\n",
+ " tip,\n",
+ " day,\n",
+ " time\n",
+ "FROM\n",
+ " tips as tips\n",
+ "WHERE\n",
+ " tips.day = 'Sun'\n",
+ "```"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "id": "47fd4d39-1efc-40a7-b0ad-d356a5cbe939",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ " total_bill tip day time\n",
+ "0 16.99 1.01 Sun Dinner\n",
+ "1 10.34 1.66 Sun Dinner\n",
+ "2 21.01 3.50 Sun Dinner\n",
+ "3 23.68 3.31 Sun Dinner\n",
+ "4 24.59 3.61 Sun Dinner\n",
+ ".. ... ... ... ...\n",
+ "71 20.90 3.50 Sun Dinner\n",
+ "72 30.46 2.00 Sun Dinner\n",
+ "73 18.15 3.50 Sun Dinner\n",
+ "74 23.10 4.00 Sun Dinner\n",
+ "75 15.69 1.50 Sun Dinner\n",
+ "\n",
+ "[76 rows x 4 columns]"
+ ]
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "tips['total_bill','tip','day','time'].filter(\n",
+ " tips['day'] == 'Sun'\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "2f7a38d4-b038-4bc5-b44d-99c46ed03e5e",
+ "metadata": {},
+ "source": [
+ "## Aggregation"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 21,
+ "id": "c97b0188-5420-4d94-bdef-cbc1b067ebdc",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ " day day_bill_avg\n",
+ "0 Fri 17.151579\n",
+ "1 Sat 20.441379\n",
+ "2 Sun 21.410000\n",
+ "3 Thur 17.682742"
+ ]
+ },
+ "execution_count": 21,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "tips.group_by('day').aggregate(tips['total_bill'].mean().name('day_bill_avg'))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 24,
+ "id": "6e0877e4-303d-4fbf-811d-dacfd37c34fb",
+ "metadata": {},
+ "outputs": [
+ {
+ "ename": "ModuleNotFoundError",
+ "evalue": "No module named 'ibis.postgres'",
+ "output_type": "error",
+ "traceback": [
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+ "\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)",
+ "\u001b[0;32m/tmp/ipykernel_621/1418244849.py\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0;32mimport\u001b[0m \u001b[0mibis\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpostgres\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
+ "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'ibis.postgres'"
+ ]
+ }
+ ],
+ "source": [
+ "import ibis.postgres"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Py3.8 (data_dev)",
+ "language": "python",
+ "name": "data_dev"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.8.10"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/pandas/Aggregate_Filter_Transform.ipynb b/pandas/Aggregate_Filter_Transform.ipynb
index a8515cd..581e134 100644
--- a/pandas/Aggregate_Filter_Transform.ipynb
+++ b/pandas/Aggregate_Filter_Transform.ipynb
@@ -16,7 +16,7 @@
"outputs": [],
"source": [
"import pandas as pd\n",
- "pd.options.display.max_rows = 20"
+ "pd.options.display.max_rows = 20``"
]
},
{