diff --git a/100-pandas-puzzles.ipynb b/100-pandas-puzzles.ipynb index bcafc2be8..0a6877830 100644 --- a/100-pandas-puzzles.ipynb +++ b/100-pandas-puzzles.ipynb @@ -41,12 +41,14 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": { "collapsed": true }, "outputs": [], - "source": [] + "source": [ + "import pandas as pd" + ] }, { "cell_type": "markdown", @@ -57,12 +59,22 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": { "collapsed": true }, - "outputs": [], - "source": [] + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2.2.2\n" + ] + } + ], + "source": [ + "print(pd.__version__)" + ] }, { "cell_type": "markdown", @@ -73,12 +85,94 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": { "collapsed": true }, - "outputs": [], - "source": [] + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/python/3.10.13/lib/python3.10/site-packages/_distutils_hack/__init__.py:33: UserWarning: Setuptools is replacing distutils.\n", + " warnings.warn(\"Setuptools is replacing distutils.\")\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "INSTALLED VERSIONS\n", + "------------------\n", + "commit : d9cdd2ee5a58015ef6f4d15c7226110c9aab8140\n", + "python : 3.10.13.final.0\n", + "python-bits : 64\n", + "OS : Linux\n", + "OS-release : 6.2.0-1019-azure\n", + "Version : #19~22.04.1-Ubuntu SMP Wed Jan 10 22:57:03 UTC 2024\n", + "machine : x86_64\n", + "processor : x86_64\n", + "byteorder : little\n", + "LC_ALL : None\n", + "LANG : C.UTF-8\n", + "LOCALE : en_US.UTF-8\n", + "\n", + "pandas : 2.2.2\n", + "numpy : 1.26.4\n", + "pytz : 2024.1\n", + "dateutil : 2.9.0.post0\n", + "setuptools : 68.2.2\n", + "pip : 24.0\n", + "Cython : None\n", + "pytest : None\n", + "hypothesis : None\n", + "sphinx : None\n", + "blosc : None\n", + "feather : None\n", + "xlsxwriter : None\n", 
+ "lxml.etree : None\n", + "html5lib : None\n", + "pymysql : None\n", + "psycopg2 : None\n", + "jinja2 : 3.1.3\n", + "IPython : 8.24.0\n", + "pandas_datareader : None\n", + "adbc-driver-postgresql: None\n", + "adbc-driver-sqlite : None\n", + "bs4 : 4.12.3\n", + "bottleneck : None\n", + "dataframe-api-compat : None\n", + "fastparquet : None\n", + "fsspec : 2024.3.1\n", + "gcsfs : None\n", + "matplotlib : 3.8.4\n", + "numba : None\n", + "numexpr : None\n", + "odfpy : None\n", + "openpyxl : None\n", + "pandas_gbq : None\n", + "pyarrow : None\n", + "pyreadstat : None\n", + "python-calamine : None\n", + "pyxlsb : None\n", + "s3fs : None\n", + "scipy : 1.13.0\n", + "sqlalchemy : None\n", + "tables : None\n", + "tabulate : None\n", + "xarray : None\n", + "xlrd : None\n", + "zstandard : None\n", + "tzdata : 2024.1\n", + "qtpy : None\n", + "pyqt5 : None\n" + ] + } + ], + "source": [ + "pd.show_versions()" + ] }, { "cell_type": "markdown", @@ -112,7 +206,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "metadata": { "collapsed": true }, @@ -127,7 +221,7 @@ "\n", "labels = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j']\n", "\n", - "df = # (complete this line of code)" + "df = pd.DataFrame(data = data,index=labels)" ] }, { @@ -139,12 +233,83 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "metadata": { "collapsed": true }, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
countmeanstdmin25%50%75%max
age8.03.43752.0077970.52.3753.04.6257.0
visits10.01.90000.8755951.01.0002.02.7503.0
\n", + "
" + ], + "text/plain": [ + " count mean std min 25% 50% 75% max\n", + "age 8.0 3.4375 2.007797 0.5 2.375 3.0 4.625 7.0\n", + "visits 10.0 1.9000 0.875595 1.0 1.000 2.0 2.750 3.0" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.describe().T" + ] }, { "cell_type": "markdown", @@ -155,12 +320,79 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "metadata": { "collapsed": true }, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
animalagevisitspriority
acat2.51yes
bcat3.03yes
csnake0.52no
\n", + "
" + ], + "text/plain": [ + " animal age visits priority\n", + "a cat 2.5 1 yes\n", + "b cat 3.0 3 yes\n", + "c snake 0.5 2 no" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.head(3)" + ] }, { "cell_type": "markdown", @@ -171,12 +403,113 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "metadata": { "collapsed": true }, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
animalage
acat2.5
bcat3.0
csnake0.5
ddogNaN
edog5.0
fcat2.0
gsnake4.5
hcatNaN
idog7.0
jdog3.0
\n", + "
" + ], + "text/plain": [ + " animal age\n", + "a cat 2.5\n", + "b cat 3.0\n", + "c snake 0.5\n", + "d dog NaN\n", + "e dog 5.0\n", + "f cat 2.0\n", + "g snake 4.5\n", + "h cat NaN\n", + "i dog 7.0\n", + "j dog 3.0" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[['animal','age']]" + ] }, { "cell_type": "markdown", @@ -187,12 +520,71 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 17, "metadata": { "collapsed": true }, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
animalage
ddogNaN
edog5.0
idog7.0
\n", + "
" + ], + "text/plain": [ + " animal age\n", + "d dog NaN\n", + "e dog 5.0\n", + "i dog 7.0" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[['animal','age']].iloc[[3,4,8]]" + ] }, { "cell_type": "markdown", @@ -203,12 +595,57 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 21, "metadata": { "collapsed": true }, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
animalagevisitspriority
\n", + "
" + ], + "text/plain": [ + "Empty DataFrame\n", + "Columns: [animal, age, visits, priority]\n", + "Index: []" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[df.visits > 3]" + ] }, { "cell_type": "markdown", @@ -219,12 +656,71 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 22, "metadata": { "collapsed": true }, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
animalagevisitspriority
ddogNaN3yes
hcatNaN1yes
\n", + "
" + ], + "text/plain": [ + " animal age visits priority\n", + "d dog NaN 3 yes\n", + "h cat NaN 1 yes" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[df.age.isnull()]" + ] }, { "cell_type": "markdown", @@ -235,12 +731,71 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 27, "metadata": { "collapsed": true }, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
animalagevisitspriority
acat2.51yes
fcat2.03no
\n", + "
" + ], + "text/plain": [ + " animal age visits priority\n", + "a cat 2.5 1 yes\n", + "f cat 2.0 3 no" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[(df['animal'] == \"cat\")& (df['age']<3)]" + ] }, { "cell_type": "markdown", @@ -251,12 +806,87 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 28, "metadata": { "collapsed": true }, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
animalagevisitspriority
acat2.51yes
bcat3.03yes
fcat2.03no
jdog3.01no
\n", + "
" + ], + "text/plain": [ + " animal age visits priority\n", + "a cat 2.5 1 yes\n", + "b cat 3.0 3 yes\n", + "f cat 2.0 3 no\n", + "j dog 3.0 1 no" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[(df['age']>=2) & (df['age']<= 4)]" + ] }, { "cell_type": "markdown", @@ -267,12 +897,14 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 41, "metadata": { "collapsed": true }, "outputs": [], - "source": [] + "source": [ + "df.loc['f','age'] = 1.5" + ] }, { "cell_type": "markdown", @@ -283,12 +915,25 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 42, "metadata": { "collapsed": true }, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/plain": [ + "19" + ] + }, + "execution_count": 42, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.visits.sum()" + ] }, { "cell_type": "markdown", @@ -299,12 +944,25 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 43, "metadata": { "collapsed": true }, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/plain": [ + "3.375" + ] + }, + "execution_count": 43, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.age.mean()" + ] }, { "cell_type": "markdown", @@ -320,7 +978,9 @@ "collapsed": true }, "outputs": [], - "source": [] + "source": [ + "# TODO: complete this puzzle (the unfinished `df.` stub was a SyntaxError)" + ] }, { "cell_type": "markdown", @@ -1452,7 +2112,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -1466,7 +2126,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.4" + "version": "3.10.13" } }, "nbformat": 4,