diff --git a/README.md b/README.md index e456010..29a0dea 100644 --- a/README.md +++ b/README.md @@ -3,50 +3,29 @@ [![license](https://img.shields.io/github/license/DAVFoundation/captain-n3m0.svg?style=flat-square)](https://github.com/DAVFoundation/captain-n3m0/blob/master/LICENSE) Open In Colab -This repository holds source code and examples on how to extract and visualize +This repository contains useful notebook examples on how to extract and visualize breast cancer data from the World Health Organization (WHO) database. -## Setup +## Dependencies -A python package without useful utility tools was developed to aid extraction -and visualization. The package is compatible with `Python >= 3.8` and has the -following dependencies: +The notebooks were tested with Python 3.8 on macOS. However, they should be +quite robust across different setups. The following dependencies are used, and are +installed directly from within the notebooks: -* numpy - -* folium - -* pycountry - -* geopandas - -* Pillow - -Note that these dependencies will be installed directly when launching the Jypyter Notebooks. - -## Installation - -Start by creating a virtual environment and installing the package: - -``` -virtualenv -ppython3 venv --clear -source venv/bin/activate -python3 -m pip install who_extract@https://github.com/andreped/breast-cancer-stats.git -``` +* pandas +* plotly +* nbformat +* kaleido ## Usage -Example application notebooks are available in the [apps/](https://github.com/andreped/breast-cancer-stats/apps/) directory. - -## Troubleshoot - -1) Virtual environment activation - -To activate the virtual environment on Windows, instead of `source venv/bin/activate` run `./venv/Scripts/activate`. 
- -2) `ImportError: No module named selenium` +Example application notebooks are available in the [apps/](https://github.com/andreped/breast-cancer-stats/tree/main/apps/) directory, and include: -To export the generated map as a PNG image, the [Firefox](https://www.mozilla.org/en-US/firefox/new/) explorer is required. +| Use case | Notebook | +| --- | --- | +| Incidence rate world map | [breast_cancer_incidence_rate.ipynb](https://github.com/andreped/breast-cancer-stats/blob/main/apps/breast_cancer_incidence_rate.ipynb) | +| Mortality rate world map | [breast_cancer_mortality_rate.ipynb](https://github.com/andreped/breast-cancer-stats/blob/main/apps/breast_cancer_mortality_rate.ipynb) | +| Overall cancer statistics | [breast_cancer_incidence_rate.ipynb](https://github.com/andreped/breast-cancer-stats/blob/main/apps/breast_cancer_incidence_rate.ipynb) | ## License diff --git a/apps/breast_cancer_incidence_rate_plotly.ipynb b/apps/breast_cancer_incidence_rate.ipynb similarity index 80% rename from apps/breast_cancer_incidence_rate_plotly.ipynb rename to apps/breast_cancer_incidence_rate.ipynb index 2df864b..9e11009 100644 --- a/apps/breast_cancer_incidence_rate_plotly.ipynb +++ b/apps/breast_cancer_incidence_rate.ipynb @@ -2,13 +2,36 @@ "cells": [ { "cell_type": "code", - "execution_count": 2, + "execution_count": 12, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ + "Requirement already satisfied: pandas in /Users/andreped/workspace/breast-cancer-stats/venv/lib/python3.8/site-packages (2.0.3)\n", + "Requirement already satisfied: plotly in /Users/andreped/workspace/breast-cancer-stats/venv/lib/python3.8/site-packages (5.15.0)\n", + "Requirement already satisfied: nbformat in /Users/andreped/workspace/breast-cancer-stats/venv/lib/python3.8/site-packages (5.9.2)\n", + "Requirement already satisfied: kaleido in /Users/andreped/workspace/breast-cancer-stats/venv/lib/python3.8/site-packages (0.2.1)\n", + "Requirement already satisfied: python-dateutil>=2.8.2 in /Users/andreped/workspace/breast-cancer-stats/venv/lib/python3.8/site-packages (from pandas) (2.8.2)\n", + "Requirement already satisfied: pytz>=2020.1 in 
/Users/andreped/workspace/breast-cancer-stats/venv/lib/python3.8/site-packages (from pandas) (2023.3)\n", + "Requirement already satisfied: tzdata>=2022.1 in /Users/andreped/workspace/breast-cancer-stats/venv/lib/python3.8/site-packages (from pandas) (2023.3)\n", + "Requirement already satisfied: numpy>=1.20.3 in /Users/andreped/workspace/breast-cancer-stats/venv/lib/python3.8/site-packages (from pandas) (1.24.4)\n", + "Requirement already satisfied: tenacity>=6.2.0 in /Users/andreped/workspace/breast-cancer-stats/venv/lib/python3.8/site-packages (from plotly) (8.2.2)\n", + "Requirement already satisfied: packaging in /Users/andreped/workspace/breast-cancer-stats/venv/lib/python3.8/site-packages (from plotly) (23.1)\n", + "Requirement already satisfied: fastjsonschema in /Users/andreped/workspace/breast-cancer-stats/venv/lib/python3.8/site-packages (from nbformat) (2.18.0)\n", + "Requirement already satisfied: jsonschema>=2.6 in /Users/andreped/workspace/breast-cancer-stats/venv/lib/python3.8/site-packages (from nbformat) (4.19.0)\n", + "Requirement already satisfied: jupyter-core in /Users/andreped/workspace/breast-cancer-stats/venv/lib/python3.8/site-packages (from nbformat) (5.3.1)\n", + "Requirement already satisfied: traitlets>=5.1 in /Users/andreped/workspace/breast-cancer-stats/venv/lib/python3.8/site-packages (from nbformat) (5.9.0)\n", + "Requirement already satisfied: attrs>=22.2.0 in /Users/andreped/workspace/breast-cancer-stats/venv/lib/python3.8/site-packages (from jsonschema>=2.6->nbformat) (23.1.0)\n", + "Requirement already satisfied: importlib-resources>=1.4.0 in /Users/andreped/workspace/breast-cancer-stats/venv/lib/python3.8/site-packages (from jsonschema>=2.6->nbformat) (6.0.1)\n", + "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /Users/andreped/workspace/breast-cancer-stats/venv/lib/python3.8/site-packages (from jsonschema>=2.6->nbformat) (2023.7.1)\n", + "Requirement already satisfied: pkgutil-resolve-name>=1.3.10 in 
/Users/andreped/workspace/breast-cancer-stats/venv/lib/python3.8/site-packages (from jsonschema>=2.6->nbformat) (1.3.10)\n", + "Requirement already satisfied: referencing>=0.28.4 in /Users/andreped/workspace/breast-cancer-stats/venv/lib/python3.8/site-packages (from jsonschema>=2.6->nbformat) (0.30.2)\n", + "Requirement already satisfied: rpds-py>=0.7.1 in /Users/andreped/workspace/breast-cancer-stats/venv/lib/python3.8/site-packages (from jsonschema>=2.6->nbformat) (0.9.2)\n", + "Requirement already satisfied: six>=1.5 in /Users/andreped/workspace/breast-cancer-stats/venv/lib/python3.8/site-packages (from python-dateutil>=2.8.2->pandas) (1.16.0)\n", + "Requirement already satisfied: platformdirs>=2.5 in /Users/andreped/workspace/breast-cancer-stats/venv/lib/python3.8/site-packages (from jupyter-core->nbformat) (3.10.0)\n", + "Requirement already satisfied: zipp>=3.1.0 in /Users/andreped/workspace/breast-cancer-stats/venv/lib/python3.8/site-packages (from importlib-resources>=1.4.0->jsonschema>=2.6->nbformat) (3.16.2)\n", "\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.1.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.2.1\u001b[0m\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n" @@ -22,7 +45,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 37, "metadata": {}, "outputs": [ { @@ -35,8 +58,6 @@ { "autocolorscale": false, "colorbar": { - "len": 0.75, - "thickness": 15, "title": { "text": "Cum.risk" } @@ -44,39 +65,39 @@ "colorscale": [ [ 0, - "rgb(247,251,255)" + "rgb(255,245,240)" ], [ 0.125, - "rgb(222,235,247)" + "rgb(254,224,210)" ], [ 0.25, - "rgb(198,219,239)" + "rgb(252,187,161)" ], [ 0.375, - "rgb(158,202,225)" + "rgb(252,146,114)" ], [ 0.5, - "rgb(107,174,214)" + "rgb(251,106,74)" ], [ 0.625, - 
"rgb(66,146,198)" + "rgb(239,59,44)" ], [ 0.75, - "rgb(33,113,181)" + "rgb(203,24,29)" ], [ 0.875, - "rgb(8,81,156)" + "rgb(165,15,21)" ], [ 1, - "rgb(8,48,107)" + "rgb(103,0,13)" ] ], "locations": [ @@ -462,191 +483,191 @@ ], "type": "choropleth", "z": [ - 3.11, - 4.86, - 5.62, - 3.68, - 3.72, - 8.34, - 10.5, - 7.47, - 6.7, - 4.81, - 1.83, - 5.54, - 8.03, - 12.07, - 0.6, - 2.41, - 5.69, - 2, - 6.68, - 5.31, - 4.98, - 5.88, - 6.52, - 2.29, - 2.8, - 5.83, - 2.44, - 4.12, - 8.89, + 2.07, + 1.24, + 1.96, + 1.85, + 1.47, + 2.06, + 1.24, + 1.49, + 3.26, + 1.47, + 1.03, + 2.2, + 4.38, 1.64, - 3.78, - 2.93, - 4.08, - 4.05, - 4.18, - 5.21, - 2.37, - 2.74, - 2.97, - 5.24, - 7.55, - 4.94, - 8.76, - 7.82, - 2.58, - 10.57, - 6.23, - 4.13, - 4.27, - 3.84, - 4.26, - 4.27, - 6.88, - 6.71, - 10.3, - 10.55, - 6.08, - 6.64, - 4.24, - 3.55, - 6.3, + 0.36, + 0.67, + 1.81, + 0.7, + 1.48, + 1.3, + 1.95, + 1.3, + 1.79, 1.03, - 5.71, - 8.73, - 3.93, - 7.63, - 7.14, - 4.16, - 3.19, - 2.05, - 5.31, - 3.05, - 3.44, - 8.4, - 8.81, - 2.81, - 4.83, - 3.67, - 5.72, - 9.66, - 8.46, - 9.27, - 4.68, - 6.7, - 7.98, - 4.18, - 6.36, - 4.69, - 3.49, - 6.41, - 5.74, + 1.75, + 1.49, + 1.07, + 2.23, + 1.41, + 0.73, + 2.5, + 1.25, + 2.66, + 1.09, + 1.16, + 1.43, + 1.15, + 1.27, + 1.77, + 1.23, + 1.66, + 1.38, + 1.98, + 1.3, + 1.45, + 1.62, + 2.78, + 1.19, + 0.83, + 1.92, + 2.57, 2.61, - 3.9, - 6.23, + 1.53, + 4.4, + 1.38, + 1.65, + 1.62, + 2.12, + 2.6, + 1.3, + 2.69, + 0.61, + 2.49, + 1.71, + 1.92, + 1.5, + 1.76, + 1.66, + 0.79, + 1.18, + 1.62, + 1.67, + 1.37, + 1.94, + 1.36, + 1.49, + 1.78, + 1.15, + 2.42, + 1.6, + 1.7, + 1.43, + 2.7, + 3.5, + 1.11, + 1.58, + 2.16, + 2.28, + 1.06, + 0.68, + 1.83, + 0.92, + 1.67, + 2.33, + 1.06, + 1.96, + 2.07, + 1.65, + 1.59, + 1.45, + 2.07, + 1.57, + 2.24, + 1.86, + 2.86, + 1.22, + 1.9, + 1.87, + 2.19, + 1.18, + 0.42, + 2.23, + 2.8, + 1.86, + 1.34, + 1.55, + 2.59, + 0.79, + 1.64, + 1.77, + 0.83, + 1.51, + 1.34, + 2.21, + 2.86, + 1.13, + 2.38, + 1.23, + 2.88, + 1.84, + 
0.96, 2.02, - 7.03, - 3.42, - 3.65, - 6.71, - 10.78, - 3.78, - 2.66, - 5.29, - 4.77, - 4.55, - 9.57, - 8.86, - 3.37, - 7.02, - 4.33, - 1.17, - 4.93, - 7.99, - 5.84, - 2.22, - 4, - 5.96, + 1.85, + 1.32, + 1.63, + 1.13, + 1.58, + 1.59, + 1.3, + 1.98, + 1.7, + 1.69, + 1.8, + 1.82, + 0.93, + 1.89, + 2.7, + 2.54, + 2.01, + 1.9, 1.47, - 10.75, - 10.61, - 2.92, - 9.92, - 3.79, - 3.25, - 5.38, - 8.95, - 4.22, - 4.61, - 4.98, - 6.11, - 3.77, - 5.66, - 7.19, - 7.43, - 2.71, - 2.72, - 7.48, - 4.91, - 6.3, - 7.23, - 6.12, - 3.16, - 5.33, - 3.61, - 3.01, - 3.37, - 9.26, - 4.03, - 8.27, - 6.6, - 3.69, - 7.58, - 4.4, - 5.6, - 4.38, - 8.14, - 2.71, - 4.16, - 5.61, - 2.51, - 9.23, - 9.84, - 6.16, - 2.06, - 4.12, - 3.21, - 5.84, - 6.58, - 4.28, - 4.89, - 3.24, - 2.47, - 4.93, - 6.46, - 5.04, - 9.42, - 2.74, - 9.81, - 3.2, - 7.02, - 2.77, - 5.69, - 9.06, - 3.12, - 2.31 + 1.42, + 2.97, + 1.74, + 2.3, + 1.13, + 1.76, + 2.12, + 1.65, + 1.06, + 1.32, + 1.48, + 2.91, + 0.91, + 1.39, + 1.84, + 2.4, + 1.7, + 1.28, + 1.39, + 1.59, + 1.38, + 1.74, + 1.87, + 1.89, + 1.45, + 1.44, + 1.36, + 2.03, + 2.22, + 1.41, + 2.04, + 3.14, + 1.95, + 1.12 ] } ], @@ -654,8 +675,8 @@ "annotations": [ { "showarrow": false, - "text": "Data source: GLOBOSCAN 2020 (IARC)", - "x": 0.015, + "text": "Source: GLOBOSCAN 2020 (IARC)", + "x": 0.55, "xref": "paper", "y": 0.15, "yref": "paper" @@ -666,7 +687,7 @@ "type": "equirectangular" }, "showcoastlines": false, - "showframe": false + "showframe": true }, "margin": { "b": 0, @@ -1502,39 +1523,34 @@ "import pandas as pd\n", "\n", "\n", - "cancer_data = pd.read_csv(\"../data/cum_risk_breast_cancer_incidence.csv\")\n", + "cancer_data = pd.read_csv(\"../data/cum_risk_breast_cancer_mortality.csv\")\n", "\n", "fig = go.Figure(data=go.Choropleth(\n", " locations = cancer_data['ISO code'],\n", " z = cancer_data['Value'],\n", " text = cancer_data['Population'],\n", - " colorscale = 'Blues',\n", + " colorscale = 'Reds',\n", " autocolorscale=False,\n", " reversescale=False,\n", 
" marker_line_color='darkgray',\n", " marker_line_width=0.5,\n", " #colorbar_tickprefix = '%',\n", - " #colorbar=None,\n", " colorbar_title = 'Cum.risk',\n", "))\n", "\n", - "#fig.data[0].colorbar.x=-0.15\n", - "fig.data[0].colorbar.thickness=15\n", - "fig.data[0].colorbar.len=0.75\n", - "\n", "fig.update_layout(\n", - " #title_text='Estimated cumulative risk of breast cancer incidence in 2020, females, ages 0-74',\n", + " #title_text='Estimated cumulative risk of breast cancer mortality in 2020, females, ages 0-74',\n", " geo=dict(\n", " showframe=False,\n", - " showcoastlines=False,\n", - " projection_type='equirectangular' # times\n", + " showcoastlines=True,\n", + " projection_type='equirectangular'\n", " ),\n", " annotations = [dict(\n", - " x=0.015,\n", + " x=0.55,\n", " y=0.15,\n", " xref='paper',\n", " yref='paper',\n", - " text='Data source: GLOBOSCAN 2020 (IARC)',\n", + " text='Source: GLOBOSCAN 2020 (IARC)',\n", " showarrow = False\n", " )],\n", " margin=go.layout.Margin(\n", @@ -1542,13 +1558,13 @@ " r=0, #right margin\n", " b=0, #bottom margin\n", " t=0 #top margin <- Need to be adjusted if title is added!\n", - " ),\n", - " #coloraxis_colorbar_x=0.26,\n", + " )\n", ")\n", "\n", - "fig.write_image(\"cum_risk_breast_cancer_incidence.svg\")\n", "\n", - "fig.show()\n" + "fig.show()\n", + "\n", + "fig.write_image(\"test.svg\")\n" ] } ], diff --git a/apps/breast_cancer_mortality_rate_plotly.ipynb b/apps/breast_cancer_mortality_rate.ipynb similarity index 99% rename from apps/breast_cancer_mortality_rate_plotly.ipynb rename to apps/breast_cancer_mortality_rate.ipynb index da96d55..0a61a77 100644 --- a/apps/breast_cancer_mortality_rate_plotly.ipynb +++ b/apps/breast_cancer_mortality_rate.ipynb @@ -1570,7 +1570,7 @@ "fig.data[0].colorbar.len=0.75\n", "\n", "fig.show()\n", - "fig.write_image(\"test.svg\")\n" + "fig.write_image(\"cum_risk_breast_cancer_incidence.svg\")\n" ] } ],