diff --git a/Worldometer/Worldometer_World_population_evolution_and_projections.ipynb b/Worldometer/Worldometer_World_population_evolution_and_projections.ipynb index 47d9f329ac..d32c384284 100644 --- a/Worldometer/Worldometer_World_population_evolution_and_projections.ipynb +++ b/Worldometer/Worldometer_World_population_evolution_and_projections.ipynb @@ -57,7 +57,7 @@ "tags": [] }, "source": [ - "**Last update:** 2023-04-12 (Created: 2021-05-05)" + "**Last update:** 2023-10-14 (Created: 2021-05-05)" ] }, { @@ -97,7 +97,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": null, "id": "located-savannah", "metadata": { "papermill": {}, @@ -124,7 +124,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": null, "id": "protecting-november", "metadata": { "papermill": {}, @@ -137,14 +137,13 @@ " \"https://www.worldometers.info/world-population/world-population-projections/\",\n", "]\n", "\n", + "# Update this list to have latest cols from worldometers\n", "TABLE_COLS = [\n", " \"Year\",\n", - " \"World Population\",\n", + " \"WorldPopulation\",\n", " \"YearlyChange\",\n", " \"NetChange\",\n", " \"Density(P/KmĀ²)\",\n", - " \"UrbanPop\",\n", - " \"UrbanPop %\",\n", "]" ] }, @@ -172,7 +171,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": null, "id": "posted-ethics", "metadata": { "papermill": {}, @@ -182,14 +181,19 @@ "source": [ "# Generic functions\n", "\n", + "def preprocess_cols(cols):\n", + " cols = [col.replace(' ', '') for col in cols]\n", + " return cols\n", "\n", "def scrap_table(url, table_cloumns):\n", " page = requests.get(url)\n", " soup = BeautifulSoup(page.text, \"html.parser\")\n", " dfs = pd.read_html(page.text)\n", - "\n", " for df in dfs:\n", - " if df.columns.to_list() == table_cloumns:\n", + " # Need this as df columns have space inconsistency\n", + " columns_in_page = preprocess_cols(df.columns.to_list())\n", + " if columns_in_page == table_cloumns:\n", + " df.columns = columns_in_page\n", " return df\n", " return None\n", "\n", @@ -202,34 +206,10 @@ " if table is None:\n", " table = new_value\n", " else:\n", - " table = table.append(new_value)\n", + " table = pd.concat([table, new_value])\n", " return table" ] }, - { - "cell_type": "markdown", - "id": "surprising-basics", - "metadata": { - "papermill": {}, - "tags": [] - }, - "source": [ - "### Print table" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "id": "nutritional-variable", - "metadata": { - "papermill": {}, - "tags": [] - }, - "outputs": [], - "source": [ - "table" - ] - }, { "cell_type": "markdown", "id": "insured-prefix", @@ -243,7 +223,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": null, "id": "western-czech", "metadata": { "papermill": {}, @@ -258,7 +238,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": null, "id": "entire-working", "metadata": { "papermill": {}, @@ -291,7 +271,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": null, "id": "knowing-norfolk", "metadata": { "papermill": {}, @@ -330,7 +310,7 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": null, "id": "unusual-worker", "metadata": { "papermill": {}, @@ -354,7 +334,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": null, "id": "terminal-brown", "metadata": { "papermill": {}, @@ -378,7 +358,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": null, "id": "according-springfield", "metadata": { "papermill": {}, @@ -404,7 +384,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": null, "id": "fundamental-aquatic", "metadata": { "papermill": {}, @@ -454,4 +434,4 @@ }, "nbformat": 4, "nbformat_minor": 5 -} \ No newline at end of file +}