From 20dc5a41b394e277f675455f14bef259f7e329bf Mon Sep 17 00:00:00 2001 From: Andrey Vykhodtsev Date: Fri, 9 Feb 2024 11:22:31 +0200 Subject: [PATCH] notebook changes --- 01-Load-Data-ACogSearch.ipynb | 101 +++++++++---------- 03-Quering-AOpenAI.ipynb | 15 +-- 07-SQLDB_QA.ipynb | 176 +++++++++++++--------------------- common/requirements.txt | 5 +- 4 files changed, 132 insertions(+), 165 deletions(-) diff --git a/01-Load-Data-ACogSearch.ipynb b/01-Load-Data-ACogSearch.ipynb index 35eb3db0..e79ec9f2 100644 --- a/01-Load-Data-ACogSearch.ipynb +++ b/01-Load-Data-ACogSearch.ipynb @@ -255,52 +255,52 @@ " }\n", " ]\n", " },\n", - " # {\n", - " # \"@odata.type\": \"#Microsoft.Skills.Text.KeyPhraseExtractionSkill\",\n", - " # \"context\": \"/document/pages/*\",\n", - " # \"maxKeyPhraseCount\": 2,\n", - " # \"defaultLanguageCode\": \"en\",\n", - " # \"inputs\": [\n", - " # {\n", - " # \"name\": \"text\", \n", - " # \"source\": \"/document/pages/*\"\n", - " # }\n", - " # ],\n", - " # \"outputs\": [\n", - " # {\n", - " # \"name\": \"keyPhrases\",\n", - " # \"targetName\": \"keyPhrases\"\n", - " # }\n", - " # ]\n", - " # },\n", - " # {\n", - " # \"@odata.type\": \"#Microsoft.Skills.Text.V3.EntityRecognitionSkill\",\n", - " # \"context\": \"/document/pages/*\",\n", - " # \"categories\": [\"Person\", \"URL\", \"Email\"],\n", - " # \"minimumPrecision\": 0.5, \n", - " # \"defaultLanguageCode\": \"en\",\n", - " # \"inputs\": [\n", - " # {\n", - " # \"name\": \"text\", \n", - " # \"source\":\"/document/pages/*\"\n", - " # }\n", - " # ],\n", - " # \"outputs\": [\n", - " # {\n", - " # \"name\": \"persons\", \n", - " # \"targetName\": \"persons\"\n", - " # },\n", - " # {\n", - " # \"name\": \"urls\", \n", - " # \"targetName\": \"urls\"\n", - " # },\n", - " # {\n", - " # \"name\": \"emails\", \n", - " # \"targetName\": \"emails\"\n", - " # }\n", - " # ]\n", - " # }\n", - " # ],\n", + " # {\n", + " # \"@odata.type\": \"#Microsoft.Skills.Text.KeyPhraseExtractionSkill\",\n", + " # 
\"context\": \"/document/pages/*\",\n", + " # \"maxKeyPhraseCount\": 2,\n", + " # \"defaultLanguageCode\": \"en\",\n", + " # \"inputs\": [\n", + " # {\n", + " # \"name\": \"text\", \n", + " # \"source\": \"/document/pages/*\"\n", + " # }\n", + " # ],\n", + " # \"outputs\": [\n", + " # {\n", + " # \"name\": \"keyPhrases\",\n", + " # \"targetName\": \"keyPhrases\"\n", + " # }\n", + " # ]\n", + " # },\n", + " # {\n", + " # \"@odata.type\": \"#Microsoft.Skills.Text.V3.EntityRecognitionSkill\",\n", + " # \"context\": \"/document/pages/*\",\n", + " # \"categories\": [\"Person\", \"URL\", \"Email\"],\n", + " # \"minimumPrecision\": 0.5, \n", + " # \"defaultLanguageCode\": \"en\",\n", + " # \"inputs\": [\n", + " # {\n", + " # \"name\": \"text\", \n", + " # \"source\":\"/document/pages/*\"\n", + " # }\n", + " # ],\n", + " # \"outputs\": [\n", + " # {\n", + " # \"name\": \"persons\", \n", + " # \"targetName\": \"persons\"\n", + " # },\n", + " # {\n", + " # \"name\": \"urls\", \n", + " # \"targetName\": \"urls\"\n", + " # },\n", + " # {\n", + " # \"name\": \"emails\", \n", + " # \"targetName\": \"emails\"\n", + " # }\n", + " # ]\n", + " # }\n", + " ],\n", " \"cognitiveServices\": {\n", " \"@odata.type\": \"#Microsoft.Azure.Search.CognitiveServicesByKey\",\n", " \"description\": os.environ['COG_SERVICES_NAME'],\n", @@ -570,10 +570,11 @@ " r = requests.get(os.environ['AZURE_SEARCH_ENDPOINT'] + \"/indexers/\" + indexer_name +\n", " \"/status\", headers=headers, params=params)\n", " # pprint(json.dumps(r.json(), indent=1))\n", - " print(r.status_code)\n", - " print(\"Status:\",r.json().get('lastResult').get('status'))\n", - " print(\"Items Processed:\",r.json().get('lastResult').get('itemsProcessed'))\n", - " print(r.ok)\n", + " #print(r.status_code)\n", + " #print(\"Status:\",r.json().get('lastResult').get('status'))\n", + " #print(\"Items Processed:\",r.json().get('lastResult').get('itemsProcessed'))\n", + " #print(r.ok)\n", + " print(r.json().get('lastResult'))\n", " \n", 
"except Exception as e:\n", " print(\"Wait a few seconds until the process starts and run this cell again.\")" diff --git a/03-Quering-AOpenAI.ipynb b/03-Quering-AOpenAI.ipynb index 880fcde2..92f6afe7 100644 --- a/03-Quering-AOpenAI.ipynb +++ b/03-Quering-AOpenAI.ipynb @@ -772,10 +772,11 @@ "display(HTML('

Top Answers

'))\n", "\n", "for index,search_results in agg_search_results.items():\n", - " for result in search_results['@search.answers']:\n", - " if result['score'] > 0.5: # Show answers that are at least 50% of the max possible score=1\n", - " display(HTML('
' + 'Answer - score: ' + str(round(result['score'],2)) + '
'))\n", - " display(HTML(result['text']))\n", + " if '@search.answers' in search_results: # skip result sets that carry no answers\n", + " for result in search_results['@search.answers']:\n", + " if result['score'] > 0.5: # Show answers that are at least 50% of the max possible score=1\n", + " display(HTML('
' + 'Answer - score: ' + str(round(result['score'],2)) + '
'))\n", + " display(HTML(result['text']))\n", " \n", "print(\"\\n\\n\")\n", "display(HTML('

Top Results

'))\n", @@ -1422,9 +1423,9 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3.10 - SDK v2", + "display_name": ".venv", "language": "python", - "name": "python310-sdkv2" + "name": "python3" }, "language_info": { "codemirror_mode": { @@ -1436,7 +1437,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.11" + "version": "3.11.7" } }, "nbformat": 4, diff --git a/07-SQLDB_QA.ipynb b/07-SQLDB_QA.ipynb index a1ed15f9..51b89044 100644 --- a/07-SQLDB_QA.ipynb +++ b/07-SQLDB_QA.ipynb @@ -114,29 +114,18 @@ "name": "stdout", "output_type": "stream", "text": [ - "Connection successful!\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/tmp/ipykernel_36761/2845560204.py:27: RemovedIn20Warning: Deprecated API features detected! These feature(s) are not compatible with SQLAlchemy 2.0. To prevent incompatible upgrades prior to updating applications, ensure requirements files are pinned to \"sqlalchemy<2.0\". Set environment variable SQLALCHEMY_WARN_20=1 to show all deprecation warnings. Set environment variable SQLALCHEMY_SILENCE_UBER_WARNING=1 to silence this message. 
(Background on SQLAlchemy 2.0 at: https://sqlalche.me/e/b8d9)\n", - " result = engine.execute(\"SELECT @@Version\")\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ + "Connection successful!\n", "('Microsoft SQL Azure (RTM) - 12.0.2000.8 \\n\\tNov 2 2023 01:40:17 \\n\\tCopyright (C) 2022 Microsoft Corporation\\n',)\n" ] } ], "source": [ - "from sqlalchemy import create_engine\n", + "\n", + "from sqlalchemy import create_engine, text\n", + "from sqlalchemy.exc import OperationalError\n", "from sqlalchemy.engine.url import URL\n", "\n", + "\n", "db_config = {\n", " 'drivername': 'mssql+pyodbc',\n", " 'username': os.environ[\"SQL_SERVER_USERNAME\"] +'@'+ os.environ[\"SQL_SERVER_NAME\"],\n", @@ -150,8 +139,6 @@ "# Create a URL object for connecting to the database\n", "db_url = URL.create(**db_config)\n", "\n", - "# Print the resulting URL string\n", - "# print(db_url)\n", "\n", "# Connect to the Azure SQL Database using the URL string\n", "engine = create_engine(db_url)\n", @@ -160,7 +147,10 @@ "try:\n", " conn = engine.connect()\n", " print(\"Connection successful!\")\n", - " result = engine.execute(\"SELECT @@Version\")\n", + " # check the database version\n", + " result = conn.execute(text(\"SELECT @@version;\"))\n", + "\n", + " \n", " for row in result:\n", " print(row)\n", " conn.close()\n", @@ -230,23 +220,29 @@ " create_table_sql += f\"{name} DATETIME, \"\n", "create_table_sql = create_table_sql[:-2] + \")\"\n", "\n", + "\n", + "\n", "try:\n", - " #Createse the table in Azure SQL\n", - " engine.execute(create_table_sql)\n", - " print(\"Table\",table_name,\"succesfully created\")\n", - " # Insert data into SQL Database\n", - " lower = 0\n", - " upper = 1000\n", - " limit = df.shape[0]\n", "\n", - " while lower < limit:\n", - " df[lower:upper].to_sql(table_name, con=engine, if_exists='append', index=False)\n", - " print(\"rows:\", lower, \"-\", upper, \"inserted\")\n", - " lower = upper\n", - " upper = min(upper + 1000, limit)\n", + " 
with engine.begin() as conn:\n", + " conn.execute(text(f\"DROP TABLE IF EXISTS {table_name}\"))\n", + " # Recreate the table in Azure SQL; engine.begin() commits the DDL and inserts on success\n", + " conn.execute(text(create_table_sql))\n", + " print(\"Table\",table_name,\"succesfully created\")\n", + " # Insert data into SQL Database in batches of 1000 rows\n", + " lower = 0\n", + " upper = 1000\n", + " limit = df.shape[0]\n", + "\n", + " while lower < limit:\n", + " df[lower:upper].to_sql(table_name, con=conn, if_exists='append', index=False)\n", + " print(\"rows:\", lower, \"-\", upper, \"inserted\")\n", + " lower = upper\n", + " upper = min(upper + 1000, limit)\n", "\n", "except Exception as e:\n", - " print(e)" + " print(e)\n", + " raise e" ] }, { @@ -346,7 +342,7 @@ "['sql_db_query', 'sql_db_schema', 'sql_db_list_tables', 'sql_db_query_checker']" ] }, - "execution_count": 9, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" } @@ -458,9 +454,9 @@ "\n", "\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n", "\u001b[32;1m\u001b[1;3mAction: sql_db_list_tables\n", - "Action Input: \"\"\u001b[0m\n", + "Action Input: \u001b[0m\n", "Observation: \u001b[38;5;200m\u001b[1;3mcovidtracking\u001b[0m\n", - "Thought:\u001b[32;1m\u001b[1;3mThe `covidtracking` table seems to be the most relevant one for this question. I should check its schema to see what columns it has.\n", + "Thought:\u001b[32;1m\u001b[1;3mThe `covidtracking` table is likely to have the data I need for hospitalizations. 
I should check the schema of this table to understand what columns are available and to confirm that the `hospitalizedIncrease` column exists and is the correct one to use for this query.\n", "Action: sql_db_schema\n", "Action Input: covidtracking\u001b[0m\n", "Observation: \u001b[33;1m\u001b[1;3m\n", @@ -511,82 +507,51 @@ "/*\n", "3 rows from covidtracking table:\n", "date\tstate\tdeath\tdeathConfirmed\tdeathIncrease\tdeathProbable\thospitalized\thospitalizedCumulative\thospitalizedCurrently\thospitalizedIncrease\tinIcuCumulative\tinIcuCurrently\tnegative\tnegativeIncrease\tnegativeTestsAntibody\tnegativeTestsPeopleAntibody\tnegativeTestsViral\tonVentilatorCumulative\tonVentilatorCurrently\tpositive\tpositiveCasesViral\tpositiveIncrease\tpositiveScore\tpositiveTestsAntibody\tpositiveTestsAntigen\tpositiveTestsPeopleAntibody\tpositiveTestsPeopleAntigen\tpositiveTestsViral\trecovered\ttotalTestEncountersViral\ttotalTestEncountersViralIncrease\ttotalTestResults\ttotalTestResultsIncrease\ttotalTestsAntibody\ttotalTestsAntigen\ttotalTestsPeopleAntibody\ttotalTestsPeopleAntigen\ttotalTestsPeopleViral\ttotalTestsPeopleViralIncrease\ttotalTestsViral\ttotalTestsViralIncrease\n", - "2021-03-07\tAK\t305.0\t0.0\t0\t0.0\t1293.0\t1293.0\t33.0\t0\t0.0\t0.0\t0.0\t0\t0.0\t0.0\t1660758.0\t0.0\t2.0\t56886.0\t0.0\t0\t0\t0.0\t0.0\t0.0\t0.0\t68693.0\t0.0\t0.0\t0\t1731628.0\t0\t0.0\t0.0\t0.0\t0.0\t0.0\t0\t1731628.0\t0\n", - "2021-03-07\tAL\t10148.0\t7963.0\t-1\t2185.0\t45976.0\t45976.0\t494.0\t0\t2676.0\t0.0\t1931711.0\t2087\t0.0\t0.0\t0.0\t1515.0\t0.0\t499819.0\t392077.0\t408\t0\t0.0\t0.0\t0.0\t0.0\t0.0\t295690.0\t0.0\t0\t2323788.0\t2347\t0.0\t0.0\t119757.0\t0.0\t2323788.0\t2347\t0.0\t0\n", - "2021-03-07\tAR\t5319.0\t4308.0\t22\t1011.0\t14926.0\t14926.0\t335.0\t11\t0.0\t141.0\t2480716.0\t3267\t0.0\t0.0\t2480716.0\t1533.0\t65.0\t324818.0\t255726.0\t165\t0\t0.0\t0.0\t0.0\t81803.0\t0.0\t315517.0\t0.0\t0\t2736442.0\t3380\t0.0\t0.0\t0.0\t481311.0\t0.0\t0\t2736442.0\t3380\n", + "\n", 
"*/\u001b[0m\n", - "Thought:\u001b[32;1m\u001b[1;3mThe `covidtracking` table has the columns `date`, `state`, and `hospitalizedIncrease` which are relevant to the question. I will write a query to get the sum of `hospitalizedIncrease` for the state of Texas and for all states during July 2020.\n", + "Thought:\u001b[32;1m\u001b[1;3mThe `covidtracking` table has a column named `hospitalizedIncrease` which is what I need to use to find out the number of patients hospitalized during July 2020 in Texas and nationwide. I will write a query to sum the `hospitalizedIncrease` for the state of Texas and another to sum it for all states for the month of July 2020.\n", "Action: sql_db_query_checker\n", - "Action Input: \n", - "```sql\n", - "SELECT SUM(hospitalizedIncrease) as TotalHospitalized\n", - "FROM covidtracking\n", - "WHERE date LIKE '2020-07%' AND state = 'TX'\n", - "```\u001b[0m\n", - "Observation: \u001b[36;1m\u001b[1;3m```sql\n", - "SELECT SUM(hospitalizedIncrease) as TotalHospitalized\n", - "FROM covidtracking\n", - "WHERE date LIKE '2020-07%' AND state = 'TX'\n", - "```\u001b[0m\n", - "Thought:\u001b[32;1m\u001b[1;3mThe query syntax is correct. Now I will run this query to get the total number of patients hospitalized in Texas during July 2020.\n", - "Action: sql_db_query\n", - "Action Input: \n", + "Action Input: SELECT SUM(hospitalizedIncrease) AS TotalHospitalizedTexas FROM covidtracking WHERE state = 'TX' AND date LIKE '2020-07%'\u001b[0m\n", + "Observation: \u001b[36;1m\u001b[1;3mThe provided SQL query does not exhibit any of the common mistakes listed. It correctly uses the `SUM` function to aggregate the `hospitalizedIncrease` column, filters by the state of Texas (`'TX'`), and uses the `LIKE` operator to filter dates for the month of July 2020. The query does not use `NOT IN`, `UNION`, `BETWEEN`, or joins, so there are no mistakes related to those aspects. There are no apparent data type mismatches, and the identifiers do not require quoting. 
The function `SUM` is used with the correct number of arguments.\n", + "\n", + "Here is the original query:\n", + "\n", "```sql\n", - "SELECT SUM(hospitalizedIncrease) as TotalHospitalized\n", - "FROM covidtracking\n", - "WHERE date LIKE '2020-07%' AND state = 'TX'\n", + "SELECT SUM(hospitalizedIncrease) AS TotalHospitalizedTexas FROM covidtracking WHERE state = 'TX' AND date LIKE '2020-07%'\n", "```\u001b[0m\n", - "Observation: \u001b[36;1m\u001b[1;3mError: (pyodbc.ProgrammingError) ('42000', \"[42000] [Microsoft][ODBC Driver 17 for SQL Server][SQL Server]Incorrect syntax near '`'. (102) (SQLExecDirectW)\")\n", - "[SQL: ```sql\n", - "SELECT SUM(hospitalizedIncrease) as TotalHospitalized\n", - "FROM covidtracking\n", - "WHERE date LIKE '2020-07%' AND state = 'TX'\n", - "```]\n", - "(Background on this error at: https://sqlalche.me/e/14/f405)\u001b[0m\n", - "Thought:\u001b[32;1m\u001b[1;3mIt seems there was an error due to the inclusion of markdown backticks in the SQL query. I will remove the backticks and try again.\n", + "Thought:\u001b[32;1m\u001b[1;3mThe query checker has confirmed that the query syntax is correct. Now I will execute the query to get the total number of patients hospitalized in Texas during July 2020.\n", "Action: sql_db_query\n", - "Action Input: \n", - "SELECT SUM(hospitalizedIncrease) as TotalHospitalized\n", - "FROM covidtracking\n", - "WHERE date LIKE '2020-07%' AND state = 'TX'\u001b[0m\n", - "Observation: \u001b[36;1m\u001b[1;3m[(0,)]\u001b[0m\n", - "Thought:\u001b[32;1m\u001b[1;3mThe result indicates that no patients were hospitalized in Texas during July 2020 according to the `hospitalizedIncrease` column. 
Now I will write and run a query to get the total number of patients hospitalized in all states during July 2020.\n", + "Action Input: SELECT SUM(hospitalizedIncrease) AS TotalHospitalizedTexas FROM covidtracking WHERE state = 'TX' AND date LIKE '2020-07%'\u001b[0m\n", + "Observation: \u001b[36;1m\u001b[1;3m[(None,)]\u001b[0m\n", + "Thought:\u001b[32;1m\u001b[1;3mThe result returned is `None`, which indicates that there might be no data for the `hospitalizedIncrease` column for Texas in July 2020, or the data might contain NULL values which result in a `None` when summed. I will now write and check a query to get the nationwide total of hospitalized patients during July 2020.\n", "Action: sql_db_query_checker\n", - "Action Input: \n", + "Action Input: SELECT SUM(hospitalizedIncrease) AS TotalHospitalizedNationwide FROM covidtracking WHERE date LIKE '2020-07%'\u001b[0m\n", + "Observation: \u001b[36;1m\u001b[1;3mThe provided SQL query does not exhibit any of the common mistakes listed. It correctly uses the `SUM` function to aggregate the `hospitalizedIncrease` column, and it uses the `LIKE` operator to filter the dates for the month of July 2020. The query does not use `NOT IN`, `UNION`/`UNION ALL`, `BETWEEN`, or joins, so those potential issues are not applicable. 
There is no indication of data type mismatch, improper quoting, incorrect function arguments, or incorrect casting.\n", + "\n", + "Therefore, the original query is reproduced below without any changes:\n", + "\n", "```sql\n", - "SELECT SUM(hospitalizedIncrease) as TotalHospitalized\n", - "FROM covidtracking\n", - "WHERE date LIKE '2020-07%'\n", - "```\u001b[0m\n", - "Observation: \u001b[36;1m\u001b[1;3m```sql\n", - "SELECT SUM(hospitalizedIncrease) as TotalHospitalized\n", - "FROM covidtracking\n", - "WHERE date LIKE '2020-07%'\n", + "SELECT SUM(hospitalizedIncrease) AS TotalHospitalizedNationwide FROM covidtracking WHERE date LIKE '2020-07%'\n", "```\u001b[0m\n", - "Thought:\u001b[32;1m\u001b[1;3mThe query syntax is correct. Now I will run this query to get the total number of patients hospitalized in all states during July 2020.\n", + "Thought:\u001b[32;1m\u001b[1;3mThe query checker has confirmed that the query syntax is correct. Now I will execute the query to get the nationwide total of hospitalized patients during July 2020.\n", "Action: sql_db_query\n", - "Action Input: \n", - "SELECT SUM(hospitalizedIncrease) as TotalHospitalized\n", - "FROM covidtracking\n", - "WHERE date LIKE '2020-07%'\u001b[0m\n", - "Observation: \u001b[36;1m\u001b[1;3m[(63105,)]\u001b[0m\n", - "Thought:\u001b[32;1m\u001b[1;3mI now know the final answer.\n", - "Final Answer: During July 2020, no patients were hospitalized in Texas and 63,105 patients were hospitalized nationwide.\n", + "Action Input: SELECT SUM(hospitalizedIncrease) AS TotalHospitalizedNationwide FROM covidtracking WHERE date LIKE '2020-07%'\u001b[0m\n", + "Observation: \u001b[36;1m\u001b[1;3m[(None,)]\u001b[0m\n", + "Thought:\u001b[32;1m\u001b[1;3mThe result returned is `None` for the nationwide total as well, which indicates that there might be no data for the `hospitalizedIncrease` column for the month of July 2020, or the data might contain NULL values which result in a `None` when summed. 
Since both queries for Texas and nationwide have returned `None`, it is likely that the `hospitalizedIncrease` data for July 2020 is not available or not recorded in the database.\n", + "\n", + "Final Answer: I cannot provide the number of patients hospitalized during July 2020 in Texas or nationwide because the data is not available or not recorded in the `hospitalizedIncrease` column of the `covidtracking` table.\n", "\n", "Explanation:\n", - "I queried the `covidtracking` table for the sum of the `hospitalizedIncrease` column where the date starts with '2020-07'. For Texas, the query returned 0, indicating that no patients were hospitalized in Texas during July 2020. For all states, the query returned 63,105, indicating that 63,105 patients were hospitalized nationwide during July 2020. \n", - "I used the following queries:\n", + "I executed two queries to calculate the sum of `hospitalizedIncrease` for Texas and nationwide for the month of July 2020. Both queries returned `None`, indicating that the data is not available or contains NULL values. 
Here are the queries I used:\n", "\n", + "For Texas:\n", "```sql\n", - "SELECT SUM(hospitalizedIncrease) as TotalHospitalized\n", - "FROM covidtracking\n", - "WHERE date LIKE '2020-07%' AND state = 'TX'\n", + "SELECT SUM(hospitalizedIncrease) AS TotalHospitalizedTexas FROM covidtracking WHERE state = 'TX' AND date LIKE '2020-07%'\n", "```\n", "\n", + "For Nationwide:\n", "```sql\n", - "SELECT SUM(hospitalizedIncrease) as TotalHospitalized\n", - "FROM covidtracking\n", - "WHERE date LIKE '2020-07%'\n", + "SELECT SUM(hospitalizedIncrease) AS TotalHospitalizedNationwide FROM covidtracking WHERE date LIKE '2020-07%'\n", "```\u001b[0m\n", "\n", "\u001b[1m> Finished chain.\u001b[0m\n" @@ -612,22 +577,19 @@ { "data": { "text/markdown": [ - "During July 2020, no patients were hospitalized in Texas and 63,105 patients were hospitalized nationwide.\n", + "I cannot provide the number of patients hospitalized during July 2020 in Texas or nationwide because the data is not available or not recorded in the `hospitalizedIncrease` column of the `covidtracking` table.\n", "\n", "Explanation:\n", - "I queried the `covidtracking` table for the sum of the `hospitalizedIncrease` column where the date starts with '2020-07'. For Texas, the query returned 0, indicating that no patients were hospitalized in Texas during July 2020. For all states, the query returned 63,105, indicating that 63,105 patients were hospitalized nationwide during July 2020. \n", - "I used the following queries:\n", + "I executed two queries to calculate the sum of `hospitalizedIncrease` for Texas and nationwide for the month of July 2020. Both queries returned `None`, indicating that the data is not available or contains NULL values. 
Here are the queries I used:\n", "\n", + "For Texas:\n", "```sql\n", - "SELECT SUM(hospitalizedIncrease) as TotalHospitalized\n", - "FROM covidtracking\n", - "WHERE date LIKE '2020-07%' AND state = 'TX'\n", + "SELECT SUM(hospitalizedIncrease) AS TotalHospitalizedTexas FROM covidtracking WHERE state = 'TX' AND date LIKE '2020-07%'\n", "```\n", "\n", + "For Nationwide:\n", "```sql\n", - "SELECT SUM(hospitalizedIncrease) as TotalHospitalized\n", - "FROM covidtracking\n", - "WHERE date LIKE '2020-07%'\n", + "SELECT SUM(hospitalizedIncrease) AS TotalHospitalizedNationwide FROM covidtracking WHERE date LIKE '2020-07%'\n", "```" ], "text/plain": [ @@ -683,9 +645,9 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3.10 - SDK v2", + "display_name": ".venv", "language": "python", - "name": "python310-sdkv2" + "name": "python3" }, "language_info": { "codemirror_mode": { @@ -697,7 +659,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.11" + "version": "3.11.7" } }, "nbformat": 4, diff --git a/common/requirements.txt b/common/requirements.txt index f262ca92..ba4d867c 100644 --- a/common/requirements.txt +++ b/common/requirements.txt @@ -1,3 +1,6 @@ +requests +pandas +jinja2 langchain==0.0.347 langchain-experimental==0.0.44 openai==1.3.7 @@ -8,7 +11,7 @@ docx2txt pillow pypdf tenacity -sqlalchemy<2.0.0 +sqlalchemy pyodbc tabulate azure-cosmos