From ff48664fe138976c324138550dabc28efaa199e6 Mon Sep 17 00:00:00 2001 From: Shayan_Ta <66938889+ShaShaTa47@users.noreply.github.com> Date: Fri, 11 Oct 2024 21:43:46 +1300 Subject: [PATCH] initial --- .gitignore | 126 +++++++++++++++++++++++++++++++++++ actual_results.csv | 2 + app.py | 57 ++++++++++++++++ polling_analysis_results.csv | 7 ++ pollsters.csv | 7 ++ requirements.txt | 7 ++ 6 files changed, 206 insertions(+) create mode 100644 .gitignore create mode 100644 actual_results.csv create mode 100644 app.py create mode 100644 polling_analysis_results.csv create mode 100644 pollsters.csv create mode 100644 requirements.txt diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..1a17369 --- /dev/null +++ b/.gitignore @@ -0,0 +1,126 @@ + +# Editors +.vscode/ +.idea/ + +# Vagrant +.vagrant/ + +# Mac/OSX +.DS_Store + +# Windows +Thumbs.db + +# Source for the following rules: https://raw.githubusercontent.com/github/gitignore/master/Python.gitignore +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
"""Score each pollster's final poll against the actual election result.

Reads the pollsters' published party shares from ``pollsters.csv`` and the
actual result from ``actual_results.csv``, computes the sum of squared
differences (SSD), R^2 and a chi-squared goodness-of-fit statistic for every
pollster, and writes one row per pollster to ``polling_analysis_results.csv``.
"""
import numpy as np
import pandas as pd
import scipy.stats as stats

POLLSTERS_CSV = 'pollsters.csv'
ACTUAL_RESULTS_CSV = 'actual_results.csv'
OUTPUT_CSV = 'polling_analysis_results.csv'
POLLSTER_COLUMN = 'Pollster'
RESULT_COLUMNS = ['Pollster', 'SSD', 'R^2', 'Chi-Squared', 'P-Value']


def score_poll(actual_values, expected_values):
    """Compare one poll with the actual result.

    Args:
        actual_values: Actual party shares, one number per party.
        expected_values: The pollster's shares for the same parties, in the
            same order as ``actual_values``.

    Returns:
        A tuple ``(ssd, r_squared, chi2, p_value)``. ``r_squared`` is NaN when
        the actual values have zero variance, and ``chi2``/``p_value`` are NaN
        when the poll values sum to zero (they cannot be rescaled).

    Raises:
        ValueError: If the two inputs do not have the same shape.

    NOTE(review): the chi-squared test is fed percentage shares rather than
    counts, so the p-value is presumably only meaningful as a relative
    indicator between pollsters -- confirm the intended interpretation.
    """
    actual = np.asarray(actual_values, dtype=float)
    expected = np.asarray(expected_values, dtype=float)
    if actual.shape != expected.shape:
        raise ValueError(
            f"actual and expected values differ in shape: "
            f"{actual.shape} vs {expected.shape}"
        )

    # The SSD doubles as the residual sum of squares for R^2.
    ssd = np.sum((actual - expected) ** 2)

    ss_total = np.sum((actual - np.mean(actual)) ** 2)
    r_squared = 1 - (ssd / ss_total) if ss_total != 0 else float('nan')

    # scipy's chisquare requires observed and expected totals to agree, so the
    # poll is rescaled to the actual total. When the totals already match the
    # factor is exactly 1.0 and the values are left untouched.
    expected_sum = np.sum(expected)
    if expected_sum == 0:
        return ssd, r_squared, float('nan'), float('nan')
    scaled_expected = expected * (np.sum(actual) / expected_sum)
    chi2, p_value = stats.chisquare(f_obs=actual, f_exp=scaled_expected)

    return ssd, r_squared, chi2, p_value


def analyze_polls(pollster_df, actual_df, pollster_column=POLLSTER_COLUMN):
    """Build the per-pollster results table.

    Args:
        pollster_df: One row per pollster; must contain ``pollster_column``
            and one column per party present in ``actual_df``.
        actual_df: Actual results; only the first row is used and its column
            names define the set of parties.
        pollster_column: Name of the column holding the pollster's name.

    Returns:
        A DataFrame with the columns ``Pollster``, ``SSD``, ``R^2``,
        ``Chi-Squared`` and ``P-Value``.

    Raises:
        ValueError: If ``actual_df`` has no rows, or ``pollster_df`` lacks the
            pollster column or any party column.
    """
    if actual_df.empty:
        raise ValueError('actual results contain no data rows')

    parties = list(actual_df.columns)
    missing = [
        column for column in [pollster_column, *parties]
        if column not in pollster_df.columns
    ]
    if missing:
        raise ValueError(f"pollster data is missing columns: {missing}")

    # Select party columns by name so both inputs are compared in the same
    # order, regardless of how the pollster file orders its columns.
    actual_values = actual_df.iloc[0][parties].to_numpy(dtype=float)
    poll_matrix = pollster_df[parties].to_numpy(dtype=float)

    results = []
    for pollster_name, expected_values in zip(
        pollster_df[pollster_column], poll_matrix
    ):
        ssd, r_squared, chi2, p_value = score_poll(
            actual_values, expected_values
        )
        results.append({
            'Pollster': pollster_name,
            'SSD': ssd,
            'R^2': r_squared,
            'Chi-Squared': chi2,
            'P-Value': p_value,
        })

    return pd.DataFrame(results, columns=RESULT_COLUMNS)


def main():
    """Load both CSV files, score every pollster and save the results."""
    actual_df = pd.read_csv(ACTUAL_RESULTS_CSV, dtype=float)

    # The party columns are taken from the actual-results header so the two
    # files cannot drift apart through a duplicated hard-coded list.
    party_dtypes = dict.fromkeys(actual_df.columns, float)
    pollster_df = pd.read_csv(
        POLLSTERS_CSV, dtype={POLLSTER_COLUMN: str, **party_dtypes}
    )

    results_df = analyze_polls(pollster_df, actual_df)
    results_df.to_csv(OUTPUT_CSV, index=False)

    print("Results have been saved to polling_analysis_results.csv")


if __name__ == "__main__":
    main()
+Guardian/Esntl,35.57289999999999,0.9623561174073808,2.2534329815298286,0.8130814264933469 diff --git a/pollsters.csv b/pollsters.csv new file mode 100644 index 0000000..0ea3fdb --- /dev/null +++ b/pollsters.csv @@ -0,0 +1,7 @@ +Pollster,Nat,Lab,Grn,Act,NZF,TPM +Newshub/Reid,34.5,27.5,14.9,8.8,6.8,2.7 +1 News/Verian,37.0,28.0,14.0,9.0,6.0,2.0 +TPU/Curia,35.9,27.9,10.6,9.1,6.9,3.7 +Talbot Mills,38.0,27.0,13.0,9.0,6.4,3.1 +Roy Morgan,30.5,26.0,15.0,11.5,7.5,3.0 +Guardian/Esntl,34.0,30.3,10.6,7.9,8.2,1.9 diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..f0431f0 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,7 @@ +numpy==2.1.2 +pandas==2.2.3 +python-dateutil==2.9.0.post0 +pytz==2024.2 +scipy==1.14.1 +six==1.16.0 +tzdata==2024.2