-
Notifications
You must be signed in to change notification settings - Fork 1
/
analysis.py
164 lines (135 loc) · 7.3 KB
/
analysis.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
import numpy as np
import pandas as pd
# Calendar years for which consumption is extrapolated (2023 through 2030 inclusive).
_FORECAST_YEARS = range(2023, 2031)


def _plot_yearly_series(ax, frame, labelled_columns, title, ylabel, **line_kwargs):
    """Draw one line per column of *frame* against its index on *ax*.

    Args:
        ax: Matplotlib axes to draw on.
        frame: DataFrame whose index supplies the x values.
        labelled_columns: Mapping of column name -> legend label.
        title: Axes title.
        ylabel: Y-axis label.
        **line_kwargs: Extra keyword arguments forwarded to ``ax.plot``
            (for example ``marker`` or ``color``).
    """
    ax.set_title(title)
    for column, label in labelled_columns.items():
        ax.plot(frame.index, frame[column], label=label, **line_kwargs)
    ax.set_xlabel('Year')
    ax.set_ylabel(ylabel)
    ax.legend()
    ax.grid(True)


def _fit_and_plot_forecast(ax, years, consumption, title):
    """Fit a linear year -> consumption trend, plot it on *ax*, return the test RMSE.

    Args:
        ax: Matplotlib axes to draw on.
        years: Integer calendar years, one per observation.
        consumption: Observed primary energy consumption, aligned with *years*.
        title: Axes title.

    Returns:
        Root mean squared error of the model on the held-out 20 % test split.
    """
    X = np.asarray(years).reshape(-1, 1)
    X_train, X_test, y_train, y_test = train_test_split(
        X, consumption, test_size=0.2, random_state=42
    )
    model = linear_regression_model(X_train, y_train)
    y_pred = model.predict(X_test)
    rmse = calculate_rmse(y_test, y_pred)

    future_X = np.asarray(_FORECAST_YEARS).reshape(-1, 1)
    future_pred = model.predict(future_X)

    # The random split shuffles the test years; sort them so the line is
    # drawn left to right instead of doubling back on itself.
    order = np.argsort(X_test.ravel())

    # Every series on this axes uses integer years. Mixing a datetime index
    # with integer years would put the data and the fitted lines on
    # incompatible x scales.
    ax.set_title(title)
    ax.scatter(X.ravel(), consumption, label='Historical Data', color='blue')
    ax.plot(X_test.ravel()[order], y_pred[order], label='Test Set Predictions', color='green')
    ax.plot(future_X.ravel(), future_pred, label='Future Predictions', color='red', linestyle='--')
    ax.set_xlabel('Year')
    ax.set_ylabel('Primary Energy Consumption')
    ax.legend()
    ax.grid(True)
    return rmse


def perform_analysis(df, user_country):
    """Plot a 3x3 dashboard of energy statistics for one country and the world.

    The first seven panels show the selected country's energy mix, per-capita
    electricity, renewable shares, carbon intensity, energy vs. GDP, fossil
    fuel consumption, and electricity generation. The last two panels fit a
    linear regression of primary energy consumption on the calendar year (for
    the country and for the sum over all rows of ``df``) and extrapolate it
    over 2023-2030. The RMSE of each model on its test split is printed, and
    the figure is displayed with ``plt.show()``.

    Args:
        df: DataFrame with a ``country`` column and the energy columns read
            below. Its index must expose ``.year`` (e.g. a ``DatetimeIndex``)
            and ``'year'`` must be a valid ``groupby`` key.
        user_country: Value of the ``country`` column to analyse.

    Raises:
        ValueError: If ``df`` has no rows for ``user_country``.
        KeyError: If one of the expected columns is missing.
    """
    country_df = df[df['country'] == user_country]
    if country_df.empty:
        # Fail before creating a figure; otherwise the error surfaces much
        # later, from inside the train/test split.
        raise ValueError(f'No data available for country {user_country!r}.')

    _, axes = plt.subplots(nrows=3, ncols=3, figsize=(18, 12))

    # Energy Mix Trends
    _plot_yearly_series(
        axes[0, 0],
        country_df,
        {f'{source}_share_energy': source.capitalize()
         for source in ('coal', 'gas', 'renewables', 'nuclear')},
        f'Energy Mix Trends for {user_country} Over the Years',
        'Share of Energy',
    )

    # Per Capita Consumption
    _plot_yearly_series(
        axes[0, 1],
        country_df,
        {'per_capita_electricity': 'Per Capita Consumption'},
        f'Per Capita Energy Consumption Trends for {user_country}',
        'Per Capita Electricity Consumption',
        marker='o',
    )

    # Renewable Energy Growth
    _plot_yearly_series(
        axes[0, 2],
        country_df,
        {f'{source}_share_energy': source.capitalize()
         for source in ('solar', 'wind', 'hydro')},
        f'Renewable Energy Growth for {user_country} Over the Years',
        'Share of Energy',
    )

    # Carbon Intensity
    _plot_yearly_series(
        axes[1, 0],
        country_df,
        {'carbon_intensity_elec': 'Carbon Intensity'},
        f'Carbon Intensity of Electricity Generation for {user_country} Over the Years',
        'Carbon Intensity',
        marker='o',
        color='red',
    )

    # Energy and GDP Relationship
    gdp_ax = axes[1, 1]
    gdp_ax.set_title(f'Energy Consumption vs. GDP for {user_country}')
    gdp_ax.scatter(country_df['gdp'], country_df['primary_energy_consumption'], alpha=0.7)
    gdp_ax.set_xlabel('GDP')
    gdp_ax.set_ylabel('Primary Energy Consumption')
    gdp_ax.grid(True)

    # Fossil Fuel Consumption
    _plot_yearly_series(
        axes[1, 2],
        country_df,
        {f'{fuel}_consumption': fuel.capitalize() for fuel in ('coal', 'oil', 'gas')},
        f'Fossil Fuel Consumption Trends for {user_country} Over the Years',
        'Fossil Fuel Consumption',
    )

    # Electricity Generation and Consumption
    # NOTE(review): the column name 'energy_cons_change_twh' suggests a
    # year-on-year change rather than a consumption level - confirm that the
    # 'Energy Consumption' label is what is intended.
    _plot_yearly_series(
        axes[2, 0],
        country_df,
        {
            'electricity_generation': 'Electricity Generation',
            'energy_cons_change_twh': 'Energy Consumption',
        },
        f'Electricity Generation and Consumption Trends for {user_country} Over the Years',
        'TWH (Terawatt-hours)',
        marker='o',
    )

    # Future Energy Consumption Prediction (country)
    # Rows without a target value cannot be used for fitting.
    known = country_df.dropna(subset=['primary_energy_consumption'])
    rmse = _fit_and_plot_forecast(
        axes[2, 1],
        known.index.year,
        known['primary_energy_consumption'],
        f'Energy Consumption Prediction for {user_country}',
    )
    # RMSE: Root Mean Squared Error
    print(f'The model predicts future energy consumption with an RMSE of {rmse:.2f} units.')

    # Future Energy Consumption Prediction (whole world)
    # Aggregate only the target column: summing the whole frame would also
    # try to "sum" non-numeric columns such as the country name.
    world_consumption = df.groupby('year')['primary_energy_consumption'].sum()
    rmse_world = _fit_and_plot_forecast(
        axes[2, 2],
        world_consumption.index.year,
        world_consumption,
        'World Energy Consumption Prediction',
    )
    print(f'The world model predicts future energy consumption with an RMSE of {rmse_world:.2f} units.')

    plt.tight_layout(rect=[0, 0, 1, 0.96])
    plt.show()
def linear_regression_model(X_train, y_train):
    """Return a ``LinearRegression`` estimator fitted on the given training data.

    Args:
        X_train: Training feature matrix passed to ``fit``.
        y_train: Training target values passed to ``fit``.

    Returns:
        The fitted ``LinearRegression`` instance.
    """
    # ``fit`` returns the estimator itself, so construction and training chain.
    return LinearRegression().fit(X_train, y_train)
def calculate_rmse(y_true, y_pred):
    """Return the root mean squared error between observed and predicted values.

    Args:
        y_true: Observed target values.
        y_pred: Predicted target values.

    Returns:
        The square root of ``mean_squared_error(y_true, y_pred)``.
    """
    squared_error = mean_squared_error(y_true, y_pred)
    return np.sqrt(squared_error)