-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Now including feature selection via forests for interconnectors.
- Loading branch information
1 parent
7349072
commit 437a182
Showing
4 changed files
with
165 additions
and
82 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,4 @@ | ||
|
||
*.png | ||
|
||
*.pyc |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,90 @@ | ||
# Feature selection trial | ||
# Using forests | ||
# Adapted from sklearn tutorials. | ||
# Original code here http://scikit-learn.org/stable/auto_examples/ensemble/plot_forest_importances.html#sphx-glr-auto-examples-ensemble-plot-forest-importances-py | ||
|
||
|
||
import numpy as np | ||
import matplotlib.pyplot as plt | ||
|
||
from sklearn.datasets import make_classification | ||
from sklearn.ensemble import ExtraTreesClassifier | ||
|
||
import marketUtils | ||
|
||
|
||
|
||
# Feature-importance trial: rank the interconnector attributes by how useful
# an extra-trees forest finds them for separating high-price periods from the
# rest. Written so that it runs unchanged on both Python 2 and Python 3.

# Periods whose NSW price is at or above this value are labelled class 1,
# everything else class 0.
PRICE_THRESHOLD = 300

nem = marketUtils.getNem()
interconnectors = marketUtils.getInterconnectorFlows()

# Prepare the data for the classifier.
# Sorting the period keys and the attribute names fixes the row and column
# order of X, so the feature indices reported below are reproducible.
times = sorted(nem)
# Attribute names are taken from an arbitrary period's record.
# NOTE(review): assumes every period carries the same attributes - confirm.
xLabels = sorted(next(iter(interconnectors.values())))

# One label per period, in the same order as the rows of X.
y = np.array([
    1 if float(nem[time]['nsw']['price']) >= PRICE_THRESHOLD else 0
    for time in times
])

# One row per period, one column per attribute (in xLabels order).
# A period present in nem but missing from interconnectors raises KeyError.
X = np.array([
    [interconnectors[time][attribute] for attribute in xLabels]
    for time in times
])

print(X)
# Number of periods labelled 1.
print(np.sum(y))

# Build a forest and compute the feature importances.
# random_state is pinned so repeated runs give the same ranking.
forest = ExtraTreesClassifier(n_estimators=250, random_state=0)
forest.fit(X, y)

importances = forest.feature_importances_
# Spread of each feature's importance across the individual trees; used as
# the error bars in the plot.
std = np.std([tree.feature_importances_ for tree in forest.estimators_], axis=0)
# Feature indices ordered from most to least important.
indices = np.argsort(importances)[::-1]

# Print the feature ranking
print("Feature ranking:")
for rank, featureIndex in enumerate(indices, 1):
    print("%d. feature %d (%f) %s" % (rank, featureIndex, importances[featureIndex], str(xLabels[featureIndex])))

# Plot the feature importances of the forest
featureCount = X.shape[1]
positions = range(featureCount)
plt.figure()
plt.title("Feature importances")
plt.bar(positions, importances[indices], color="r", yerr=std[indices], align="center")
# Tick labels are the feature indices, matching the printed ranking.
plt.xticks(positions, indices)
plt.xlim([-1, featureCount])
plt.show()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
import os | ||
import csv | ||
import numpy as np | ||
import pickle | ||
import pandas as pd | ||
|
||
|
||
def getNem():
    """Load per-period market figures for every region from 'nem_allstates.csv'.

    The file is read from the current working directory and must have a
    'Time-ending' column plus, for each region prefix (NSW1, VIC1, QLD1, SA1,
    TAS1), the columns '<prefix> Price', '<prefix> Scheduled Demand',
    '<prefix> Non-scheduled', '<prefix> Generation' and
    '<prefix> Availability'.

    Returns:
        dict: maps each 'Time-ending' string to a dict keyed by region
        ('nsw', 'vic', 'qld', 'sa', 'tas'); each region maps 'price',
        'demand', 'nonScheduled', 'generation' and 'availability' to floats.
        If a 'Time-ending' value repeats, the last row wins.

    Raises:
        IOError/OSError: if the file cannot be opened.
        KeyError: if an expected column is missing.
        ValueError: if a cell cannot be converted to float.
    """
    # (key in the returned dict, column prefix in the CSV)
    regions = (
        ('nsw', 'NSW1'),
        ('vic', 'VIC1'),
        ('qld', 'QLD1'),
        ('sa', 'SA1'),
        ('tas', 'TAS1'),
    )
    # (key in the returned dict, column suffix in the CSV)
    fields = (
        ('price', 'Price'),
        ('demand', 'Scheduled Demand'),
        ('nonScheduled', 'Non-scheduled'),
        ('generation', 'Generation'),
        ('availability', 'Availability'),
    )

    nem = {}
    # The context manager closes the file even if a row fails to parse.
    with open('nem_allstates.csv') as myFile:
        for timePeriod in csv.DictReader(myFile):
            nem[timePeriod['Time-ending']] = {
                region: {
                    field: float(timePeriod['%s %s' % (prefix, suffix)])
                    for field, suffix in fields
                }
                for region, prefix in regions
            }
    return nem
|
||
def getInterconnectorFlows():
    """Read 'interconnectorflows.csv' into a nested dict keyed by row label.

    The first CSV column is used as the row index. The result maps each index
    value to a dict of {column name: cell value} for that row.
    """
    table = pd.read_csv('interconnectorflows.csv', index_col=0)
    # to_dict() keys its outer dict by column, so transpose first to get one
    # entry per original row.
    return table.T.to_dict()
|
||
|
||
|
||
|
||
|