Skip to content

Commit

Permalink
Now including feature selection via forests for interconnectors.
Browse files Browse the repository at this point in the history
  • Loading branch information
luke-marshall committed May 22, 2017
1 parent 7349072 commit 437a182
Show file tree
Hide file tree
Showing 4 changed files with 165 additions and 82 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,4 @@

*.png

*.pyc
90 changes: 90 additions & 0 deletions featureselection.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
# Feature selection trial using forests of trees.
# Adapted from the scikit-learn tutorials.
# Original code: http://scikit-learn.org/stable/auto_examples/ensemble/plot_forest_importances.html#sphx-glr-auto-examples-ensemble-plot-forest-importances-py


import numpy as np
import matplotlib.pyplot as plt

from sklearn.datasets import make_classification
from sklearn.ensemble import ExtraTreesClassifier

import marketUtils



# Load the market records and the interconnector flow records.
nem = marketUtils.getNem()
interconnectors = marketUtils.getInterconnectorFlows()

# Prepare the data for the classifier.
# Rows are visited in sorted timestamp-key order so X and y stay aligned.
times = sorted(nem)

# Feature names are taken from one arbitrary flow record and sorted so the
# column order is stable between runs.
# NOTE(review): assumes every flow record carries the same attributes - confirm.
# next(iter(...)) works on both Python 2 and 3, unlike .itervalues().next().
xLabels = sorted(next(iter(interconnectors.values())))

X = []
y = []
for timestamp in times:
    # Class 1 when the NSW price is at or above 300, otherwise class 0.
    y.append(1 if float(nem[timestamp]['nsw']['price']) >= 300 else 0)
    # One feature row per timestamp, columns ordered as in xLabels.
    # NOTE(review): raises KeyError if a NEM timestamp is missing from the
    # interconnector data - confirm both files cover the same periods.
    X.append([interconnectors[timestamp][attribute] for attribute in xLabels])

X = np.array(X)
y = np.array(y)

# Quick sanity output: the feature matrix and the number of class-1 rows.
print(X)
print(np.sum(y))




# Build a classification task using 3 informative features
# X, y = make_classification(n_samples=1000,
# n_features=10,
# n_informative=3,
# n_redundant=0,
# n_repeated=0,
# n_classes=2,
# random_state=0,
# shuffle=False)

# print X

# Fit an extra-trees ensemble on the prepared data and read back how much
# each feature contributed.
forest = ExtraTreesClassifier(n_estimators=250, random_state=0)
forest.fit(X, y)

importances = forest.feature_importances_
# Spread of each feature's importance across the individual trees; used as
# the error bar in the chart below.
per_tree_importances = [tree.feature_importances_ for tree in forest.estimators_]
std = np.std(per_tree_importances, axis=0)
# Feature positions ordered from most to least important.
indices = np.argsort(importances)[::-1]

# Print the feature ranking
print("Feature ranking:")
for rank, feature_index in enumerate(indices, 1):
    print("%d. feature %d (%f) %s" % (
        rank,
        feature_index,
        importances[feature_index],
        str(xLabels[feature_index]),
    ))

# Plot the feature importances of the forest
feature_count = X.shape[1]
bar_positions = range(feature_count)
plt.figure()
plt.title("Feature importances")
plt.bar(bar_positions, importances[indices], color="r", yerr=std[indices], align="center")
# Tick labels are the original feature positions, not their names.
plt.xticks(bar_positions, indices)
plt.xlim([-1, feature_count])
plt.show()
89 changes: 7 additions & 82 deletions interconnector.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,91 +7,14 @@
import matplotlib.pyplot as plt
import pandas as pd

from matplotlib import colors as mcolors


# from bokeh.charts import Scatter, output_file, show
from bokeh.plotting import figure, output_file, show
from bokeh.sampledata.autompg import autompg as df






def saveToPickle(my_object, fileName):
    """Pickle ``my_object`` into the file at ``fileName``.

    Prints a progress message before writing and a confirmation afterwards.
    An existing file at ``fileName`` is overwritten (opened with mode "wb").

    Args:
        my_object: Any picklable object.
        fileName: Path of the file to write.
    """
    print("Pickling my_object to file: "+str(fileName)+"...")
    # The context manager closes (and therefore flushes) the handle even if
    # pickle.dump raises; the original left the handle open.
    with open(fileName, "wb") as handle:
        pickle.dump(my_object, handle)
    print("Saved.")

def getFromPickle(fileName):
    """Load and return the object pickled in ``fileName``.

    Args:
        fileName: Path of the pickle file to read.

    Returns:
        The unpickled object, or None when ``fileName`` is not an existing
        regular file.
    """
    if not os.path.isfile(fileName):
        return None
    # NOTE(review): pickle.load can execute arbitrary code embedded in the
    # file; only call this on files this project wrote itself.
    # The context manager closes the handle; the original left it open.
    with open(fileName, "rb") as handle:
        return pickle.load(handle)

def getNem(filename='nem_allstates.csv'):
    """Read the NEM CSV into a dict keyed by its 'Time-ending' column.

    Args:
        filename: Path of the CSV file. Defaults to 'nem_allstates.csv' in
            the current working directory, the path that was previously
            hard-coded.

    Returns:
        dict: ``{time_string: {region: {...}}}`` where region is one of
        'nsw', 'vic', 'qld', 'sa', 'tas' and each inner dict has the keys
        'price', 'demand', 'nonScheduled', 'generation', 'availability'.
        'price' is kept as the raw string read from the CSV; the other four
        values are converted to float. If a time string occurs more than
        once, the last row wins.

    Raises:
        KeyError: If an expected column is missing from a row.
        ValueError: If a non-price value cannot be converted to float.
    """
    # (output key, CSV column prefix) for each region in the file.
    regions = (
        ('nsw', 'NSW1'),
        ('vic', 'VIC1'),
        ('qld', 'QLD1'),
        ('sa', 'SA1'),
        ('tas', 'TAS1'),
    )
    nem = {}
    # The context manager closes the CSV; the original never closed it.
    with open(filename) as myFile:
        for timePeriod in csv.DictReader(myFile):
            nem[timePeriod['Time-ending']] = {
                region: {
                    # Price is intentionally left unconverted here.
                    'price': timePeriod[prefix + ' Price'],
                    'demand': float(timePeriod[prefix + ' Scheduled Demand']),
                    'nonScheduled': float(timePeriod[prefix + ' Non-scheduled']),
                    'generation': float(timePeriod[prefix + ' Generation']),
                    'availability': float(timePeriod[prefix + ' Availability']),
                }
                for region, prefix in regions
            }
    return nem

def getInterconnectorFlows():
    """Load 'interconnectorflows.csv' as a nested dict keyed by row label.

    Returns:
        dict: Maps each value of the CSV's first column (used as the row
        index) to a ``{column name: cell value}`` dict for that row.
    """
    # The first CSV column becomes the row index.
    table = pd.read_csv('interconnectorflows.csv', index_col=0)
    # to_dict() keys its outer level on columns, so flip the table first to
    # get one entry per original row.
    return table.T.to_dict()



import marketUtils




# Generates the data for plotting and returns it as a dict; the data is then plotted when the key_event function is called.

def chartFlowVsPrice(nem, flows):
plots = []
Expand Down Expand Up @@ -153,11 +76,13 @@ def key_event(e):
plt.title(plots[curr_plt_index]['title'])
fig.canvas.draw()

nem = getNem()
flows = getInterconnectorFlows()
plots = chartFlowVsPrice(nem, flows)



# Load the NEM records and the interconnector flows via the marketUtils module.
nem = marketUtils.getNem()
flows = marketUtils.getInterconnectorFlows()
# Build the plot data up front from both data sets.
plots = chartFlowVsPrice(nem, flows)

# Create the figure and route key presses on its canvas to key_event.
fig = plt.figure()
fig.canvas.mpl_connect('key_press_event', key_event)
# Single axes filling the figure (1x1 grid, first slot).
ax = fig.add_subplot(111)
Expand Down
66 changes: 66 additions & 0 deletions marketUtils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
import os
import csv
import numpy as np
import pickle
import pandas as pd


def getNem(filename='nem_allstates.csv'):
    """Read the NEM CSV into a dict keyed by its 'Time-ending' column.

    Args:
        filename: Path of the CSV file. Defaults to 'nem_allstates.csv' in
            the current working directory, the path that was previously
            hard-coded.

    Returns:
        dict: ``{time_string: {region: {...}}}`` where region is one of
        'nsw', 'vic', 'qld', 'sa', 'tas' and each inner dict maps 'price',
        'demand', 'nonScheduled', 'generation' and 'availability' to floats.
        If a time string occurs more than once, the last row wins.

    Raises:
        KeyError: If an expected column is missing from a row.
        ValueError: If a value cannot be converted to float.
    """
    # (output key, CSV column prefix) for each region in the file.
    regions = (
        ('nsw', 'NSW1'),
        ('vic', 'VIC1'),
        ('qld', 'QLD1'),
        ('sa', 'SA1'),
        ('tas', 'TAS1'),
    )
    # (output key, CSV column suffix) for each measurement of a region.
    measures = (
        ('price', ' Price'),
        ('demand', ' Scheduled Demand'),
        ('nonScheduled', ' Non-scheduled'),
        ('generation', ' Generation'),
        ('availability', ' Availability'),
    )
    nem = {}
    # The context manager closes the CSV; the original never closed it.
    with open(filename) as myFile:
        for timePeriod in csv.DictReader(myFile):
            nem[timePeriod['Time-ending']] = {
                region: {
                    key: float(timePeriod[prefix + suffix])
                    for key, suffix in measures
                }
                for region, prefix in regions
            }
    return nem

def getInterconnectorFlows(filename='interconnectorflows.csv'):
    """Load an interconnector-flow CSV as a nested dict keyed by row label.

    Args:
        filename: Path of the CSV file. Defaults to
            'interconnectorflows.csv' in the current working directory, the
            path that was previously hard-coded.

    Returns:
        dict: Maps each value of the CSV's first column (used as the row
        index) to a ``{column name: cell value}`` dict for that row.
    """
    # Reading the file; the first column becomes the row index.
    df = pd.read_csv(filename, index_col=0)
    # to_dict() keys its outer level on columns, so transpose first to get
    # one entry per original row.
    flows = df.transpose().to_dict()
    return flows





0 comments on commit 437a182

Please sign in to comment.