Skip to content

Commit

Permalink
Initialize the repository
Browse files Browse the repository at this point in the history
  • Loading branch information
thanhleha committed May 3, 2016
0 parents commit b9fe21d
Show file tree
Hide file tree
Showing 17 changed files with 5,599 additions and 0 deletions.
25 changes: 25 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
.metadata
bin/
tmp/
*.tmp
*.bak
*.swp
*~.nib
local.properties
.settings/
.loadpath
.recommenders

# Eclipse Core
.project

# PyDev specific (Python IDE for Eclipse)
*.pydevproject

# Compile pyc files
src/*.pyc
src/data/*.pyc
src/model/*.pyc
src/processing/*.pyc
src/report/*.pyc
src/util/*.pyc
24 changes: 24 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# NNPraktikum
The coding framework for KIT Neural Network Praktikum

See [ilias.studium.kit.edu](https://ilias.studium.kit.edu/goto_produktiv_crs_413999.html)
for more information.

## Python version
We use Python 2.7, not Python 3.x

## Requirements

You have to install two 3rd-party packages: Numpy and scikit-learn.

### Numpy

You might have to install numpy manually. Please follow the
[official guide](http://docs.scipy.org/doc/numpy/user/install.html) to do so.

### Scikit-learn

Scikit-learn is also called sklearn. Don't be confused by that; it is the same
package. Please follow the
[official guide](http://scikit-learn.org/stable/install.html) to
install it.
5,000 changes: 5,000 additions & 0 deletions data/mnist_seven.csv

Large diffs are not rendered by default.

58 changes: 58 additions & 0 deletions src/Run.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

from data.mnist_seven import MNISTSeven
from model.stupid_recognizer import StupidRecognizer
from model.perceptron import Perceptron
from model.logistic_regression import LogisticRegression
from report.evaluator import Evaluator


def main():
    """Run the baseline experiment: load data, train, evaluate and report.

    The MNIST-seven CSV is loaded with 3000/1000/1000 instances requested for
    training/validation/test, the StupidRecognizer is trained and evaluated,
    and its result is handed to Evaluator.printAccuracy. The Perceptron
    steps are left commented out until the Perceptron is implemented.
    """
    separator = "========================="

    data = MNISTSeven("../data/mnist_seven.csv", 3000, 1000, 1000)

    baseline = StupidRecognizer(data.trainingSet,
                                data.validationSet,
                                data.testSet)
    # Uncomment this to make your Perceptron evaluated
    # myPerceptronClassifier = Perceptron(data.trainingSet,
    #                                     data.validationSet,
    #                                     data.testSet,
    #                                     learningRate=0.005,
    #                                     epochs=30)

    # --- Training -----------------------------------------------------
    print(separator)
    print("Training..")

    print("\nStupid Classifier has been training..")
    baseline.train()
    print("Done..")

    # print("\nPerceptron has been training..")
    # myPerceptronClassifier.train()
    # print("Done..")

    # --- Recognition --------------------------------------------------
    # Each classifier evaluates the test set it was constructed with
    baselinePredictions = baseline.evaluate()
    # Uncomment this to make your Perceptron evaluated
    # perceptronPred = myPerceptronClassifier.evaluate()

    # --- Reporting ----------------------------------------------------
    print(separator)
    reporter = Evaluator()

    print("Result of the stupid recognizer:")
    # reporter.printComparison(data.testSet, baselinePredictions)
    reporter.printAccuracy(data.testSet, baselinePredictions)

    print("\nResult of the Perceptron recognizer:")
    # reporter.printComparison(data.testSet, perceptronPred)
    # Uncomment this to make your Perceptron evaluated
    # reporter.printAccuracy(data.testSet, perceptronPred)

    # Further report calls kept from the original skeleton (the names `eval`,
    # `pred` and `target_names` are not defined here -- adapt before use):
    # eval.printConfusionMatrix(data.testSet, pred)
    # eval.printClassificationResult(data.testSet, pred, target_names)


if __name__ == "__main__":
    main()
Empty file added src/data/__init__.py
Empty file.
41 changes: 41 additions & 0 deletions src/data/data_set.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# -*- coding: utf-8 -*-


class DataSet(object):
    """
    Representing train, valid or test sets

    Parameters
    ----------
    data : numpy.ndarray
        Two-dimensional array with one instance per row. Column 0 holds the
        label; all remaining columns hold the input values, which are
        divided by 255.
    oneHot : bool
        If this flag is set, then all labels which are not `targetDigit` will
        be transformed to 0 and `targetDigit` will be transformed to 1.
    targetDigit : string
        Label of the dataset, e.g. '7'. Other types (e.g. the int 7) are
        accepted as well and converted with `str`.

    Attributes
    ----------
    input : numpy.ndarray
        The input columns of `data` divided by 255.
    label : list or numpy.ndarray
        The labels for the data given in `input`: a list of 0/1 ints if
        `oneHot` is set, otherwise the untouched label column of `data`.
    oneHot : bool
    targetDigit : string
    """

    def __init__(self, data, oneHot=True, targetDigit='7'):
        # Labels are compared through their string form below, so normalise
        # the target once. Without this, passing an int target would never
        # match and silently yield all-zero labels.
        targetDigit = str(targetDigit)

        # The label of the digits is always the first field
        self.input = 1.0 * data[:, 1:] / 255
        self.label = data[:, 0]
        self.oneHot = oneHot
        self.targetDigit = targetDigit

        # Transform the label of targetDigit to 1 and every other label to 0
        if oneHot:
            self.label = [1 if str(a) == targetDigit else 0
                          for a in self.label]

    def __iter__(self):
        """Iterate over the rows of `input`."""
        return iter(self.input)
56 changes: 56 additions & 0 deletions src/data/mnist_seven.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
# -*- coding: utf-8 -*-

import numpy as np
from numpy.random import shuffle
from data.data_set import DataSet


class MNISTSeven(object):
    """
    Small subset (5000 instances) of MNIST data to recognize the digit 7

    Parameters
    ----------
    dataPath : string
        Path to a CSV file with delimiter ',' and uint8 values.
    numTrain : int
        Number of training examples.
    numValid : int
        Number of validation examples.
    numTest : int
        Number of test examples.

    Attributes
    ----------
    trainingSet : DataSet
    validationSet : DataSet
    testSet : DataSet
    """

    def __init__(self, dataPath, numTrain=3000, numValid=1000, numTest=1000):

        # Placeholders; `load` replaces them with DataSet instances
        self.trainingSet = []
        self.validationSet = []
        self.testSet = []

        self.load(dataPath, numTrain, numValid, numTest)

    def load(self, dataPath, numTrain, numValid, numTest):
        """Load the data and split it into training, validation and test set.

        The first `numTrain + numValid` rows of the file are shuffled and
        divided into training and validation set. The following `numTest`
        rows are taken unshuffled as the test set, so the test set is the
        same on every run.

        Parameters
        ----------
        dataPath : string
            Path to the CSV file.
        numTrain : int
            Number of training examples.
        numValid : int
            Number of validation examples.
        numTest : int
            Number of test examples.
        """
        print("Loading data from " + dataPath + "...")

        data = np.genfromtxt(dataPath, delimiter=",", dtype="uint8")

        numTrainValid = numTrain + numValid

        # Bound the test slice by numTest; previously every remaining row
        # ended up in the test set and numTest was ignored.
        train = data[:numTrainValid]
        test = data[numTrainValid:numTrainValid + numTest]

        # Only the train/validation rows are shuffled (in place)
        shuffle(train)

        train, valid = train[:numTrain], train[numTrain:]

        self.trainingSet = DataSet(train)
        self.validationSet = DataSet(valid)
        self.testSet = DataSet(test)

        print("Data loaded.")
Empty file added src/model/__init__.py
Empty file.
25 changes: 25 additions & 0 deletions src/model/classifier.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# -*- coding: utf-8 -*-

from abc import ABCMeta, abstractmethod


class Classifier(ABCMeta('_ClassifierBase', (object,), {})):
    """
    Abstract class of a classifier

    The base class is created by calling ABCMeta directly so that the
    abstract methods are enforced under both Python 2 and Python 3 (a
    `__metaclass__` class attribute is ignored by Python 3). A subclass can
    only be instantiated once it overrides `train`, `classify` and
    `evaluate`.
    """

    @abstractmethod
    def train(self, trainingSet, validationSet):
        """Run the training procedure of the classifier."""
        pass

    @abstractmethod
    def classify(self, testInstance):
        """Classify one instance given the model of the classifier."""
        pass

    @abstractmethod
    def evaluate(self, test):
        """Evaluate a whole test set given the model of the classifier."""
        pass
105 changes: 105 additions & 0 deletions src/model/perceptron.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
# -*- coding: utf-8 -*-

import sys
import logging

import numpy as np

from util.activation_functions import Activation
from model.classifier import Classifier

# Configure logging once, when this module is imported: records of level
# DEBUG and above are written to stdout, each prefixed with a timestamp and
# the level name.
logging.basicConfig(format='%(asctime)s %(levelname)s %(message)s',
level=logging.DEBUG,
stream=sys.stdout)


class Perceptron(Classifier):
    """
    A digit-7 recognizer based on the perceptron algorithm

    Parameters
    ----------
    train : the training set; its `input` must expose a 2-D `shape`
    valid : the validation set
    test : the test set; its `input` is what `evaluate` uses by default
    learningRate : float
    epochs : positive int

    Attributes
    ----------
    learningRate : float
    epochs : int
    trainingSet : as passed in `train`
    validationSet : as passed in `valid`
    testSet : as passed in `test`
    weight : numpy.ndarray
        One weight per input column of the training set.
    """

    def __init__(self, train, valid, test, learningRate=0.01, epochs=50):
        self.trainingSet = train
        self.validationSet = valid
        self.testSet = test

        self.learningRate = learningRate
        self.epochs = epochs

        # One weight per input feature, drawn uniformly from [0, 0.1)
        numFeatures = self.trainingSet.input.shape[1]
        self.weight = np.random.rand(numFeatures) / 10

    def train(self, verbose=True):
        """Train the perceptron with the perceptron learning algorithm.

        Parameters
        ----------
        verbose : boolean
            Print logging messages with validation accuracy if verbose is
            True.
        """
        # Exercise stub: implement the Perceptron Learning Algorithm here so
        # that it updates `self.weight`.
        pass

    def classify(self, testInstance):
        """Classify a single instance.

        Parameters
        ----------
        testInstance : list of floats

        Returns
        -------
        bool :
            True if the testInstance is recognized as a 7, False otherwise.
        """
        # Exercise stub: implement the decision for one instance here, i.e.
        # return True if the testInstance is recognized as a 7 and False
        # otherwise. Until then this returns None.
        pass

    def evaluate(self, test=None):
        """Evaluate a whole dataset.

        Parameters
        ----------
        test : the dataset to be classified
            If no test data is given, the input of the test set associated
            with the classifier is used.

        Returns
        -------
        List:
            List of classified decisions for the dataset's entries.
        """
        instances = self.testSet.input if test is None else test

        # Apply `classify` to every entry, in order
        return [self.classify(instance) for instance in instances]

    def fire(self, input):
        """Fire the output of the perceptron corresponding to the input."""
        # Weighted sum of the input, passed through the sign activation
        weightedSum = np.dot(np.array(input), self.weight)
        return Activation.sign(weightedSum)
39 changes: 39 additions & 0 deletions src/model/stupid_recognizer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# -*- coding: utf-8 -*-

from random import random
from model.classifier import Classifier

__author__ = "ABC XYZ" # Adjust this when you copy the file
__email__ = "[email protected]" # Adjust this when you copy the file


class StupidRecognizer(Classifier):
    """
    This class demonstrates how to follow an OOP approach to build a digit
    recognizer.

    It also serves as a baseline to compare with other
    recognizing methods later on.

    The method is that it will randomly decide the digit is a "7" or not
    based on the probability 'byChance'.

    Parameters
    ----------
    train : the training set (stored, not used by this classifier)
    valid : the validation set (stored, not used by this classifier)
    test : the test set; its `input` is what `evaluate` uses by default
    byChance : float
        Probability with which `classify` answers True.
    """

    def __init__(self, train, valid, test, byChance=0.5):

        self.byChance = byChance

        self.trainingSet = train
        self.validationSet = valid
        self.testSet = test

    def train(self):
        """Do nothing; a random guesser has nothing to learn."""
        pass

    def classify(self, testInstance):
        """Guess at random, ignoring `testInstance`.

        Returns
        -------
        bool :
            True with probability `byChance`, False otherwise.
        """
        # byChance is the probability of answering "it is a 7", not the
        # probability of the answer being correct
        return random() < self.byChance

    def evaluate(self, test=None):
        """Classify every entry of a dataset.

        Parameters
        ----------
        test : the dataset to be classified
            If no test data is given, the input of the test set associated
            with the classifier is used.

        Returns
        -------
        List:
            One random decision per entry of the dataset.
        """
        if test is None:
            test = self.testSet.input

        return list(map(self.classify, test))
Empty file added src/processing/__init__.py
Empty file.
Empty file added src/report/__init__.py
Empty file.
Loading

0 comments on commit b9fe21d

Please sign in to comment.