Initialize the repository

JPQuirmbach · May 3, 2016 · b9fe21d · b9fe21d
commit b9fe21d
Show file tree

Hide file tree

Showing 17 changed files with 5,599 additions and 0 deletions.
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,25 @@
+.metadata
+bin/
+tmp/
+*.tmp
+*.bak
+*.swp
+*~.nib
+local.properties
+.settings/
+.loadpath
+.recommenders
+
+# Eclipse Core
+.project
+
+# PyDev specific (Python IDE for Eclipse)
+*.pydevproject
+
+# Compiled .pyc files
+src/*.pyc
+src/data/*.pyc
+src/model/*.pyc
+src/processing/*.pyc
+src/report/*.pyc
+src/util/*.pyc
diff --git a/README.md b/README.md
@@ -0,0 +1,24 @@
+# NNPraktikum
+The coding framework for KIT Neural Network Praktikum
+
+See [ilias.studium.kit.edu](https://ilias.studium.kit.edu/goto_produktiv_crs_413999.html)
+for more information.
+
+## Python version
+We use Python 2.7, not Python 3.x
+
+## Requirements
+
+You have to install two 3rd-party packages: Numpy and scikit-learn.
+
+### Numpy
+
+You might have to install numpy manually. Please follow the
+[official guide](http://docs.scipy.org/doc/numpy/user/install.html) to do so.
+
+### Scikit-learn
+
+Scikit-learn is also called sklearn. Don't be confused by that, it is the same
+package. Please follow the
+[official guide](http://scikit-learn.org/stable/install.html) to
+install it.
diff --git a/data/mnist_seven.csv b/data/mnist_seven.csv
diff --git a/src/Run.py b/src/Run.py
@@ -0,0 +1,58 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+from data.mnist_seven import MNISTSeven
+from model.stupid_recognizer import StupidRecognizer
+from model.perceptron import Perceptron
+from model.logistic_regression import LogisticRegression
+from report.evaluator import Evaluator
+
+
+def main():
+    """Train and evaluate the random baseline recognizer on the digit data.
+
+    Loads ``../data/mnist_seven.csv`` (the path is relative to the current
+    working directory) with 3000 training, 1000 validation and 1000 test
+    instances, trains the StupidRecognizer, lets it classify its test set and
+    reports the result through ``Evaluator.printAccuracy``.
+
+    The Perceptron steps are commented out; they are meant to be enabled
+    once the Perceptron has been implemented.
+    """
+    data = MNISTSeven("../data/mnist_seven.csv", 3000, 1000, 1000)
+    myStupidClassifier = StupidRecognizer(data.trainingSet,
+                                          data.validationSet,
+                                          data.testSet)
+    # Uncomment this block to have your Perceptron created as well
+#     myPerceptronClassifier = Perceptron(data.trainingSet,
+#                                         data.validationSet,
+#                                         data.testSet,
+#                                         learningRate=0.005,
+#                                         epochs=30)
+
+    # Train the classifiers
+    print("=========================")
+    print("Training..")
+
+    print("\nStupid Classifier has been training..")
+    myStupidClassifier.train()
+    print("Done..")
+
+#     print("\nPerceptron has been training..")
+#     myPerceptronClassifier.train()
+#     print("Done..")
+
+    # Run the recognizers. evaluate() is called without an argument, so each
+    # classifier uses the test set that was handed to its constructor.
+    stupidPred = myStupidClassifier.evaluate()
+    # Uncomment this to have your Perceptron evaluated
+#     perceptronPred = myPerceptronClassifier.evaluate()
+
+    # Report the result
+    print("=========================")
+    evaluator = Evaluator()
+
+    print("Result of the stupid recognizer:")
+    # evaluator.printComparison(data.testSet, stupidPred)
+    evaluator.printAccuracy(data.testSet, stupidPred)
+
+    # NOTE: this header is printed even while the Perceptron evaluation below
+    # is still commented out, so nothing follows it until that is enabled.
+    print("\nResult of the Perceptron recognizer:")
+    # evaluator.printComparison(data.testSet, perceptronPred)
+    # Uncomment this to have your Perceptron evaluated
+#     evaluator.printAccuracy(data.testSet, perceptronPred)
+
+    # eval.printConfusionMatrix(data.testSet, pred)
+    # eval.printClassificationResult(data.testSet, pred, target_names)
+
+# Run main() only when this file is executed as a script, not on import.
+if __name__ == '__main__':
+    main()
diff --git a/src/data/__init__.py b/src/data/__init__.py
diff --git a/src/data/data_set.py b/src/data/data_set.py
@@ -0,0 +1,41 @@
+# -*- coding: utf-8 -*-
+
+
+class DataSet(object):
+    """
+    Container for one split (training, validation or test) of the digit data
+
+    Parameters
+    ----------
+    data : numpy.ndarray
+        2-D array with one instance per row. Column 0 holds the digit label,
+        all remaining columns hold the pixel values.
+    oneHot : bool
+        If this flag is set, every label equal to `targetDigit` is replaced
+        by 1 and every other label is replaced by 0.
+    targetDigit : string
+        Label of the digit to recognize, e.g. '7'.
+
+    Attributes
+    ----------
+    input : numpy.ndarray
+        The pixel columns of `data`, divided by 255.
+    label : numpy.ndarray or list
+        Labels for the rows of `input`: the raw first column of `data`, or a
+        list of 0/1 ints when `oneHot` is set.
+    oneHot : bool
+    targetDigit : string
+    """
+
+    def __init__(self, data, oneHot=True, targetDigit='7'):
+        self.oneHot = oneHot
+        self.targetDigit = targetDigit
+
+        # Column 0 is always the label, everything after it is pixel data
+        pixels = data[:, 1:]
+        digits = data[:, 0]
+
+        # The float factor forces true division, also under Python 2
+        self.input = 1.0 * pixels / 255
+
+        if oneHot:
+            # Binarize the labels: 1 for the target digit, 0 for any other.
+            # Labels are compared through their string representation.
+            self.label = [1 if str(digit) == targetDigit else 0
+                          for digit in digits]
+        else:
+            self.label = digits
+
+    def __iter__(self):
+        # Iterating a DataSet yields the rows of `input`
+        return iter(self.input)
diff --git a/src/data/mnist_seven.py b/src/data/mnist_seven.py
@@ -0,0 +1,56 @@
+# -*- coding: utf-8 -*-
+
+import numpy as np
+from numpy.random import shuffle
+from data.data_set import DataSet
+
+
+class MNISTSeven(object):
+    """
+    Small subset (5000 instances) of MNIST data to recognize the digit 7
+
+    Parameters
+    ----------
+    dataPath : string
+        Path to a CSV file with delimiter ',' and uint8 values.
+    numTrain : int
+        Number of training examples.
+    numValid : int
+        Number of validation examples.
+    numTest : int
+        Number of test examples.
+
+    Attributes
+    ----------
+    trainingSet : DataSet
+    validationSet : DataSet
+    testSet : DataSet
+    """
+
+    # dataPath = "data/mnist_seven.csv"
+
+    def __init__(self, dataPath, numTrain=3000, numValid=1000, numTest=1000):
+
+        # Placeholders; load() replaces them with DataSet objects
+        self.trainingSet = []
+        self.validationSet = []
+        self.testSet = []
+
+        self.load(dataPath, numTrain, numValid, numTest)
+
+    def load(self, dataPath, numTrain, numValid, numTest):
+        """Load the CSV file and split it into the three data sets.
+
+        The first `numTrain` + `numValid` rows are shuffled in place and then
+        divided into training and validation set. The following `numTest`
+        rows form the test set and are never shuffled.
+
+        Parameters
+        ----------
+        dataPath : string
+            Path to the CSV file.
+        numTrain : int
+            Number of training examples.
+        numValid : int
+            Number of validation examples.
+        numTest : int
+            Number of test examples.
+        """
+        print("Loading data from " + dataPath + "...")
+
+        data = np.genfromtxt(dataPath, delimiter=",", dtype="uint8")
+
+        # The test set ALWAYS consists of the same rows: the numTest rows
+        # right after the training/validation part. Honouring numTest here
+        # keeps the test set from silently growing to "all remaining rows"
+        # when the file holds more rows than requested.
+        numShuffled = numTrain + numValid
+        train = data[:numShuffled]
+        test = data[numShuffled:numShuffled + numTest]
+
+        # Only the training/validation rows are shuffled (in place)
+        shuffle(train)
+
+        train, valid = train[:numTrain], train[numTrain:]
+
+        self.trainingSet = DataSet(train)
+        self.validationSet = DataSet(valid)
+        self.testSet = DataSet(test)
+
+        print("Data loaded.")
diff --git a/src/model/__init__.py b/src/model/__init__.py
diff --git a/src/model/classifier.py b/src/model/classifier.py
@@ -0,0 +1,25 @@
+# -*- coding: utf-8 -*-
+
+from abc import ABCMeta, abstractmethod
+
+
+class Classifier(ABCMeta('_ClassifierBase', (object,), {})):
+    """
+    Abstract class of a classifier
+
+    Subclasses have to implement `train`, `classify` and `evaluate`; a
+    subclass that leaves one of them out cannot be instantiated.
+    """
+    # `__metaclass__` is only honoured by Python 2. Deriving from a base
+    # class created by ABCMeta applies the metaclass on Python 2 and on
+    # Python 3, so the abstract methods are enforced on both.
+    __metaclass__ = ABCMeta
+
+    @abstractmethod
+    def train(self, trainingSet, validationSet):
+        """Training procedure of the classifier."""
+        pass
+
+    @abstractmethod
+    def classify(self, testInstance):
+        """Classify one instance given the model of the classifier."""
+        pass
+
+    @abstractmethod
+    def evaluate(self, test):
+        """Evaluate a whole test set given the model of the classifier."""
+        pass
diff --git a/src/model/perceptron.py b/src/model/perceptron.py
@@ -0,0 +1,105 @@
+# -*- coding: utf-8 -*-
+
+import sys
+import logging
+
+import numpy as np
+
+from util.activation_functions import Activation
+from model.classifier import Classifier
+
+# Configure the root logger as soon as this module is imported: messages of
+# level DEBUG and above go to stdout, prefixed with timestamp and level name.
+logging.basicConfig(format='%(asctime)s %(levelname)s %(message)s',
+                    level=logging.DEBUG,
+                    stream=sys.stdout)
+
+
+class Perceptron(Classifier):
+    """
+    A digit-7 recognizer based on the perceptron algorithm
+
+    This class is a skeleton: `train` and `classify` are still to be
+    implemented.
+
+    Parameters
+    ----------
+    train : data set object
+        Training data; must expose a 2-D `input` array (Run.py passes a
+        DataSet).
+    valid : data set object
+        Validation data.
+    test : data set object
+        Test data; its `input` is used by `evaluate` when no data is given.
+    learningRate : float
+    epochs : positive int
+
+    Attributes
+    ----------
+    learningRate : float
+    epochs : int
+    trainingSet : data set object
+    validationSet : data set object
+    testSet : data set object
+    weight : numpy.ndarray
+        One weight per column of `trainingSet.input`.
+    """
+    def __init__(self, train, valid, test, learningRate=0.01, epochs=50):
+
+        self.learningRate = learningRate
+        self.epochs = epochs
+
+        self.trainingSet = train
+        self.validationSet = valid
+        self.testSet = test
+
+        # Initialize the weight vector with small random values
+        # between 0 and 0.1 (one weight per input column; no separate
+        # bias weight is created here)
+        self.weight = np.random.rand(self.trainingSet.input.shape[1])/10
+
+    def train(self, verbose=True):
+        """Train the perceptron with the perceptron learning algorithm.
+
+        Not implemented yet: the method currently leaves the weights
+        untouched.
+
+        Parameters
+        ----------
+        verbose : boolean
+            Print logging messages with validation accuracy if verbose is True.
+        """
+
+        # Here you have to implement the Perceptron Learning Algorithm
+        # to change the weights of the Perceptron
+        pass
+
+    def classify(self, testInstance):
+        """Classify a single instance.
+
+        Not implemented yet: the method currently returns None.
+
+        Parameters
+        ----------
+        testInstance : list of floats
+
+        Returns
+        -------
+        bool :
+            True if the testInstance is recognized as a 7, False otherwise.
+        """
+        # Here you have to implement the classification for one instance,
+        # i.e., return True if the testInstance is recognized as a 7,
+        # False otherwise
+        pass
+
+    def evaluate(self, test=None):
+        """Evaluate a whole dataset.
+
+        Parameters
+        ----------
+        test : iterable of instances, optional
+            The instances to be classified. If no test data is given, the
+            `input` of the test set associated to the classifier is used.
+
+        Returns
+        -------
+        List:
+            List of classified decisions for the dataset's entries.
+        """
+        if test is None:
+            test = self.testSet.input
+
+        # Here is the map function of python - a functional programming concept
+        # It applies the "classify" method to every element of "test"
+        # Once you can classify an instance, just use map for all of the test
+        # set.
+        return list(map(self.classify, test))
+
+    def fire(self, input):
+        """Fire the output of the perceptron corresponding to the input.
+
+        Returns the result of `Activation.sign` applied to the dot product
+        of `input` and the weight vector.
+        """
+        # I already implemented it for you to see how you can work with numpy
+        return Activation.sign(np.dot(np.array(input), self.weight))
diff --git a/src/model/stupid_recognizer.py b/src/model/stupid_recognizer.py
@@ -0,0 +1,39 @@
+# -*- coding: utf-8 -*-
+
+from random import random
+from model.classifier import Classifier
+
+__author__ = "ABC XYZ"  # Adjust this when you copy the file
+__email__ = "[email protected]"  # Adjust this when you copy the file
+
+
+class StupidRecognizer(Classifier):
+    """
+    This class demonstrates how to follow an OOP approach to build a digit
+    recognizer.
+
+    It also serves as a baseline to compare with other
+    recognizing method later on.
+
+    The method is that it will randomly decide the digit is a "7" or not
+    based on the probability 'byChance'.
+
+    Parameters
+    ----------
+    train : data set object
+    valid : data set object
+    test : data set object
+        Its `input` is classified by `evaluate` when no data is given.
+    byChance : float
+        Probability with which an instance is declared to be a "7".
+    """
+
+    def __init__(self, train, valid, test, byChance=0.5):
+
+        self.byChance = byChance
+
+        self.trainingSet = train
+        self.validationSet = valid
+        self.testSet = test
+
+    def train(self):
+        # Do nothing: the decision is purely random, there is no model
+        pass
+
+    def classify(self, testInstance):
+        """Return True ("is a 7") with probability `byChance`.
+
+        The instance itself is ignored.
+        """
+        return random() < self.byChance
+
+    def evaluate(self, test=None):
+        """Classify a whole dataset.
+
+        Parameters
+        ----------
+        test : iterable of instances, optional
+            The instances to be classified. If no test data is given, the
+            `input` of the test set associated to the classifier is used.
+
+        Returns
+        -------
+        List:
+            One random decision per instance.
+        """
+        if test is None:
+            test = self.testSet.input
+
+        return list(map(self.classify, test))
diff --git a/src/processing/__init__.py b/src/processing/__init__.py
diff --git a/src/report/__init__.py b/src/report/__init__.py