From 3e295eb644287dbfea41377dbaad60e955e9a2a4 Mon Sep 17 00:00:00 2001 From: cogmission Date: Sat, 9 Jan 2016 10:41:15 -0600 Subject: [PATCH 01/31] modified build and pom files, Added new Parameter --- build.gradle | 4 +- pom.xml | 2 +- .../java/org/numenta/nupic/Parameters.java | 108 +++++++++++++++++- 3 files changed, 109 insertions(+), 5 deletions(-) diff --git a/build.gradle b/build.gradle index f6b5c836..4b3d225b 100644 --- a/build.gradle +++ b/build.gradle @@ -4,7 +4,7 @@ apply plugin: 'eclipse' apply plugin: 'signing' group = 'org.numenta' -version = '0.6.5' +version = '0.6.6-SNAPSHOT' archivesBaseName = 'htm.java' sourceCompatibility = 1.8 @@ -12,7 +12,7 @@ targetCompatibility = 1.8 jar { manifest { - attributes 'Implementation-Title': 'htm.java', 'Implementation-Version': '0.6.5' + attributes 'Implementation-Title': 'htm.java', 'Implementation-Version': '0.6.6-SNAPSHOT' } } diff --git a/pom.xml b/pom.xml index aaacedf3..c3dc4330 100644 --- a/pom.xml +++ b/pom.xml @@ -4,7 +4,7 @@ org.numenta htm.java - 0.6.5 + 0.6.6-SNAPSHOT htm.java The Java version of Numenta's HTM technology diff --git a/src/main/java/org/numenta/nupic/Parameters.java b/src/main/java/org/numenta/nupic/Parameters.java index 8871b34e..d7bf26ca 100644 --- a/src/main/java/org/numenta/nupic/Parameters.java +++ b/src/main/java/org/numenta/nupic/Parameters.java @@ -57,6 +57,7 @@ public class Parameters { private static final Map DEFAULTS_TEMPORAL; private static final Map DEFAULTS_SPATIAL; private static final Map DEFAULTS_ENCODER; + private static final Map DEFAULTS_KNN; static { @@ -127,6 +128,28 @@ public class Parameters { defaultEncoderParams.put(KEY.AUTO_CLASSIFY, Boolean.FALSE); DEFAULTS_ENCODER = Collections.unmodifiableMap(defaultEncoderParams); defaultParams.putAll(DEFAULTS_ENCODER); + + ////////////////// KNNClassifier Defaults /////////////////// + Map defaultKNNParams = new ParametersMap(); + defaultKNNParams.put(KEY.K, 1); + defaultKNNParams.put(KEY.EXACT, false); + defaultKNNParams.put(KEY.DISTANCE_NORM, 2.0); + defaultKNNParams.put(KEY.DISTANCE_METHOD, DistanceMethod.NORM); + defaultKNNParams.put(KEY.DISTANCE_THRESHOLD, .0); + defaultKNNParams.put(KEY.DO_BINARIZATION, false); + defaultKNNParams.put(KEY.BINARIZATION_THRESHOLD, 0.5); + defaultKNNParams.put(KEY.USE_SPARSE_MEMORY, true); + defaultKNNParams.put(KEY.SPARSE_THRESHOLD, 0.1); + defaultKNNParams.put(KEY.RELATIVE_THRESHOLD, false); + defaultKNNParams.put(KEY.NUM_WINNERS, 0); + defaultKNNParams.put(KEY.NUM_SVD_SAMPLES, -1); + defaultKNNParams.put(KEY.NUM_SVD_DIMS, null); + defaultKNNParams.put(KEY.FRACTION_OF_MAX, -1.0); + defaultKNNParams.put(KEY.MAX_STORED_PATTERNS, -1); + defaultKNNParams.put(KEY.REPLACE_DUPLICATES, false); + defaultKNNParams.put(KEY.KNN_CELLS_PER_COL, 0); + DEFAULTS_KNN = Collections.unmodifiableMap(defaultKNNParams); + defaultParams.putAll(DEFAULTS_KNN); DEFAULTS_ALL = Collections.unmodifiableMap(defaultParams); } @@ -279,7 +302,6 @@ public static enum KEY { // Network Layer indicator for auto classifier generation AUTO_CLASSIFY("hasClassifiers", Boolean.class), - // How many bits to use if encoding the respective date fields. // e.g. Tuple(bits to use:int, radius:double) DATEFIELD_SEASON("season", Tuple.class), @@ -289,7 +311,80 @@ public static enum KEY { DATEFIELD_TOFD("timeOfDay", Tuple.class), DATEFIELD_CUSTOM("customDays", Tuple.class), // e.g. Tuple(bits:int, List:"mon,tue,fri") DATEFIELD_PATTERN("formatPattern", String.class), - DATEFIELD_FORMATTER("dateFormatter", DateTimeFormatter.class); + DATEFIELD_FORMATTER("dateFormatter", DateTimeFormatter.class), + + + ///////////// KNNClassifier Parameters ////////////// + /** The number of nearest neighbors used in the classification of patterns. Must be odd */ + K("k", Integer.class), + /** If true, patterns must match exactly when assigning class labels */ + EXACT("exact", Boolean.class), + /** When distance method is "norm", this specifies the p value of the Lp-norm */ + DISTANCE_NORM("distanceNorm", Double.class), + /** + * The method used to compute distance between input patterns and prototype patterns. + * see({@link DistanceMethod}) + */ + DISTANCE_METHOD("distanceMethod", DistanceMethod.class), + /** + * A threshold on the distance between learned + * patterns and a new pattern proposed to be learned. The distance must be + * greater than this threshold in order for the new pattern to be added to + * the classifier's memory + */ + DISTANCE_THRESHOLD("distanceThreshold", Double.class), + /** If True, then scalar inputs will be binarized. */ + DO_BINARIZATION("doBinarization", Boolean.class), + /** If doBinarization is True, this specifies the threshold for the binarization of inputs */ + BINARIZATION_THRESHOLD("binarizationThreshold", Double.class), + /** If True, classifier will use a sparse memory matrix */ + USE_SPARSE_MEMORY("useSparseMemory", Boolean.class), + /** + * If useSparseMemory is True, input variables whose absolute values are + * less than this threshold will be stored as zero + */ + SPARSE_THRESHOLD("sparseThreshold", Double.class), + /** Flag specifying whether to multiply sparseThreshold by max value in input */ + RELATIVE_THRESHOLD("relativeThreshold", Boolean.class), + /** Number of elements of the input that are stored. If 0, all elements are stored */ + NUM_WINNERS("numWinners", Integer.class), + /** + * Number of samples the must occur before a SVD + * (Singular Value Decomposition) transformation will be performed. If 0, + * the transformation will never be performed + */ + NUM_SVD_SAMPLES("numSVDSamples", Integer.class), + /** + * Controls dimensions kept after SVD transformation. If "adaptive", + * the number is chosen automatically + */ + NUM_SVD_DIMS("numSVDDims", Constants.KNN.class), + /** + * If numSVDDims is "adaptive", this controls the + * smallest singular value that is retained as a fraction of the largest + * singular value + */ + FRACTION_OF_MAX("fractionOfMax", Double.class), + /** + * Limits the maximum number of the training + * patterns stored. When KNN learns in a fixed capacity mode, the unused + * patterns are deleted once the number of stored patterns is greater than + * maxStoredPatterns. A value of -1 is no limit + */ + MAX_STORED_PATTERNS("maxStoredPatterns", Integer.class), + /** + * A boolean flag that determines whether, + * during learning, the classifier replaces duplicates that match exactly, + * even if distThreshold is 0. Should be TRUE for online learning + */ + REPLACE_DUPLICATES("replaceDuplicates", Boolean.class), + /** + * If >= 1, input is assumed to be organized into + * columns, in the same manner as the temporal pooler AND whenever a new + * prototype is stored, only the start cell (first cell) is stored in any + * bursting column + */ + KNN_CELLS_PER_COL("cellsPerCol", Integer.class); private static final Map fieldMap = new HashMap<>(); @@ -440,6 +535,15 @@ public static Parameters getSpatialDefaultParameters() { public static Parameters getEncoderDefaultParameters() { return getParameters(DEFAULTS_ENCODER); } + + /** + * Factory method. Return KNNClassifier {@link Parameters} object with default values + * @return + */ + public static Parameters getKNNDefaultParameters() { + return getParameters(DEFAULTS_KNN); + } + /** * Called internally to populate a {@link Parameters} object with the keys * and values specified in the passed in map. From 219a023e084d0e6388722cf4f33e968750d0db19 Mon Sep 17 00:00:00 2001 From: cogmission Date: Sat, 9 Jan 2016 10:42:54 -0600 Subject: [PATCH 02/31] Added new KNNClassifier class and test --- .../nupic/algorithms/KNNClassifier.java | 523 ++++++++++++++++++ .../nupic/algorithms/KNNClassifierTest.java | 15 + 2 files changed, 538 insertions(+) create mode 100644 src/main/java/org/numenta/nupic/algorithms/KNNClassifier.java create mode 100644 src/test/java/org/numenta/nupic/algorithms/KNNClassifierTest.java diff --git a/src/main/java/org/numenta/nupic/algorithms/KNNClassifier.java b/src/main/java/org/numenta/nupic/algorithms/KNNClassifier.java new file mode 100644 index 00000000..66c5506b --- /dev/null +++ b/src/main/java/org/numenta/nupic/algorithms/KNNClassifier.java @@ -0,0 +1,523 @@ +/* --------------------------------------------------------------------- + * Numenta Platform for Intelligent Computing (NuPIC) + * Copyright (C) 2015, Numenta, Inc. Unless you have an agreement + * with Numenta, Inc., for a separate license for this software code, the + * following terms and conditions apply: + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero Public License version 3 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * See the GNU Affero Public License for more details. + * + * You should have received a copy of the GNU Affero Public License + * along with this program. If not, see http://www.gnu.org/licenses. + * + * http://numenta.org/licenses/ + * --------------------------------------------------------------------- + */ +package org.numenta.nupic.algorithms; + +import java.lang.reflect.Field; + +import org.numenta.nupic.Constants; +import org.numenta.nupic.DistanceMethod; +import org.numenta.nupic.Parameters; + +/** + * This class implements NuPIC's k Nearest Neighbor Classifier. KNN is very + * useful as a basic classifier for many situations. This implementation contains + * many enhancements that are useful for HTM experiments. These enhancements + * include an optimized C++ class for sparse vectors, support for continuous + * online learning, support for various distance methods (including Lp-norm and + * raw overlap), support for performing SVD on the input vectors (very useful for + * large vectors), support for a fixed-size KNN, and a mechanism to store custom + * ID's for each vector. + * + * @author Numenta + * @author cogmission + */ +public class KNNClassifier { + /** The number of nearest neighbors used in the classification of patterns. Must be odd */ + private int k = 1; + /** If true, patterns must match exactly when assigning class labels */ + private boolean exact; + /** When distance method is "norm", this specifies the p value of the Lp-norm */ + private double distanceNorm; + /** + * The method used to compute distance between input patterns and prototype patterns. + * see({@link DistanceMethod}) + */ + private DistanceMethod distanceMethod; + /** + * A threshold on the distance between learned + * patterns and a new pattern proposed to be learned. The distance must be + * greater than this threshold in order for the new pattern to be added to + * the classifier's memory + */ + private double distanceThreshold; + /** If True, then scalar inputs will be binarized. */ + private boolean doBinarization; + /** If doBinarization is True, this specifies the threshold for the binarization of inputs */ + private double binarizationThreshold; + /** If True, classifier will use a sparse memory matrix */ + private boolean useSparseMemory; + /** + * If useSparseMemory is True, input variables whose absolute values are + * less than this threshold will be stored as zero + */ + private double sparseThreshold; + /** Flag specifying whether to multiply sparseThreshold by max value in input */ + private boolean relativeThreshold; + /** Number of elements of the input that are stored. If 0, all elements are stored */ + private int numWinners; + /** + * Number of samples the must occur before a SVD + * (Singular Value Decomposition) transformation will be performed. If 0, + * the transformation will never be performed + */ + private int numSVDSamples; + /** + * Controls dimensions kept after SVD transformation. If "adaptive", + * the number is chosen automatically + */ + private Constants.KNN numSVDDims; + /** + * If numSVDDims is "adaptive", this controls the + * smallest singular value that is retained as a fraction of the largest + * singular value + */ + private double fractionOfMax; + /** + * Limits the maximum number of the training + * patterns stored. When KNN learns in a fixed capacity mode, the unused + * patterns are deleted once the number of stored patterns is greater than + * maxStoredPatterns. A value of -1 is no limit + */ + private int maxStoredPatterns; + /** + * A boolean flag that determines whether, + * during learning, the classifier replaces duplicates that match exactly, + * even if distThreshold is 0. Should be TRUE for online learning + */ + private boolean replaceDuplicates; + /** + * If >= 1, input is assumed to be organized into + * columns, in the same manner as the temporal pooler AND whenever a new + * prototype is stored, only the start cell (first cell) is stored in any + * bursting column + */ + private int cellsPerCol; + + + /** + * Privately constructs a {@code KNNClassifier}. + * This method is called by the + */ + private KNNClassifier() {} + + /** + * Returns a {@link Builder} used to fully construct a {@code KNNClassifier} + * @return + */ + public static Builder builder() { + return new KNNClassifier.Builder(); + } + + /** + * Returns the number of nearest neighbors used in the classification of patterns. Must be odd + * @return the k + */ + public int getK() { + return k; + } + + /** + * If true, patterns must match exactly when assigning class labels + * @return the exact + */ + public boolean isExact() { + return exact; + } + + /** + * When distance method is "norm", this specifies the p value of the Lp-norm + * @return the distanceNorm + */ + public double getDistanceNorm() { + return distanceNorm; + } + + /** + * The method used to compute distance between input patterns and prototype patterns. + * see({@link DistanceMethod}) + * + * @return the distanceMethod + */ + public DistanceMethod getDistanceMethod() { + return distanceMethod; + } + + /** + * A threshold on the distance between learned + * patterns and a new pattern proposed to be learned. The distance must be + * greater than this threshold in order for the new pattern to be added to + * the classifier's memory + * + * @return the distanceThreshold + */ + public double getDistanceThreshold() { + return distanceThreshold; + } + + /** + * If True, then scalar inputs will be binarized. + * @return the doBinarization + */ + public boolean isDoBinarization() { + return doBinarization; + } + + /** + * If doBinarization is True, this specifies the threshold for the binarization of inputs + * @return the binarizationThreshold + */ + public double getBinarizationThreshold() { + return binarizationThreshold; + } + + /** + * If True, classifier will use a sparse memory matrix + * @return the useSparseMemory + */ + public boolean isUseSparseMemory() { + return useSparseMemory; + } + + /** + * If useSparseMemory is True, input variables whose absolute values are + * less than this threshold will be stored as zero + * @return the sparseThreshold + */ + public double getSparseThreshold() { + return sparseThreshold; + } + + /** + * Flag specifying whether to multiply sparseThreshold by max value in input + * @return the relativeThreshold + */ + public boolean isRelativeThreshold() { + return relativeThreshold; + } + + /** + * Number of elements of the input that are stored. If 0, all elements are stored + * @return the numWinners + */ + public int getNumWinners() { + return numWinners; + } + + /** + * Number of samples the must occur before a SVD + * (Singular Value Decomposition) transformation will be performed. If 0, + * the transformation will never be performed + * + * @return the numSVDSamples + */ + public int getNumSVDSamples() { + return numSVDSamples; + } + + /** + * Controls dimensions kept after SVD transformation. If "adaptive", + * the number is chosen automatically + * + * @return the numSVDDims + */ + public Constants.KNN getNumSVDDims() { + return numSVDDims; + } + + /** + * If numSVDDims is "adaptive", this controls the + * smallest singular value that is retained as a fraction of the largest + * singular value + * + * @return the fractionOfMax + */ + public double getFractionOfMax() { + return fractionOfMax; + } + + /** + * Limits the maximum number of the training + * patterns stored. When KNN learns in a fixed capacity mode, the unused + * patterns are deleted once the number of stored patterns is greater than + * maxStoredPatterns. A value of -1 is no limit + * + * @return the maxStoredPatterns + */ + public int getMaxStoredPatterns() { + return maxStoredPatterns; + } + + /** + * A boolean flag that determines whether, + * during learning, the classifier replaces duplicates that match exactly, + * even if distThreshold is 0. Should be TRUE for online learning + * + * @return the replaceDuplicates + */ + public boolean isReplaceDuplicates() { + return replaceDuplicates; + } + + /** + * If >= 1, input is assumed to be organized into + * columns, in the same manner as the temporal pooler AND whenever a new + * prototype is stored, only the start cell (first cell) is stored in any + * bursting column + * + * @return the cellsPerCol + */ + public int getCellsPerCol() { + return cellsPerCol; + } + + /** + * Implements the Builder Pattern for creating {@link KNNClassifier}s. + */ + public static class Builder { + private KNNClassifier fieldHolder = new KNNClassifier(); + + public Builder() {} + + /** + * Returns a new KNNClassifier contructed from the fields specified + * by this {@code Builder} + * @return + */ + public KNNClassifier build() { + KNNClassifier retVal = new KNNClassifier(); + for(Field f : fieldHolder.getClass().getDeclaredFields()) { + f.setAccessible(true); + try { + f.set(retVal, f.get(fieldHolder)); + }catch(Exception e) { + e.printStackTrace(); + } + } + return retVal; + } + + /** + * Returns a thoroughly constructed KNNClassifier using the + * parameters specified by the argument. + * @param p + * @return + */ + public KNNClassifier apply(Parameters p) { + KNNClassifier retVal = new KNNClassifier(); + p.apply(retVal); + return retVal; + } + + /** + * The number of nearest neighbors used in the classification of patterns. Must be odd + * @param k + * @return this Builder + */ + public Builder k(int k) { + fieldHolder.k = k; + return this; + } + + /** + * If true, patterns must match exactly when assigning class labels + * @param b + * @return this Builder + */ + public Builder exact(boolean b) { + fieldHolder.exact = b; + return this; + } + + /** + * When distance method is "norm", this specifies the p value of the Lp-norm + * @param distanceNorm + * @return this Builder + */ + public Builder distanceNorm(double distanceNorm) { + fieldHolder.distanceNorm = distanceNorm; + return this; + } + + /** + * The method used to compute distance between input patterns and prototype patterns. + * see({@link DistanceMethod}) + * @param method + * @return + */ + public Builder distanceMethod(DistanceMethod method) { + fieldHolder.distanceMethod = method; + return this; + } + + /** + * A threshold on the distance between learned + * patterns and a new pattern proposed to be learned. The distance must be + * greater than this threshold in order for the new pattern to be added to + * the classifier's memory + * + * @param threshold + * @return this Builder + */ + public Builder distanceThreshold(double threshold) { + fieldHolder.distanceThreshold = threshold; + return this; + } + + /** + * If True, then scalar inputs will be binarized. + * @param b + * @return this Builder + */ + public Builder doBinarization(boolean b) { + fieldHolder.doBinarization = b; + return this; + } + + /** + * If doBinarization is True, this specifies the threshold for the binarization of inputs + * @param threshold + * @return this Builder + */ + public Builder binarizationThreshold(double threshold) { + fieldHolder.binarizationThreshold = threshold; + return this; + } + + /** + * If True, classifier will use a sparse memory matrix + * @param b + * @return this Builder + */ + public Builder useSparseMemory(boolean b) { + fieldHolder.useSparseMemory = b; + return this; + } + + /** + * If useSparseMemory is True, input variables whose absolute values are + * less than this threshold will be stored as zero + * @param threshold + * @return this Builder + */ + public Builder sparseThreshold(double threshold) { + fieldHolder.sparseThreshold = threshold; + return this; + } + + /** + * Flag specifying whether to multiply sparseThreshold by max value in input + * @param b + * @return this Builder + */ + public Builder relativeThreshold(boolean b) { + fieldHolder.relativeThreshold = b; + return this; + } + + /** + * Number of elements of the input that are stored. If 0, all elements are stored + * @param b + * @return this Builder + */ + public Builder numWinners(int num) { + fieldHolder.numWinners = num; + return this; + } + + /** + * Number of samples the must occur before a SVD + * (Singular Value Decomposition) transformation will be performed. If 0, + * the transformation will never be performed + * + * @param b + * @return this Builder + */ + public Builder numSVDSamples(int num) { + fieldHolder.numSVDSamples = num; + return this; + } + + /** + * Controls dimensions kept after SVD transformation. If "adaptive", + * the number is chosen automatically + * @param con + * @return this Builder + */ + public Builder numSVDDims(Constants.KNN constant) { + fieldHolder.numSVDDims = constant; + return this; + } + + /** + * If numSVDDims is "adaptive", this controls the + * smallest singular value that is retained as a fraction of the largest + * singular value + * + * @param fraction + * @return this Builder + */ + public Builder fractionOfMax(double fraction) { + fieldHolder.fractionOfMax = fraction; + return this; + } + + /** + * Limits the maximum number of the training + * patterns stored. When KNN learns in a fixed capacity mode, the unused + * patterns are deleted once the number of stored patterns is greater than + * maxStoredPatterns. A value of -1 is no limit + * + * @param max + * @return the Builder + */ + public Builder maxStoredPatterns(int max) { + fieldHolder.maxStoredPatterns = max; + return this; + } + + /** + * A boolean flag that determines whether, + * during learning, the classifier replaces duplicates that match exactly, + * even if distThreshold is 0. Should be TRUE for online learning + * @param b + * @return this Builder + */ + public Builder replaceDuplicates(boolean b) { + fieldHolder.replaceDuplicates = b; + return this; + } + + /** + * If >= 1, input is assumed to be organized into + * columns, in the same manner as the temporal pooler AND whenever a new + * prototype is stored, only the start cell (first cell) is stored in any + * bursting column + * @param num + * @return this Builder + */ + public Builder cellsPerCol(int num) { + fieldHolder.cellsPerCol = num; + return this; + } + } + +} diff --git a/src/test/java/org/numenta/nupic/algorithms/KNNClassifierTest.java b/src/test/java/org/numenta/nupic/algorithms/KNNClassifierTest.java new file mode 100644 index 00000000..8062a47c --- /dev/null +++ b/src/test/java/org/numenta/nupic/algorithms/KNNClassifierTest.java @@ -0,0 +1,15 @@ +package org.numenta.nupic.algorithms; + +import static org.junit.Assert.*; + +import org.junit.Test; + + +public class KNNClassifierTest { + + @Test + public void test() { + //fail("Not yet implemented"); + } + +} From c00eff7c91b32995cf42c7af54c7a80fca36e347 Mon Sep 17 00:00:00 2001 From: cogmission Date: Sat, 9 Jan 2016 10:43:55 -0600 Subject: [PATCH 03/31] Added new Enums and Constants --- .../java/org/numenta/nupic/Constants.java | 15 +++++ .../org/numenta/nupic/DistanceMethod.java | 58 +++++++++++++++++++ 2 files changed, 73 insertions(+) create mode 100644 src/main/java/org/numenta/nupic/Constants.java create mode 100644 src/main/java/org/numenta/nupic/DistanceMethod.java diff --git a/src/main/java/org/numenta/nupic/Constants.java b/src/main/java/org/numenta/nupic/Constants.java new file mode 100644 index 00000000..556c9877 --- /dev/null +++ b/src/main/java/org/numenta/nupic/Constants.java @@ -0,0 +1,15 @@ +package org.numenta.nupic; + + +public class Constants { + public static final String ADAPTIVE = "adaptive"; + + public enum KNN { + ADAPTIVE("adaptive"); + private String description; + private KNN(String desc) { this.description = desc; } + /** {@inheritDoc} */ + public String toString() { return description; } + } + +} diff --git a/src/main/java/org/numenta/nupic/DistanceMethod.java b/src/main/java/org/numenta/nupic/DistanceMethod.java new file mode 100644 index 00000000..aea4fdca --- /dev/null +++ b/src/main/java/org/numenta/nupic/DistanceMethod.java @@ -0,0 +1,58 @@ +package org.numenta.nupic; + +/** + *

+ * The method used to compute distance between input patterns and prototype patterns. + *

+ * The possible options are: + *