From 249ba0a3f5d050dfc463f37738d87caaa863666a Mon Sep 17 00:00:00 2001 From: Alex Ott Date: Fri, 17 May 2013 21:57:11 +0200 Subject: [PATCH 1/2] move it to clojure 1.5.1, etc. --- .gitignore | 2 ++ project.clj | 23 +++++++++++------- src/infer/classification.clj | 22 ++++++++--------- src/infer/compat.clj | 18 ++++++++++++++ src/infer/core.clj | 7 +++--- src/infer/cross_validation.clj | 13 ++++------ src/infer/features.clj | 19 +++++++-------- src/infer/io.clj | 8 +++---- src/infer/learning.clj | 14 ++++------- src/infer/linear_models.clj | 13 ++++------ src/infer/lsh.clj | 12 ++++------ src/infer/measures.clj | 12 ++++------ src/infer/neighbors.clj | 21 +++++++--------- src/infer/probability.clj | 9 ++++--- src/infer/random_variate.clj | 7 +++--- test/infer/classification_test.clj | 33 ++++++++++++++++---------- test/infer/features_test.clj | 6 ++--- test/infer/information_theory_test.clj | 12 +++++----- test/infer/lsh_test.clj | 7 +++--- test/infer/matrix_test.clj | 27 ++++++++++----------- test/infer/measures_test.clj | 21 +++++++--------- test/infer/streaming_test.clj | 32 ++++++++++++------------- 22 files changed, 172 insertions(+), 166 deletions(-) create mode 100644 src/infer/compat.clj diff --git a/.gitignore b/.gitignore index 2fefebf..da3b3a6 100644 --- a/.gitignore +++ b/.gitignore @@ -23,3 +23,5 @@ aws.clj *.project *.settings *.pyc +/.lein-failures +/.lein-repl-history diff --git a/project.clj b/project.clj index 4df8042..e2de4e3 100644 --- a/project.clj +++ b/project.clj @@ -1,13 +1,18 @@ -(defproject infer "1.0-SNAPSHOT" +(defproject infer "1.1-SNAPSHOT" + :min-lein-version "2.0.0" :description "inference and machine learning for clojure" - :dependencies [[org.clojure/clojure "1.2.0-master-SNAPSHOT"] - [org.clojure/clojure-contrib "1.2.0-SNAPSHOT"] - [clojure-csv/clojure-csv "1.1.0"] - [org.apache.commons/commons-math "2.0"] + :dependencies [[org.clojure/clojure "1.5.1"] + [clojure-csv "2.0.0-alpha2" :exclude org.clojure/clojure] + 
[org.apache.commons/commons-math "2.2"] [ujmp-complete "0.2.4"] [org.apache.mahout/mahout-core "0.3"] [colt/colt "1.2.0"] - [incanter/parallelcolt "0.9.4"]] - :dev-dependencies [[org.clojars.mmcgrana/lein-javac "0.1.0"] - [swank-clojure "1.2.0"] - [lein-clojars "0.5.0"]]) + [net.sourceforge.parallelcolt/parallelcolt "0.10.0"] + [org.clojure/algo.monads "0.1.4" :exclude org.clojure/clojure] + [org.clojure/math.combinatorics "0.0.4"] + [org.clojure/math.numeric-tower "0.0.2"] + [org.clojure/algo.generic "0.1.1"] + ] + :java-source-paths ["src/jvm"] + :jvm-opts ["-Xmx512m"] + ) diff --git a/src/infer/classification.clj b/src/infer/classification.clj index e495a67..a5595c7 100644 --- a/src/infer/classification.clj +++ b/src/infer/classification.clj @@ -4,12 +4,10 @@ Classifiers are maps of classifier-name -> functions, data are maps of feature-name features." - (:use infer.features) - (:use infer.neighbors) - (:use infer.linear-models) - (:use [clojure.contrib.map-utils :only [deep-merge-with]]) - (:use [infer.core :only [safe threshold-to map-map levels-deep all-keys]]) - (:use [infer.probability :only [bucket +cond-prob-tuples]])) + (:use [infer features neighbors compat linear-models] + [infer.core :only [safe threshold-to map-map levels-deep all-keys]] + [infer.probability :only [bucket +cond-prob-tuples]] + )) (defn discretizing-classifiers "Makes a discretizing classifier out of each key-range pair." @@ -57,11 +55,11 @@ (defn map-as-matrix [m] (let [ordered (map sort (vals (sort m)))] - (map (comp vec vals) ordered))) + (mapv (comp vec vals) ordered))) (defn real-precision [confusion-matrix] - (map (fn [v i] - (/ (nth v i) + (mapv (fn [v i] + (/ (float (nth v i)) (apply + v))) confusion-matrix (range 0 (count confusion-matrix)))) @@ -69,14 +67,14 @@ (defn real-recall "Computes recall by class label from confusion matrix." 
[confusion-matrix] - (real-precision (seq-trans confusion-matrix))) + (real-precision (seq-trans confusion-matrix))) (defn precision "Computes precision by class label from confusion matrix." [m] - (real-precision (map-as-matrix m))) + (real-precision (map-as-matrix m))) (defn recall "Computes recall by class label from confusion matrix." [m] - (real-precision (seq-trans (map-as-matrix m)))) \ No newline at end of file + (real-precision (seq-trans (map-as-matrix m)))) diff --git a/src/infer/compat.clj b/src/infer/compat.clj new file mode 100644 index 0000000..4728cce --- /dev/null +++ b/src/infer/compat.clj @@ -0,0 +1,18 @@ +(ns infer.compat + "Compatibility functions" + ) + +(defn deep-merge-with + "Like merge-with, but merges maps recursively, applying the given fn +only when there's a non-map at a particular level. + +(deep-merge-with + {:a {:b {:c 1 :d {:x 1 :y 2}} :e 3} :f 4} + {:a {:b {:c 2 :d {:z 9} :z 3} :e 100}}) +-> {:a {:b {:z 3, :c 3, :d {:z 9, :x 1, :y 2}}, :e 103}, :f 4}" + [f & maps] + (apply + (fn m [& maps] + (if (every? map? maps) + (apply merge-with m maps) + (apply f maps))) + maps)) diff --git a/src/infer/core.clj b/src/infer/core.clj index 3ae5adb..57dd304 100644 --- a/src/infer/core.clj +++ b/src/infer/core.clj @@ -1,7 +1,8 @@ (ns infer.core (:import org.apache.commons.math.util.MathUtils) - (:use clojure.contrib.monads) - (:use [clojure.set :only [intersection]])) + (:use clojure.algo.monads + [clojure.set :only [intersection]]) + ) ;;TODO: find tests for this stuff. 
@@ -296,4 +297,4 @@ (best-by > keyfn coll)) (defn min-by [keyfn coll] - (best-by < keyfn coll)) \ No newline at end of file + (best-by < keyfn coll)) diff --git a/src/infer/cross_validation.clj b/src/infer/cross_validation.clj index ceadaa8..37922e6 100644 --- a/src/infer/cross_validation.clj +++ b/src/infer/cross_validation.clj @@ -1,11 +1,8 @@ (ns infer.cross-validation - (:use infer.features) - (:use infer.neighbors) - (:use infer.linear-models) - (:use [clojure.contrib.seq-utils :only [flatten]]) - (:use [clojure.contrib.map-utils :only [deep-merge-with]]) - (:use [infer.core :only [safe threshold-to map-map levels-deep all-keys]]) - (:use [infer.probability :only [bucket +cond-prob-tuples]])) + (:use [infer features compat neighbors linear-models] + [infer.core :only [safe threshold-to map-map levels-deep all-keys]] + [infer.probability :only [bucket +cond-prob-tuples]]) + ) (defn probs-only "Compute probability from computed counts. @@ -174,4 +171,4 @@ holds each seq of vectors out in turn as the test set, merges the rest as traini discretized-knn to-nn-model nn-confusion-matrix - feature-vecs))) \ No newline at end of file + feature-vecs))) diff --git a/src/infer/features.clj b/src/infer/features.clj index 193663e..92e64d9 100644 --- a/src/infer/features.clj +++ b/src/infer/features.clj @@ -1,15 +1,12 @@ (ns infer.features (:import java.util.Random) - (:use clojure.contrib.combinatorics) - (:use clojure.contrib.math) - (:use clojure.set) - (:use infer.measures) - (:use infer.information-theory) - (:use infer.probability) - (:use infer.matrix) - (:use infer.core) - (:use [clojure.contrib.map-utils :only [deep-merge-with]]) - (:use clojure.set)) + (:use clojure.math.combinatorics + clojure.math.numeric-tower + clojure.set + [infer measures information-theory probability matrix core compat] + ) + + ) ;;TODO: check on all these vec operations. ;;what about pop on ecs and butlast? 
@@ -201,4 +198,4 @@ (defn marginalize-map [n m] (map-from-vectors - (marginalize n (feature-vectors2 m missing-smoother)))) \ No newline at end of file + (marginalize n (feature-vectors2 m missing-smoother)))) diff --git a/src/infer/io.clj b/src/infer/io.clj index 0c001d9..343c59b 100644 --- a/src/infer/io.clj +++ b/src/infer/io.clj @@ -1,10 +1,10 @@ (ns infer.io - (:use clojure.contrib.duck-streams) - (:use clojure-csv.core) - (:use infer.matrix)) + (:use clojure.java.io + clojure-csv.core ;; TODO: change to data.csv? + infer.matrix)) (defn csv->matrix [path] (let [strings (parse-csv (slurp path))] (matrix (for [row strings :when (not (some #(= "" %) row))] - (map #(Float/parseFloat %) row))))) \ No newline at end of file + (map #(Float/parseFloat %) row))))) diff --git a/src/infer/learning.clj b/src/infer/learning.clj index ab7e27d..e033eb3 100644 --- a/src/infer/learning.clj +++ b/src/infer/learning.clj @@ -1,12 +1,8 @@ (ns infer.learning - (:use clojure.set) - (:use clojure.contrib.math) - (:use infer.core) - (:use infer.matrix) - (:use infer.measures) - (:use infer.probability) - (:use infer.information-theory) - (:use infer.features)) + (:use clojure.set + clojure.math.numeric-tower + [infer core matrix measures probability information-theory features] + )) ;;optimization, regularization, and subset selection ;;TODO: should be split into a few libs @@ -191,4 +187,4 @@ A more robust implementation of the algorithm would also check whether the funct ;; (recur snext enext (+ k 1)))))) -;;http://en.wikipedia.org/wiki/Regularization_(mathematics) \ No newline at end of file +;;http://en.wikipedia.org/wiki/Regularization_(mathematics) diff --git a/src/infer/linear_models.clj b/src/infer/linear_models.clj index 4a53840..9e60584 100644 --- a/src/infer/linear_models.clj +++ b/src/infer/linear_models.clj @@ -1,11 +1,8 @@ (ns infer.linear-models - (:use clojure.contrib.math) - (:use clojure.set) - (:use infer.core) - (:use infer.matrix) - (:use infer.learning) - (:use 
infer.measures) - (:use infer.probability)) + (:use clojure.math.numeric-tower + clojure.set + [infer core matrix learning measures probability]) + ) (defn vecize-1d "if this is the 1d case, put each calue in a vec." @@ -137,4 +134,4 @@ http://en.wikipedia.org/wiki/Tikhonov_regularization via hard thresholding." [X lambda precision] - ) \ No newline at end of file + ) diff --git a/src/infer/lsh.clj b/src/infer/lsh.clj index 88ddaf0..206a8a0 100644 --- a/src/infer/lsh.clj +++ b/src/infer/lsh.clj @@ -1,8 +1,9 @@ (ns infer.lsh - (:use [clojure.contrib.math :only (floor)]) - (:use [clojure.set :only (union intersection difference)]) - (:import [java.util Random]) - (:use [infer.random-variate :only (random-normal)])) + (:use [clojure.math.numeric-tower :only (floor)] + [clojure.set :only (union intersection difference)] + [infer.random-variate :only (random-normal)]) + (:import [java.util Random]) + ) (defn dot-product [x y] @@ -39,9 +40,6 @@ (fn [data] (floor (/ (+ b (dot-product data v)) r)))) -(defn spherical-l2-hash - "Proposed by Terasawa and Tanaka (2007)") - (defn- apply-hash-ensemble "Takes a list of minhash functions and data." 
[hash-ensemble data] diff --git a/src/infer/measures.clj b/src/infer/measures.clj index 5c7c57b..36a8b29 100644 --- a/src/infer/measures.clj +++ b/src/infer/measures.clj @@ -1,10 +1,8 @@ (ns infer.measures - (:use clojure.contrib.math) - (:use clojure.contrib.map-utils) - (:use clojure.set) - (:use infer.core) - (:use infer.matrix) - (:use [infer.probability :only [gt lt binary]]) + (:use [infer core compat matrix] + [infer.probability :only [gt lt binary]] + clojure.math.numeric-tower + clojure.set) (:import org.apache.commons.math.stat.StatUtils) (:import [org.apache.commons.math.stat.correlation PearsonsCorrelation Covariance]) @@ -686,4 +684,4 @@ The Levenshtein distance has several simple upper and lower bounds that are usef (reduce + (map #(* % %) (flatten (from-matrix A)))))] - Af)) \ No newline at end of file + Af)) diff --git a/src/infer/neighbors.clj b/src/infer/neighbors.clj index b8bf301..1d20ae3 100644 --- a/src/infer/neighbors.clj +++ b/src/infer/neighbors.clj @@ -1,11 +1,10 @@ (ns infer.neighbors - (:use infer.measures) - (:use infer.core) - (:use infer.features) - (:use clojure.contrib.math) - (:use [clojure.set :only (union intersection difference)]) - (:import [java.util Random]) - (:use [infer.random-variate :only (random-normal)])) + (:use [infer measures core features] + clojure.math.numeric-tower + [infer.random-variate :only (random-normal)] + [clojure.set :only (union intersection difference)]) + (:import [java.util Random]) + ) ;;TODO: is motthing really the right name for this lib? Density estimation? k-NN & kernels? ;;TODO: change sigs to match the matrix apis of xs & ys rather that [xs & ys] @@ -111,12 +110,13 @@ ;;TODO: ;;1. pass the distance fn and weighing fn seperately rahter than composing into weigh prior to calling? 
;;for kernels, but weighted mean calc is identical for k-nn -(defn nadaraya-watson-estimator [point weigh points] -"takes a query point, a weight fn, and a seq of points, and returns the weighted sum of the points divided but the sum of the weights. the weigh fn is called with the query point and each point in the points seq. the weigh fn is thus a composition of a weight fn and a distance measure. +(defn nadaraya-watson-estimator + "takes a query point, a weight fn, and a seq of points, and returns the weighted sum of the points divided by the sum of the weights. the weigh fn is called with the query point and each point in the points seq. the weigh fn is thus a composition of a weight fn and a distance measure. http://en.wikipedia.org/wiki/Kernel_regression#Nadaraya-Watson_kernel_regression " + [point weigh points] (let [weights* (weights point weigh points) divisor (sum weights*)] (if (single-class? points) @@ -175,9 +175,6 @@ http://en.wikipedia.org/wiki/Kernel_regression#Nadaraya-Watson_kernel_regression (fn [data] (floor (/ (+ b (dot-product data v)) r)))) -(defn spherical-l2-hash - "Proposed by Terasawa and Tanaka (2007)") - (defn- apply-hash-ensemble "Takes a list of minhash functions and data." [hash-ensemble data] diff --git a/src/infer/probability.clj b/src/infer/probability.clj index 1ec38b4..6267804 100644 --- a/src/infer/probability.clj +++ b/src/infer/probability.clj @@ -1,11 +1,10 @@ (ns infer.probability (:import [java.io File]) (:import [java.util Date Calendar]) - (:use [clojure.set :only [difference]]) - (:use [clojure.contrib.map-utils :only [deep-merge-with]]) - (:use [infer.core :only [tree-comp any?]]) - (:use [infer.core - :only [set-to-unit-map bottom-level? map-map same-length?]])) + (:use [clojure.set :only [difference]] + infer.compat + [infer.core :only [tree-comp any? set-to-unit-map bottom-level? 
map-map same-length?]]) + ) (defn binary "A function for binary classification that takes a booleavn value and returns diff --git a/src/infer/random_variate.clj b/src/infer/random_variate.clj index 3a23218..01347d4 100644 --- a/src/infer/random_variate.clj +++ b/src/infer/random_variate.clj @@ -1,6 +1,7 @@ (ns infer.random-variate - (:use [clojure.contrib.math :only (expt sqrt)]) - (:use [clojure.contrib.generic.math-functions :only (tan log cos sin)])) + (:use [clojure.math.numeric-tower :only (expt sqrt)] + [clojure.algo.generic.math-functions :only (tan log cos sin)]) + ) (defn exp-rv "Simulate an exponential distribution with @@ -43,4 +44,4 @@ "Generate a lazy sequence of unit normal random variables." [] (let [bm (box-muller)] - (lazy-seq (concat bm (random-normal))))) \ No newline at end of file + (lazy-seq (concat bm (random-normal))))) diff --git a/test/infer/classification_test.clj b/test/infer/classification_test.clj index e707c66..fcadf13 100644 --- a/test/infer/classification_test.clj +++ b/test/infer/classification_test.clj @@ -1,9 +1,16 @@ (ns infer.classification-test - (:use clojure.test) - (:use infer.classification) - (:use infer.cross-validation) - (:use infer.probability) - (:use clojure.contrib.map-utils)) + (:use clojure.test + [infer classification cross-validation probability compat] + )) + +(defn- float= [x y] + (let [epsilon 0.0001 + scale (if (or (zero? x) (zero? 
y)) 1 (Math/abs x))] (<= (Math/abs (- x y)) (* scale epsilon)))) + +(defn- float=-seqs "True iff s1 and s2 have the same length and are pairwise float=." [s1 s2] + (and (= (count s1) (count s2)) + (every? true? (map float= s1 s2)))) (deftest classify-one-to-one-item (let [test-fns {:a (present-when (gt 5)) :b (present-when (lt 5))}] @@ -136,31 +143,31 @@ (confusion-matrix (model-from-maps ex1) (first ex2))))) (deftest recall-test - (is (= + (is (float=-seqs [(float (/ 605170 (+ 605170 5032 3377))) - (float (/ 3216 (+ 3216 3571 57663))) - (float (/ 13100 (+ 689 13100 17962)))] + (float (/ 3216 (+ 3216 3571 57663))) + (float (/ 13100 (+ 689 13100 17962)))] (recall {1 {2 689, 0 5032, 1 3216}, 2 {1 3571, 0 3377, 2 13100}, 0 {2 17962, 0 605170, 1 57663}}))) - (is (= [(float (/ 19817 (+ 19817 558))) + (is (float=-seqs [(float (/ 19817 (+ 19817 558))) (float (/ 3960 (+ 3960 1291 198))) (float (/ 2132 (+ 2132 69 274)))] (recall {1 {0 558, 2 274, 1 3960}, 0 {2 69, 1 1291, 0 19817}, 2 {0 0, 1 198, 2 2132}}))) - (is (= [0.5, 0, 0.5] + (is (= [0.5, 0.0, 0.5] (recall {0 {0 5, 1 2, 2 5}, 1 {0 0, 1 0, 2 0}, 2 {0 5, 1 5, 2 5}})))) (deftest precision-test - (is (= [(float (/ 349474 (+ 25078 68131 349474)))] + (is (float=-seqs [(float (/ 349474 (+ 25078 68131 349474)))] (precision {0 {2 25078, 1 68131, 0 349474}}))) - (is (= [(float (/ 349474 (+ 25078 68131 349474))) + (is (float=-seqs [(float (/ 349474 (+ 25078 68131 349474))) (float (/ 9752 (+ 257 9752 89783)))] (precision {0 {2 25078, 1 68131, 0 349474} 1 {2 257, 1 9752, 0 89783}})))) @@ -220,4 +227,4 @@ ;; (deftest cross-validate-smoothing ;; (is (= {0 {2 962, 0 1296, 1 1298}} -;; (cross-validation-kernel-smoother [exs1 exs2])))) \ No newline at end of file +;; (cross-validation-kernel-smoother [exs1 exs2])))) diff --git a/test/infer/features_test.clj b/test/infer/features_test.clj index 518ba1f..fa39498 100644 --- a/test/infer/features_test.clj +++ b/test/infer/features_test.clj @@ -100,11 +100,11 @@ [0 6 0 2 2 2] [0 6 0 2 1 1]]] - (is (= 7 + (is (= 7.0 (counts-when (partial nth-is? 
4 #(> % 2)) data))) - (is (= 3 + (is (= 3.0 (counts-when (partial nth-is? 4 #(< % 3)) data))))) @@ -170,4 +170,4 @@ (is (= [[0 1 2 2 2]] (feature-vectors2 example))) (is (= {0.0 {1.0 {2.0 2.0}}} - (marginalize-map [2] {0 {1 {2 {2 2}}}}))))) \ No newline at end of file + (marginalize-map [2] {0 {1 {2 {2 2}}}}))))) diff --git a/test/infer/information_theory_test.clj b/test/infer/information_theory_test.clj index b208d31..aa7b366 100644 --- a/test/infer/information_theory_test.clj +++ b/test/infer/information_theory_test.clj @@ -1,8 +1,8 @@ (ns infer.information-theory-test - (:use clojure.test) - (:use infer.information-theory) - (:use infer.probability) - (:use clojure.contrib.math)) + (:use clojure.test + clojure.math.numeric-tower + [infer information-theory probability]) + ) (defn =within [delta x y] (>= delta (abs (- x y)))) @@ -53,7 +53,7 @@ 2 10 3 5 4 5}}] - (is (= 0 + (is (= 0.0 (mutual-information independent-joint [py pz])))) (let [py {1 3 2 5} @@ -63,4 +63,4 @@ {1 {1 6} 2 {2 5}}] (is (= 1.1660760651114532 - (mutual-information dependent-joint [py pz]))))) \ No newline at end of file + (mutual-information dependent-joint [py pz]))))) diff --git a/test/infer/lsh_test.clj b/test/infer/lsh_test.clj index 9181236..9a00fb0 100644 --- a/test/infer/lsh_test.clj +++ b/test/infer/lsh_test.clj @@ -1,6 +1,7 @@ (ns infer.lsh-test - (:use infer.lsh) - (:use clojure.test)) + (:use infer.lsh + clojure.test) + ) (deftest test-dot-product (is (= 100 (dot-product [5 5 5 5] [5 5 5 5])))) @@ -57,4 +58,4 @@ {[1 2] #{6}, [2 4] #{5 7}}]] (is (= [{[1 1] #{1 2 5}, [2 2] #{3 4}, [2 3] #{6 7}} {[1 2] #{1 6}, [2 3] #{2 3 4}, [2 4] #{5 7}}] - (merge-tables table1 table2))))) \ No newline at end of file + (merge-tables table1 table2))))) diff --git a/test/infer/matrix_test.clj b/test/infer/matrix_test.clj index 45c1f5f..f89acdb 100644 --- a/test/infer/matrix_test.clj +++ b/test/infer/matrix_test.clj @@ -9,9 +9,9 @@ (deftest inc-at-test (let [A (fill 0 3 3)] (inc-at A 0 0) - (is (= 1 
(get-at A 0 0))) + (is (= 1.0 (get-at A 0 0))) (inc-at A 2 0 0) - (is (= 3 (get-at A 0 0))))) + (is (= 3.0 (get-at A 0 0))))) (deftest ensure-vecs-test (let [v (ensure-vecs [[1]]) @@ -26,36 +26,35 @@ (let [m (matrix [[1 2 3] [4 5 6]]) single-m (column-matrix [1 2 3 4 5 6])] - (is (= 6 (get-at m 1 2))) - (is (= 6 (get-at single-m 5 0))))) + (is (= 6.0 (get-at m 1 2))) + (is (= 6.0 (get-at single-m 5 0))))) (deftest to-and-from-matrix - (let [a [[1 2 3] [4 5 6]] - A (matrix - [[1 2 3] [4 5 6]]) + (let [a [[1.0 2.0 3.0] [4.0 5.0 6.0]] + A (matrix [[1 2 3] [4 5 6]]) b (from-matrix A)] (is (= a b)))) (deftest to-and-from-sparse-matrix - (let [a [{0 1, 5 2, 9 3} {4 4,9 5, 16 6}] + (let [a [{0 1.0, 5 2.0, 9 3.0} {4 4.0, 9 5.0, 16 6.0}] A (sparse-matrix a) b (from-sparse-2d-matrix A)] (is (= a b)))) (deftest to-and-from-sparse-colt-matrix - (let [a [{0 1, 5 2, 9 3} {4 4,9 5, 16 6}] + (let [a [{0 1.0, 5 2.0, 9 3.0} {4 4.0, 9 5.0, 16 6.0}] A (sparse-colt-matrix a) b (from-sparse-2d-matrix A)] (is (= a b)))) (deftest to-and-from-sparse-pcolt-matrix - (let [a [{0 1, 5 2, 9 3} {4 4,9 5, 16 6}] + (let [a [{0 1.0, 5 2.0, 9 3.0} {4 4.0, 9 5.0, 16 6.0}] A (sparse-pcolt-matrix a) b (from-sparse-2d-matrix A)] (is (= a b)))) (deftest to-and-from-column-matrix - (let [a [1 2 3] + (let [a [1.0 2.0 3.0] A (column-matrix a) b (from-column-matrix A)] @@ -63,8 +62,8 @@ (deftest identity-matrix (let [i (from-matrix (I 2 2))] - (is (= [[1 0] - [0 1]] + (is (= [[1.0 0.0] + [0.0 1.0]] i)))) (deftest create-diagonal-weights @@ -108,4 +107,4 @@ (from-matrix (column-concat (column-matrix [1 2]) (column-matrix [2 3]) - (column-matrix [3 4])))))) \ No newline at end of file + (column-matrix [3 4])))))) diff --git a/test/infer/measures_test.clj b/test/infer/measures_test.clj index 490a969..c0b3807 100644 --- a/test/infer/measures_test.clj +++ b/test/infer/measures_test.clj @@ -4,18 +4,14 @@ (:use infer.measures)) (deftest sum-test - (is (= 9 - (sum [2 3 4])))) + (is (= 9.0 (sum [2 3 4])))) (deftest 
weighted-sum-test - (is (= 9 - (weighted-sum [2 3 4] [1 1 1]))) - (is (= 4.5 - (weighted-sum [2 3 4] [0.5 0.5 0.5])))) + (is (= 9.0 (weighted-sum [2 3 4] [1 1 1]))) + (is (= 4.5 (weighted-sum [2 3 4] [0.5 0.5 0.5])))) (deftest sample-mean - (is (= 3 - (mean [2 3 4])))) + (is (= 3.0 (mean [2 3 4])))) (deftest stdev-test (is (= 2.138089935299395 @@ -95,8 +91,7 @@ ;; Plugging this into the formula, we calculate, s = (2 ยท 1) / (4 + 4) = 0.25. (deftest dice-string (is - (= 0.25 - (dice-coefficient-str "night" "nacht")))) + (= 0.25 (float (dice-coefficient-str "night" "nacht"))))) (deftest get-ngrams (is (= #{"gh" "ht" "ni" "ig"} @@ -141,7 +136,7 @@ (deftest manhattan (is - (= (+ 1 1 2 1 1) + (= (+ 1 1 2 1 1.0) (manhattan-distance [2 4 3 1 6] [3 5 1 2 5])))) @@ -167,7 +162,7 @@ (is (= 100 (dot-product [5 5 5 5] [5 5 5 5])))) (deftest nuclear - (is (= 10 (nuclear-norm (matrix (I 10 10)))))) + (is (= 10.0 (nuclear-norm (matrix (I 10 10)))))) (deftest frobenius - (is (= 10 (frobenius-norm (matrix (I 100 100)))))) \ No newline at end of file + (is (= 10.0 (frobenius-norm (matrix (I 100 100)))))) diff --git a/test/infer/streaming_test.clj b/test/infer/streaming_test.clj index fe6d3ac..fa4c3a8 100644 --- a/test/infer/streaming_test.clj +++ b/test/infer/streaming_test.clj @@ -12,39 +12,39 @@ (is (Double/isNaN (.enqueue q 15))) (is (Double/isNaN (.enqueue q 3))) (is (Double/isNaN (.enqueue q 17))) - (is (= 15 (.enqueue q 2))) - (is (= 3 (.enqueue q 3))) - (is (= 17 (.enqueue q 4))))) + (is (= 15.0 (.enqueue q 2))) + (is (= 3.0 (.enqueue q 3))) + (is (= 17.0 (.enqueue q 4))))) (deftest enqueueing-state (let [q (RollingQueue. 
3) _ (.enqueue q 15) _ (.enqueue q 3)] - (is (= 15 (.getAtLookback q 1))) - (is (= 15 (.getAtIndex q 0))) + (is (= 15.0 (.getAtLookback q 1))) + (is (= 15.0 (.getAtIndex q 0))) (is (= 2 (.getLength q))) (let [_ (.enqueue q 17)] - (is (= 3 (.getAtLookback q 1))) - (is (= 17 (.getAtLookback q 0))) - (is (= 15 (.getAtLookback q 2))) - (is (= 3 (.getAtIndex q 1))) + (is (= 3. (.getAtLookback q 1))) + (is (= 17.0 (.getAtLookback q 0))) + (is (= 15.0 (.getAtLookback q 2))) + (is (= 3.0 (.getAtIndex q 1))) (is (= 3 (.getLength q))) (let [_ (.enqueue q 1)] - (is (= 1 (.getAtLookback q 0))) - (is (= 17 (.getAtLookback q 1))) - (is (= 3 (.getAtLookback q 2))) - (is (= 1 (.getAtIndex q 2))) + (is (= 1.0 (.getAtLookback q 0))) + (is (= 17.0 (.getAtLookback q 1))) + (is (= 3.0 (.getAtLookback q 2))) + (is (= 1.0 (.getAtIndex q 2))) (is (= 3 (.getLength q))))))) (deftest accumulating-mean (let [m (acc (AccumulatingMean.))] - (is (= 1 (m 1))) - (is (= 4 (m 7))) + (is (= 1.0 (m 1))) + (is (= 4.0 (m 7))) (is (= 4.666666666666667 (m 6))) (is (= 4.5 (m 4))) (is (= 4.6 (m 5))))) @@ -55,4 +55,4 @@ (is (Double/isNaN (m 7))) (is (Double/isNaN (m 6))) (is (= 4.5 (m 4))) - (is (= 5.5 (m 5))))) \ No newline at end of file + (is (= 5.5 (m 5))))) From 25dc09a354de72498a73f8deb0aeacef212257a7 Mon Sep 17 00:00:00 2001 From: Alex Ott Date: Fri, 17 May 2013 22:01:09 +0200 Subject: [PATCH 2/2] remove mahout from dependencies & code --- project.clj | 10 +++++----- src/infer/matrix.clj | 18 ------------------ 2 files changed, 5 insertions(+), 23 deletions(-) diff --git a/project.clj b/project.clj index e2de4e3..bd0d1af 100644 --- a/project.clj +++ b/project.clj @@ -5,13 +5,13 @@ [clojure-csv "2.0.0-alpha2" :exclude org.clojure/clojure] [org.apache.commons/commons-math "2.2"] [ujmp-complete "0.2.4"] - [org.apache.mahout/mahout-core "0.3"] - [colt/colt "1.2.0"] + [colt/colt "1.2.0"] [net.sourceforge.parallelcolt/parallelcolt "0.10.0"] [org.clojure/algo.monads "0.1.4" :exclude org.clojure/clojure] - 
[org.clojure/math.combinatorics "0.0.4"] - [org.clojure/math.numeric-tower "0.0.2"] - [org.clojure/algo.generic "0.1.1"] + [org.clojure/math.combinatorics "0.0.4" :exclude org.clojure/clojure] + [org.clojure/math.numeric-tower "0.0.2" :exclude org.clojure/clojure] + [org.clojure/algo.generic "0.1.1" :exclude org.clojure/clojure] + ;; [org.apache.mahout/mahout-math "0.7"] ] :java-source-paths ["src/jvm"] :jvm-opts ["-Xmx512m"] diff --git a/src/infer/matrix.clj b/src/infer/matrix.clj index cffc0af..aac851c 100644 --- a/src/infer/matrix.clj +++ b/src/infer/matrix.clj @@ -7,7 +7,6 @@ (:import [org.ujmp.core.matrix Matrix2D]) (:import [org.ujmp.colt ColtSparseDoubleMatrix2D]) -;; (:import [org.apache.mahout.core SparseMatrix]) (:import [org.ujmp.parallelcolt ParallelColtSparseDoubleMatrix2D]) (:import [org.ujmp.core.doublematrix @@ -69,23 +68,6 @@ (defn sparse-pcolt-matrix [xs] (sparse-matrix* xs #(ParallelColtSparseDoubleMatrix2D. %))) -;; (defn sparse-mahout-matrix [xs] -;; (let [n-rows (count xs) -;; cols (reduce (fn [acc row] -;; (union acc (into #{} (keys row)))) -;; #{} -;; xs) -;; m (SparseMatrix. (long-array [n-rows (+ (apply max cols) 1)])) -;; row-indices (range 0 (count xs))] -;; (dorun -;; (map (fn [row r] -;; (dorun (map (fn [[c v]] -;; (.setQuick m r c v) -;; row))) -;; xs -;; row-indices)) -;; m))) - (defn from-sparse-matrix [m] (map (fn [coord] (conj (into [] (map int coord)) (.getDouble m coord)))