From 61e031e66c06d5b8c320b4bc473508623df10ead Mon Sep 17 00:00:00 2001 From: Benjamin James Date: Mon, 22 Oct 2018 14:27:11 -0500 Subject: [PATCH] initial commit --- Makefile | 19 + README | 74 + src/Makefile | 175 ++ src/RepeatsDetector.cpp | 583 ++++++ src/cluster/Makefile | 31 + src/cluster/src/Center.h | 42 + src/cluster/src/ClusterFactory.cpp | 1024 +++++++++ src/cluster/src/ClusterFactory.h | 82 + src/cluster/src/DivergencePoint.cpp | 284 +++ src/cluster/src/DivergencePoint.h | 89 + src/cluster/src/Feature.cpp | 1823 +++++++++++++++++ src/cluster/src/Feature.h | 380 ++++ src/cluster/src/GLM.cpp | 66 + src/cluster/src/GLM.h | 31 + src/cluster/src/HandleSeq.cpp | 155 ++ src/cluster/src/HandleSeq.h | 77 + src/cluster/src/Histogram.cpp | 195 ++ src/cluster/src/Histogram.h | 80 + src/cluster/src/Loader.cpp | 111 + src/cluster/src/Loader.h | 73 + src/cluster/src/LogTable.cpp | 41 + src/cluster/src/LogTable.h | 20 + src/cluster/src/Mat.h | 73 + src/cluster/src/Matrix.cpp | 360 ++++ src/cluster/src/Matrix.h | 52 + src/cluster/src/MultiMute.cpp | 455 ++++ src/cluster/src/MultiMute.h | 142 ++ src/cluster/src/NearestNeighbor.h | 52 + src/cluster/src/Point.h | 83 + src/cluster/src/Predictor.cpp | 837 ++++++++ src/cluster/src/Predictor.h | 78 + src/cluster/src/Progress.cpp | 65 + src/cluster/src/Progress.h | 29 + src/cluster/src/Random.h | 22 + src/cluster/src/Runner.cpp | 397 ++++ src/cluster/src/Runner.h | 44 + src/cluster/src/SingMute.cpp | 116 ++ src/cluster/src/SingMute.h | 48 + src/cluster/src/SingleFeature.cpp | 50 + src/cluster/src/SingleFeature.h | 26 + src/cluster/src/SingleFileLoader.cpp | 84 + src/cluster/src/SingleFileLoader.h | 29 + src/cluster/src/SingleMute.cpp | 221 ++ src/cluster/src/SingleMute.h | 89 + src/cluster/src/Trainer.cpp | 930 +++++++++ src/cluster/src/Trainer.h | 67 + src/cluster/src/bvec.cpp | 332 +++ src/cluster/src/bvec.h | 69 + src/cluster/src/bvec_iterator.cpp | 28 + src/cluster/src/bvec_iterator.h | 84 + src/cluster/src/main.cpp | 12 + 
src/cluster/src/needleman_wunsch.cpp | 153 ++ src/cluster/src/needleman_wunsch.h | 43 + src/exception/FileDoesNotExistException.cpp | 25 + src/exception/FileDoesNotExistException.h | 23 + src/exception/InvalidInputException.cpp | 24 + src/exception/InvalidInputException.h | 23 + src/exception/InvalidOperationException.cpp | 19 + src/exception/InvalidOperationException.h | 26 + .../InvalidOrderOfOperationsException.cpp | 24 + .../InvalidOrderOfOperationsException.h | 23 + src/exception/InvalidScoreException.cpp | 24 + src/exception/InvalidScoreException.h | 23 + src/exception/InvalidStateException.cpp | 25 + src/exception/InvalidStateException.h | 23 + src/nonltr/ChromDetector.cpp | 41 + src/nonltr/ChromDetector.h | 29 + src/nonltr/ChromDetectorMaxima.cpp | 94 + src/nonltr/ChromDetectorMaxima.h | 47 + src/nonltr/ChromListMaker.cpp | 123 ++ src/nonltr/ChromListMaker.h | 38 + src/nonltr/Chromosome.cpp | 308 +++ src/nonltr/Chromosome.h | 78 + src/nonltr/ChromosomeOneDigit.cpp | 246 +++ src/nonltr/ChromosomeOneDigit.h | 43 + src/nonltr/ChromosomeRandom.cpp | 363 ++++ src/nonltr/ChromosomeRandom.h | 51 + src/nonltr/DetectorMaxima.cpp | 518 +++++ src/nonltr/DetectorMaxima.h | 77 + src/nonltr/EnrichmentMarkovView.cpp | 217 ++ src/nonltr/EnrichmentMarkovView.h | 69 + src/nonltr/HMM.cpp | 630 ++++++ src/nonltr/HMM.h | 103 + src/nonltr/IChromosome.h | 28 + src/nonltr/ITableView.h | 34 + src/nonltr/KmerHashTable.cpp | 445 ++++ src/nonltr/KmerHashTable.h | 83 + src/nonltr/LocationList.cpp | 153 ++ src/nonltr/LocationList.h | 53 + src/nonltr/LocationListCollection.cpp | 101 + src/nonltr/LocationListCollection.h | 41 + src/nonltr/Scanner.cpp | 379 ++++ src/nonltr/Scanner.h | 71 + src/nonltr/Scorer.cpp | 143 ++ src/nonltr/Scorer.h | 54 + src/nonltr/TableBuilder.cpp | 121 ++ src/nonltr/TableBuilder.h | 68 + src/nonltr/Trainer.cpp | 278 +++ src/nonltr/Trainer.h | 80 + src/utility/AffineId.cpp | 212 ++ src/utility/AffineId.h | 50 + src/utility/EmptyLocation.cpp | 53 + 
src/utility/EmptyLocation.h | 35 + src/utility/GlobAlignE.cpp | 317 +++ src/utility/GlobAlignE.h | 58 + src/utility/ILocation.h | 29 + src/utility/LCSLen.cpp | 103 + src/utility/LCSLen.h | 37 + src/utility/Location.cpp | 74 + src/utility/Location.h | 41 + src/utility/Util.cpp | 347 ++++ src/utility/Util.h | 79 + 112 files changed, 17449 insertions(+) create mode 100644 Makefile create mode 100644 README create mode 100644 src/Makefile create mode 100644 src/RepeatsDetector.cpp create mode 100644 src/cluster/Makefile create mode 100644 src/cluster/src/Center.h create mode 100644 src/cluster/src/ClusterFactory.cpp create mode 100644 src/cluster/src/ClusterFactory.h create mode 100644 src/cluster/src/DivergencePoint.cpp create mode 100644 src/cluster/src/DivergencePoint.h create mode 100644 src/cluster/src/Feature.cpp create mode 100644 src/cluster/src/Feature.h create mode 100644 src/cluster/src/GLM.cpp create mode 100644 src/cluster/src/GLM.h create mode 100644 src/cluster/src/HandleSeq.cpp create mode 100644 src/cluster/src/HandleSeq.h create mode 100644 src/cluster/src/Histogram.cpp create mode 100644 src/cluster/src/Histogram.h create mode 100644 src/cluster/src/Loader.cpp create mode 100644 src/cluster/src/Loader.h create mode 100644 src/cluster/src/LogTable.cpp create mode 100644 src/cluster/src/LogTable.h create mode 100644 src/cluster/src/Mat.h create mode 100644 src/cluster/src/Matrix.cpp create mode 100644 src/cluster/src/Matrix.h create mode 100644 src/cluster/src/MultiMute.cpp create mode 100644 src/cluster/src/MultiMute.h create mode 100644 src/cluster/src/NearestNeighbor.h create mode 100644 src/cluster/src/Point.h create mode 100644 src/cluster/src/Predictor.cpp create mode 100644 src/cluster/src/Predictor.h create mode 100644 src/cluster/src/Progress.cpp create mode 100644 src/cluster/src/Progress.h create mode 100644 src/cluster/src/Random.h create mode 100644 src/cluster/src/Runner.cpp create mode 100644 src/cluster/src/Runner.h create mode 100644 
src/cluster/src/SingMute.cpp create mode 100644 src/cluster/src/SingMute.h create mode 100644 src/cluster/src/SingleFeature.cpp create mode 100644 src/cluster/src/SingleFeature.h create mode 100644 src/cluster/src/SingleFileLoader.cpp create mode 100644 src/cluster/src/SingleFileLoader.h create mode 100644 src/cluster/src/SingleMute.cpp create mode 100644 src/cluster/src/SingleMute.h create mode 100644 src/cluster/src/Trainer.cpp create mode 100644 src/cluster/src/Trainer.h create mode 100644 src/cluster/src/bvec.cpp create mode 100644 src/cluster/src/bvec.h create mode 100644 src/cluster/src/bvec_iterator.cpp create mode 100644 src/cluster/src/bvec_iterator.h create mode 100644 src/cluster/src/main.cpp create mode 100644 src/cluster/src/needleman_wunsch.cpp create mode 100644 src/cluster/src/needleman_wunsch.h create mode 100644 src/exception/FileDoesNotExistException.cpp create mode 100644 src/exception/FileDoesNotExistException.h create mode 100644 src/exception/InvalidInputException.cpp create mode 100644 src/exception/InvalidInputException.h create mode 100644 src/exception/InvalidOperationException.cpp create mode 100644 src/exception/InvalidOperationException.h create mode 100644 src/exception/InvalidOrderOfOperationsException.cpp create mode 100644 src/exception/InvalidOrderOfOperationsException.h create mode 100644 src/exception/InvalidScoreException.cpp create mode 100644 src/exception/InvalidScoreException.h create mode 100644 src/exception/InvalidStateException.cpp create mode 100644 src/exception/InvalidStateException.h create mode 100644 src/nonltr/ChromDetector.cpp create mode 100644 src/nonltr/ChromDetector.h create mode 100644 src/nonltr/ChromDetectorMaxima.cpp create mode 100644 src/nonltr/ChromDetectorMaxima.h create mode 100644 src/nonltr/ChromListMaker.cpp create mode 100644 src/nonltr/ChromListMaker.h create mode 100644 src/nonltr/Chromosome.cpp create mode 100644 src/nonltr/Chromosome.h create mode 100644 src/nonltr/ChromosomeOneDigit.cpp 
create mode 100644 src/nonltr/ChromosomeOneDigit.h create mode 100644 src/nonltr/ChromosomeRandom.cpp create mode 100644 src/nonltr/ChromosomeRandom.h create mode 100644 src/nonltr/DetectorMaxima.cpp create mode 100644 src/nonltr/DetectorMaxima.h create mode 100644 src/nonltr/EnrichmentMarkovView.cpp create mode 100644 src/nonltr/EnrichmentMarkovView.h create mode 100644 src/nonltr/HMM.cpp create mode 100644 src/nonltr/HMM.h create mode 100644 src/nonltr/IChromosome.h create mode 100644 src/nonltr/ITableView.h create mode 100644 src/nonltr/KmerHashTable.cpp create mode 100644 src/nonltr/KmerHashTable.h create mode 100644 src/nonltr/LocationList.cpp create mode 100644 src/nonltr/LocationList.h create mode 100644 src/nonltr/LocationListCollection.cpp create mode 100644 src/nonltr/LocationListCollection.h create mode 100644 src/nonltr/Scanner.cpp create mode 100644 src/nonltr/Scanner.h create mode 100644 src/nonltr/Scorer.cpp create mode 100644 src/nonltr/Scorer.h create mode 100644 src/nonltr/TableBuilder.cpp create mode 100644 src/nonltr/TableBuilder.h create mode 100644 src/nonltr/Trainer.cpp create mode 100644 src/nonltr/Trainer.h create mode 100644 src/utility/AffineId.cpp create mode 100644 src/utility/AffineId.h create mode 100644 src/utility/EmptyLocation.cpp create mode 100644 src/utility/EmptyLocation.h create mode 100644 src/utility/GlobAlignE.cpp create mode 100644 src/utility/GlobAlignE.h create mode 100644 src/utility/ILocation.h create mode 100644 src/utility/LCSLen.cpp create mode 100644 src/utility/LCSLen.h create mode 100644 src/utility/Location.cpp create mode 100644 src/utility/Location.h create mode 100644 src/utility/Util.cpp create mode 100644 src/utility/Util.h diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..2e611c1 --- /dev/null +++ b/Makefile @@ -0,0 +1,19 @@ +all: bin/Red.o bin/meshclust2 + +bin/Red.o: + mkdir -p bin + mkdir -p bin/exception + mkdir -p bin/nonltr + mkdir -p bin/utility + $(MAKE) -C src +bin/meshclust2: 
bin/Red.o + $(MAKE) -C src/cluster + cp src/cluster/meshclust2 bin + +clean: + $(MAKE) clean -C src + $(MAKE) clean -C src/cluster + $(RM) -r bin + +rebuild: clean all +.PHONY: all clean diff --git a/README b/README new file mode 100644 index 0000000..7b3e7cd --- /dev/null +++ b/README @@ -0,0 +1,74 @@ +MeShClust2 +Release version + +Requirements: g++ 4.9.1 or later, requires Homebrew on Mac OS X + +Compilation using g++ (homebrew) and GNU Make on Mac OS X +CXX=g++-7 make + +see: https://stackoverflow.com/questions/29057437/compile-openmp-programs-with-gcc-compiler-on-os-x-yosemite + + +Linux/Unix compilation: +make + +Usage: bin/meshclust2 --id 0.x [OPTIONS] *.fasta + +--id The most important parameter, --id, controls the identity cutoff of the sequences. + Needs to be between 0 and 1. + If it is not specified, an identity of 0.9 is used. + +--kmer decides the size of the kmers. It is by default automatically decided by average sequence length, + but if provided, MeShClust can speed up a little by not having to find the largest sequence length. + Increasing kmer size can increase accuracy, but increases memory consumption. + +--mut-type {single, both, nonsingle-typical, nonsingle-all, all-but-reversion, all-but-translocation} + changes the mutation generation algorithm. By default, "single" is used, utilizing only + single point mutations. On low identity data sets, "both", which includes single mutations + and block mutations, is preferable. The option "nonsingle-typical" uses only block mutations, + disallowing single point mutations. Other options include "all", which includes single, + block, and nontypical mutations translocation and reversion. + +--feat determines the combinations of features to be used. By default, "fast" allows 9 fast combinations + to be selected from. "slow" adds 2 slower features which include logarithm based features, + and "extraslow" includes 33 total features used in a previous study. 
+ +--min-feat (default 3) sets the minimum feature pairs to be used. If set to 2, at least 2 feature pairs + will be used. Recall that features include pairwise combinations of the "feat" option. + +--max-feat (default 5) sets the maximum feature pairs to be used. Diminishing returns appear quickly, + so a very large maximum is not advised. + +--sample selects the total number of sequences used for both training and testing. + 300 is the default value. Each sequence generates 10 synthetic mutants. + That is, --sample 300 provides 3000 training pairs and 3000 testing pairs. + +--min-id (default 0.35) sets the lower bound for mutation identity scores to be calculated. Shouldn't need + to be set normally, as lower identities take much longer, especially with single mutations only. + +--threads sets the number of threads to be used. By default OpenMP uses the number of available cores + on your machine, but this parameter overrides that. + +--output specifies the output file, in CD-HIT's CLSTR format, described below: + A '>Cluster ' followed by an increasing index designates a cluster. + Otherwise, the sequence is printed out. + A '*' at the end of a sequence designates the center of the cluster. + An example of a small data set: + + >Cluster 0 + 0 993nt, >seq128 template_6... * + >Cluster 1 + 0 1043nt, >seq235 template_10... + 1 1000nt, >seq216 template_10... * + 2 1015nt, >seq237 template_10... + + +--delta decides how many clusters are looked around in the final clustering stage. + Increasing it creates more accuracy, but takes more time. Default value is 5. + +--iterations specifies how many iterations in the final stage of merging are done until convergence. + Default value is 15. + + + +If the argument is not listed here, it is interpreted as an input (FASTA format) file. 
diff --git a/src/Makefile b/src/Makefile new file mode 100644 index 0000000..3013ed0 --- /dev/null +++ b/src/Makefile @@ -0,0 +1,175 @@ +# CXX = /usr/bin/c++ +CXX ?= g++ + +CXXFLAGS = -O3 -g -fmessage-length=0 -Wall -march=native -std=c++11 + +# +# Objects +# + +ORed = ../bin/Red.o + +# Exception +OInvalidInputException = ../bin/exception/InvalidInputException.o +OInvalidStateException = ../bin/exception/InvalidStateException.o +OFileDoesNotExistException = ../bin/exception/FileDoesNotExistException.o +OInvalidOrderOfOperationsException = ../bin/exception/InvalidOrderOfOperationsException.o +OInvalidScoreException = ../bin/exception/InvalidScoreException.o +OInvalidOperationException = ../bin/exception/InvalidOperationException.o + +# Utility +OUtil = ../bin/utility/Util.o +OLocation = ../bin/utility/Location.o +OEmptyLocation = ../bin/utility/EmptyLocation.o +OLCSLen = ../bin/utility/LCSLen.o +OAffineId = ../bin/utility/AffineId.o +OGlobAlignE = ../bin/utility/GlobAlignE.o + +# Non TR +OChromosome = ../bin/nonltr/Chromosome.o +OChromosomeOneDigit = ../bin/nonltr/ChromosomeOneDigit.o +OChromosomeRandom = ../bin/nonltr/ChromosomeRandom.o +OChromListMaker = ../bin/nonltr/ChromListMaker.o +OTableBuilder = ../bin/nonltr/TableBuilder.o +OScorer = ../bin/nonltr/Scorer.o +ODetectorMaxima = ../bin/nonltr/DetectorMaxima.o +OChromDetectorMaxima = ../bin/nonltr/ChromDetectorMaxima.o +OHMM = ../bin/nonltr/HMM.o +OScanner = ../bin/nonltr/Scanner.o +OTrainer = ../bin/nonltr/Trainer.o +OLocationList = ../bin/nonltr/LocationList.o +OLocationListCollection = ../bin/nonltr/LocationListCollection.o + +OBJS = $(ORed) $(OInvalidInputException) $(OInvalidStateException) $(OFileDoesNotExistException) $(OInvalidOrderOfOperationsException) $(OInvalidOperationException) $(OInvalidScoreException) $(OUtil) $(OLocation) $(OEmptyLocation) $(OChromosome) $(OChromosomeOneDigit) $(OChromosomeRandom) $(OChromListMaker) $(OTableBuilder) $(OScorer) $(ODetectorMaxima) $(OChromDetector) 
$(OChromDetectorMaxima) $(OHMM) $(OScanner) $(OTrainer) $(OLocationList) $(OLocationListCollection) $(OLCSLen) $(OAffineId) $(OGlobAlignE) + +# +# Target +# + +TRed = ../bin/Red + +# +# Make RepeatsDetector +# + +$(TRed): $(OBJS) + $(CXX) -o $(TRed) $(OBJS) + +# +# RepeatsDetector +# + +$(ORed): RepeatsDetector.cpp nonltr/KmerHashTable.h nonltr/KmerHashTable.cpp nonltr/TableBuilder.h nonltr/HMM.h nonltr/Scanner.h nonltr/Trainer.h utility/Util.h + $(CXX) $(CXXFLAGS) -c RepeatsDetector.cpp -o $(ORed) + +# +# Exception +# +$(OInvalidInputException): exception/InvalidInputException.cpp exception/InvalidInputException.h + $(CXX) $(CXXFLAGS) -c exception/InvalidInputException.cpp -o $(OInvalidInputException) + +$(OInvalidStateException): exception/InvalidStateException.cpp exception/InvalidStateException.h + $(CXX) $(CXXFLAGS) -c exception/InvalidStateException.cpp -o $(OInvalidStateException) + +$(OFileDoesNotExistException): exception/FileDoesNotExistException.cpp exception/FileDoesNotExistException.h + $(CXX) $(CXXFLAGS) -c exception/FileDoesNotExistException.cpp -o $(OFileDoesNotExistException) + +$(OInvalidOrderOfOperationsException): exception/InvalidOrderOfOperationsException.cpp exception/InvalidOrderOfOperationsException.h + $(CXX) $(CXXFLAGS) -c exception/InvalidOrderOfOperationsException.cpp -o $(OInvalidOrderOfOperationsException) + +$(OInvalidScoreException): exception/InvalidScoreException.cpp exception/InvalidScoreException.h + $(CXX) $(CXXFLAGS) -c exception/InvalidScoreException.cpp -o $(OInvalidScoreException) + +$(OInvalidOperationException): exception/InvalidOperationException.cpp exception/InvalidOperationException.h + $(CXX) $(CXXFLAGS) -c exception/InvalidOperationException.cpp -o $(OInvalidOperationException) + +# +# Utility +# + +$(OUtil): utility/Util.cpp utility/Util.h utility/Location.h exception/FileDoesNotExistException.h + $(CXX) $(CXXFLAGS) -c utility/Util.cpp -o $(OUtil) + +$(OLocation): utility/Location.cpp utility/Location.h 
utility/ILocation.h exception/InvalidInputException.h utility/Util.h + $(CXX) $(CXXFLAGS) -c utility/Location.cpp -o $(OLocation) + +$(OEmptyLocation): utility/EmptyLocation.cpp utility/EmptyLocation.h utility/ILocation.h exception/InvalidOperationException.h + $(CXX) $(CXXFLAGS) -c utility/EmptyLocation.cpp -o $(OEmptyLocation) + +$(OLCSLen): utility/LCSLen.cpp utility/LCSLen.h + $(CXX) $(CXXFLAGS) -c utility/LCSLen.cpp -o $(OLCSLen) + +$(OAffineId): utility/AffineId.cpp utility/AffineId.h + $(CXX) $(CXXFLAGS) -c utility/AffineId.cpp -o $(OAffineId) + +$(OGlobAlignE): utility/GlobAlignE.cpp utility/GlobAlignE.h + $(CXX) $(CXXFLAGS) -c utility/GlobAlignE.cpp -o $(OGlobAlignE) +# +# Non LTR +# + +$(OChromosome): nonltr/Chromosome.cpp nonltr/Chromosome.h nonltr/IChromosome.h utility/Util.h exception/InvalidInputException.h exception/InvalidOperationException.h + $(CXX) $(CXXFLAGS) -c nonltr/Chromosome.cpp -o $(OChromosome) + +$(OChromosomeOneDigit): nonltr/ChromosomeOneDigit.cpp nonltr/ChromosomeOneDigit.h nonltr/Chromosome.h exception/InvalidInputException.h + $(CXX) $(CXXFLAGS) -c nonltr/ChromosomeOneDigit.cpp -o $(OChromosomeOneDigit) + +$(OChromosomeRandom): nonltr/ChromosomeRandom.cpp nonltr/ChromosomeRandom.h nonltr/IChromosome.h exception/InvalidInputException.h exception/InvalidStateException.h utility/Util.h + $(CXX) $(CXXFLAGS) -c nonltr/ChromosomeRandom.cpp -o $(OChromosomeRandom) + +$(OTableBuilder): nonltr/TableBuilder.cpp nonltr/TableBuilder.h utility/Util.h nonltr/ChromosomeOneDigit.h nonltr/ITableView.h nonltr/KmerHashTable.h nonltr/KmerHashTable.cpp nonltr/EnrichmentMarkovView.h nonltr/EnrichmentMarkovView.cpp exception/InvalidStateException.h nonltr/ChromListMaker.h nonltr/IChromosome.h + $(CXX) $(CXXFLAGS) -c nonltr/TableBuilder.cpp -o $(OTableBuilder) + +$(OScorer): nonltr/Scorer.cpp nonltr/Scorer.h nonltr/ChromosomeOneDigit.h utility/Util.h exception/InvalidStateException.h + $(CXX) $(CXXFLAGS) -c nonltr/Scorer.cpp -o $(OScorer) + 
+$(ODetectorMaxima): nonltr/DetectorMaxima.cpp nonltr/DetectorMaxima.h utility/ILocation.h exception/InvalidStateException.h + $(CXX) $(CXXFLAGS) -c nonltr/DetectorMaxima.cpp -o $(ODetectorMaxima) + +$(OChromDetectorMaxima): nonltr/ChromDetectorMaxima.cpp nonltr/ChromDetectorMaxima.h nonltr/DetectorMaxima.h nonltr/ChromosomeOneDigit.h utility/Util.h utility/ILocation.h utility/Location.h + $(CXX) $(CXXFLAGS) -c nonltr/ChromDetectorMaxima.cpp -o $(OChromDetectorMaxima) + +$(OHMM): nonltr/HMM.cpp nonltr/HMM.h utility/ILocation.h exception/InvalidStateException.h exception/InvalidInputException.h exception/FileDoesNotExistException.h exception/InvalidOperationException.h + $(CXX) $(CXXFLAGS) -c nonltr/HMM.cpp -o $(OHMM) + +$(OScanner): nonltr/Scanner.cpp nonltr/Scanner.h nonltr/Chromosome.h nonltr/ChromosomeOneDigit.h nonltr/HMM.h nonltr/ITableView.h nonltr/Scorer.h utility/Util.h utility/ILocation.h exception/InvalidInputException.h exception/InvalidStateException.h exception/FileDoesNotExistException.h exception/InvalidOperationException.h + $(CXX) $(CXXFLAGS) -c nonltr/Scanner.cpp -o $(OScanner) + +$(OTrainer): nonltr/Trainer.cpp nonltr/Trainer.h nonltr/TableBuilder.h nonltr/KmerHashTable.h nonltr/KmerHashTable.cpp nonltr/HMM.h nonltr/ChromDetectorMaxima.h nonltr/Scorer.h nonltr/ChromListMaker.h utility/Util.h nonltr/LocationListCollection.h + $(CXX) $(CXXFLAGS) -c nonltr/Trainer.cpp -o $(OTrainer) + +$(OChromListMaker): nonltr/ChromListMaker.cpp nonltr/ChromListMaker.h nonltr/Chromosome.h nonltr/ChromosomeOneDigit.h utility/Util.h + $(CXX) $(CXXFLAGS) -c nonltr/ChromListMaker.cpp -o $(OChromListMaker) + +$(OCluster): nonltr/Cluster.cpp nonltr/Cluster.h utility/Util.h exception/InvalidStateException.h exception/InvalidInputException.h + $(CXX) $(CXXFLAGS) -c nonltr/Cluster.cpp -o $(OCluster) + +$(OLocationList): nonltr/LocationList.cpp nonltr/LocationList.h utility/ILocation.h utility/Location.h exception/InvalidStateException.h + $(CXX) $(CXXFLAGS) -c 
nonltr/LocationList.cpp -o $(OLocationList) + +$(OLocationListCollection): nonltr/LocationListCollection.cpp nonltr/LocationListCollection.h utility/Location.h exception/InvalidStateException.h + $(CXX) $(CXXFLAGS) -c nonltr/LocationListCollection.cpp -o $(OLocationListCollection) + + +# +# Make binary directories +# + +red: $(TRed) + +# +# Make Red +# + +bin: + mkdir ../bin + mkdir ../bin/exception + mkdir ../bin/utility + mkdir ../bin/nonltr + +# +# Make clean +# + +clean: + rm -f ../bin/*.o ../bin/exception/*.o ../bin/ms/*.o ../bin/nonltr/*.o ../bin/test/*.o ../bin/utility/*.o ../bin/tr/*.o *.o $(TRed) diff --git a/src/RepeatsDetector.cpp b/src/RepeatsDetector.cpp new file mode 100644 index 0000000..443cf24 --- /dev/null +++ b/src/RepeatsDetector.cpp @@ -0,0 +1,583 @@ +//============================================================================ +// Name : RepeatsDetector.cpp +// Author : Hani Zakaria Girgis, PhD +// Version : +// Description : Red (RepeatsDetector) +//============================================================================ +#include +#include +#include +#include +#include +#include +#include + +#include "nonltr/Trainer.h" +#include "nonltr/KmerHashTable.h" +#include "nonltr/TableBuilder.h" +#include "nonltr/HMM.h" +#include "nonltr/Scanner.h" +#include "nonltr/ChromListMaker.h" +#include "utility/Util.h" + +using namespace std; +using namespace nonltr; +using namespace utility; +using namespace exception; + +/** + * Parameters + */ +// Required parameters +const static string LEN_PRM = string("-len"); // k - length of the motif. + +// Train and Scan the whole genome +const static string GNM_PRM = string("-gnm"); // Train and scan. +const static string ORD_PRM = string("-ord"); // order of background markov chain. +const static string GAU_PRM = string("-gau"); // Half width of the Gaussian mask. 
+const static string THR_PRM = string("-thr"); // The threshold part of the definition of non-repeats +const static string MIN_PRM = string("-min"); // The minimum number of observations + +// Scan using pre-calculated scores and a trained HMM +const static string HMI_PRM = string("-hmi"); // File including the trained model +const static string SEQ_PRM = string("-seq"); // File including the sequence +const static string SCI_PRM = string("-sci"); // File including the scores of the sequence + +// Output options with -gnm only +const static string TBL_PRM = string("-tbl"); // Write the k-mer to the provided file +const static string SCO_PRM = string("-sco"); // Write the scores to the provided directory +const static string HMO_PRM = string("-hmo"); // The Markov model is written to this file. +const static string CND_PRM = string("-cnd"); // Write candidate region to a directory + +// Output options with -gnm and -hmm +const static string MSK_PRM = string("-msk"); // Write masked sequence(s) to file or directory +const static string RPT_PRM = string("-rpt"); // Write coordinates to file or directory +const static string DIR_PRM = string("-dir"); // Read additional sequences(.fa) or scores (.sc) under directory +const static string FRM_PRM = string("-frm"); // Format of the output +void drive(map * const param) { + // Delete old output files + if (param->count(MSK_PRM) > 0) { + if (param->count(GNM_PRM) > 0) { + cout << "Deleting pre-existing files under " << param->at(MSK_PRM); + cout << endl; + Util::deleteFilesUnderDirectory(param->at(MSK_PRM)); + } else if (param->count(HMI_PRM) > 0) { + cout << "Deleting pre-existing " << param->at(MSK_PRM) << endl; + Util::deleteFile(param->at(MSK_PRM)); + } + } + + if (param->count(RPT_PRM) > 0) { + if (param->count(GNM_PRM) > 0) { + cout << "Deleting pre-existing files under " << param->at(RPT_PRM); + cout << endl; + Util::deleteFilesUnderDirectory(param->at(RPT_PRM)); + } else if (param->count(HMI_PRM) > 0) { + cout << "Deleting pre-existing 
" << param->at(RPT_PRM) << endl; + Util::deleteFile(param->at(RPT_PRM)); + } + } + + if (param->count(SCO_PRM) > 0 && param->count(GNM_PRM) > 0) { + cout << "Deleting pre-existing files under " << param->at(SCO_PRM); + cout << endl; + Util::deleteFilesUnderDirectory(param->at(SCO_PRM)); + } + + if (param->count(HMO_PRM) > 0 && param->count(GNM_PRM) > 0) { + cout << "Deleting pre-existing " << param->at(HMO_PRM) << endl; + Util::deleteFile(param->at(HMO_PRM)); + } + + if (param->count(TBL_PRM) > 0 && param->count(GNM_PRM) > 0) { + cout << "Deleting pre-existing " << param->at(TBL_PRM) << endl; + Util::deleteFile(param->at(TBL_PRM)); + } + + // Process the input + int k = atoi(param->at(LEN_PRM).c_str()); + + if (param->count(GNM_PRM) > 0) { + string genomeDir = param->at(GNM_PRM); + int order = atoi(param->at(ORD_PRM).c_str()); + double s = atoi(param->at(GAU_PRM).c_str()); + double t = atoi(param->at(THR_PRM).c_str()); + int minObs = atoi(param->at(MIN_PRM).c_str()); + + // Adjust the threshold when it is one because of the log base. + if (((int) t) == 1) { + t = 1.5; + cout << "The base of the logarithmic function is adjusted." << endl; + } + + + // This part or the next + Trainer * trainer; + if (param->count(CND_PRM) > 0) { + trainer = new Trainer(genomeDir, order, k, s, t, param->at(CND_PRM), minObs); + } else { + trainer = new Trainer(genomeDir, order, k, s, t, minObs); + } + + + if (param->count(TBL_PRM)) { + cout << "Printing the count of the kmer's to: "; + cout << param->at(TBL_PRM) << endl; + trainer->printTable(param->at(TBL_PRM)); + } + + if (param->count(HMO_PRM) > 0) { + cout << "Printing the HMM to: " << endl; + cout << param->at(HMO_PRM) << endl; + trainer->printHmm(param->at(HMO_PRM)); + } + + // Stage 3: Scan + cout << endl << endl; + cout << "Stage 4: Scanning ..." 
<< endl; + vector * fileList = new vector(); + Util::readChromList(genomeDir, fileList, string("fa")); + if (param->count(DIR_PRM) > 0) { + Util::readChromList(param->at(DIR_PRM), fileList, string("fa")); + } + + int chromCount = fileList->size(); + for (int i = 0; i < chromCount; i++) { + cout << "Scanning: " << fileList->at(i) << endl; + + // Output file name + string path(fileList->at(i)); + int slashLastIndex = path.find_last_of(Util::fileSeparator); + int dotLastIndex = path.find_last_of("."); + string nickName = path.substr(slashLastIndex + 1, dotLastIndex - slashLastIndex - 1); + + // Process each sequence with the ith file + ChromListMaker * maker = new ChromListMaker(fileList->at(i)); + const vector * chromList = maker->makeChromOneDigitList(); + + ChromListMaker * oMaker = new ChromListMaker(fileList->at(i)); + const vector * oChromList; + if (param->count(MSK_PRM) > 0) { + oChromList = oMaker->makeChromList(); + } + + for (int h = 0; h < chromList->size(); h++) { + ChromosomeOneDigit * chrom = dynamic_cast(chromList->at(h)); + + // Scan the forward strand + Scanner * scanner = new Scanner(trainer->getHmm(), k, chrom,trainer->getTable()); + + // Scan the reverse complement + chrom->makeRC(); + Scanner * scannerRC = new Scanner(trainer->getHmm(), k, chrom, trainer->getTable()); + scannerRC->makeForwardCoordinates(); + scanner->mergeWithOtherRegions(scannerRC->getRegionList()); + delete scannerRC; + chrom->makeRC(); + + + // Scan the reverse + chrom->makeR(); + Scanner * scannerR = new Scanner(trainer->getHmm(), k, chrom, trainer->getTable()); + scannerR->makeForwardCoordinates(); + scanner->mergeWithOtherRegions(scannerR->getRegionList()); + delete scannerR; + + //@@ The chromosome now has the sequence of the reverse strand + // The actual strand is calculated if the user requested the scores. + + // Print according to the user's requests + bool canAppend = (h == 0) ? 
false : true; + + if (param->count(SCO_PRM) > 0) { + // Calculate the forward strand from the reverse + chrom->makeR(); + + string scoFile = param->at(SCO_PRM) + Util::fileSeparator + nickName + ".scr"; + if (!canAppend) { + cout << "Printing scores to: " << scoFile << endl; + } + // Make sure to print the original E-values not their logarithm + Scorer * scorer = new Scorer(chrom, trainer->getTable()); + scorer->printScores(scoFile, canAppend); + delete scorer; + } + + if (param->count(RPT_PRM) > 0) { + string rptFile = param->at(RPT_PRM) + Util::fileSeparator + nickName + ".rpt"; + if (!canAppend) { + cout << "Printing locations to: " << rptFile << endl; + } + scanner->printIndex(rptFile, canAppend, atoi(param->at(FRM_PRM).c_str())); + } + + if (param->count(MSK_PRM) > 0) { + string mskFile = param->at(MSK_PRM) + Util::fileSeparator + nickName + ".msk"; + if (!canAppend) { + cout << "Printing masked sequence to: " << mskFile << endl; + } + Chromosome * oChrom = oChromList->at(h); + scanner->printMasked(mskFile, *oChrom, canAppend); + } + + // Free memory + delete scanner; + } + + delete maker; + delete oMaker; + } + + // Free memory + fileList->clear(); + delete fileList; + delete trainer; + } else if (param->count(HMI_PRM) > 0) { + HMM * hmm = new HMM(param->at(HMI_PRM)); + + string chromFile = param->at(SEQ_PRM); + string scoresFile = param->at(SCI_PRM); + + ChromosomeOneDigit * chrom = new ChromosomeOneDigit(chromFile); + Scanner * scanner = new Scanner(hmm, k, chrom, scoresFile); + + if (param->count(RPT_PRM) > 0) { + string rptFile = param->at(RPT_PRM); + cout << "Printing locations to: " << rptFile << endl; + scanner->printIndex(rptFile, false, atoi(param->at(FRM_PRM).c_str())); + } + + if (param->count(MSK_PRM) > 0) { + string mskFile = param->at(MSK_PRM); + cout << "Printing masked sequence to: " << mskFile << endl; + Chromosome oChrom(chromFile); + scanner->printMasked(mskFile, oChrom, false); + } + + // Free memory + delete scanner; + delete chrom; + 
delete hmm; + } +} + +int main(int argc, char * argv[]) { + cout << endl << endl; + cout << "This is Red (REpeat Detector) designed and developed by "; + cout << "Hani Zakaria Girgis, PhD." << endl << endl; + + cout << "Version: 05/22/2015" << endl << endl; + + string message = string("Valid argument pairs:\n"); + + message.append("\t-gnm input genome directory, required.\n"); + message.append("\t\tFiles with \".fa\" extension in this directory are used for completing the table of the adjusted counts.\n"); + message.append("\t\tThese Files are scanned for repeats.\n"); + message.append("\t-dir directory including additional input sequences, optional.\n"); + message.append("\t\tFiles with \".fa\" extension in this directory are NOT used for completing the table.\n"); + message.append("\t\tThese Files MUST have different names from those in the genome directory.\n"); + message.append("\t\tThese Files are scanned for repeats.\n"); + + + message.append("\t-len word length equals k defining the k-mer. The default is floor(log_4(genome size)).\n"); + message.append("\t-ord order of the background Markov chain. The default is floor(k/2)-1.\n"); + message.append("\t-gau half width of the mask. The default is based on the GC content.\n"); + message.append("\t\t20 if the GC content > 33% and < 67%, 40 otherwise.\n"); + + message.append("\t-thr the threshold score of the low adjusted scores of non-repeats. The default is 2.\n"); + message.append("\t-min the minimum number of the observed k-mers. 
The default is 3.\n"); + message.append("\t-tbl file where the table of the adjusted counts is written, optional.\n"); + message.append("\t-sco directory where scores are saved, optional.\n"); + message.append("\t\tScore files have the \".scr\" extension.\n"); + + message.append("\t-cnd directory where candidate regions are saved, optional.\n"); + message.append("\t\tCandidates files have the \".cnd\" extension.\n"); + message.append("\t-rpt directory where repeats locations are saved, optional.\n"); + message.append("\t\tRepeats files have the \".rpt\" extension.\n"); + message.append("\t-msk directory where masked sequences are saved, optional.\n"); + message.append("\t\tMasked sequences files have the \".msk\" extension.\n"); + + message.append("\t-frm the format of the output: 1 (chrName:start-end) or 2 (chrName\tstart\tend).\n"); + message.append("\t\tThe output format are zero based and the end is exclusive.\n"); + + message.append("\t-hmo file where the HMM is saved, optional.\n\n"); + + message.append("Examples:\n"); + message.append("\tThe following command runs Red with the defaults and generates the masked sequences.\n"); + message.append("\tRed -gnm genome_directory -msk output_directory\n\n"); + message.append("\tThe following command runs Red with the defaults and generates the masked sequences and the locations of repeats.\n"); + message.append("\tRed -gnm genome_directory -msk output_directory -rpt output_directory\n\n"); + + // Table of valid argument pairs + map * validParam = new map(); + validParam->insert(map::value_type(LEN_PRM, "DUMMY")); + validParam->insert(map::value_type(GNM_PRM, "DUMMY")); + validParam->insert(map::value_type(ORD_PRM, "DUMMY")); + validParam->insert(map::value_type(GAU_PRM, "DUMMY")); + validParam->insert(map::value_type(THR_PRM, "DUMMY")); + validParam->insert(map::value_type(HMI_PRM, "DUMMY")); + validParam->insert(map::value_type(SEQ_PRM, "DUMMY")); + validParam->insert(map::value_type(SCI_PRM, "DUMMY")); + 
validParam->insert(map::value_type(TBL_PRM, "DUMMY")); + validParam->insert(map::value_type(SCO_PRM, "DUMMY")); + validParam->insert(map::value_type(HMO_PRM, "DUMMY")); + validParam->insert(map::value_type(MSK_PRM, "DUMMY")); + validParam->insert(map::value_type(RPT_PRM, "DUMMY")); + validParam->insert(map::value_type(CND_PRM, "DUMMY")); + validParam->insert(map::value_type(DIR_PRM, "DUMMY")); + validParam->insert(map::value_type(MIN_PRM, "DUMMY")); + validParam->insert(map::value_type(FRM_PRM, "DUMMY")); + + // Make a table of the user provided arguments + map * param = new map(); + if (argc > 1 && argc % 2 == 1) { + for (int i = 1; i < argc - 1; i += 2) { + if (validParam->count(argv[i]) > 0) { + param->insert(map::value_type(argv[i], argv[i + 1])); + } else { + cerr << "Invalid argument: " << argv[i] << " " << argv[i + 1]; + cerr << endl; + cerr << message << endl; + return 1; + } + } + + + // Check if the user provided the essential arguments + + + if (param->count(LEN_PRM) == 0) { + if (param->count(GNM_PRM) > 0) { + // Calculate the size of the genome + long genomeLength = 0; + vector * fileList = new vector(); + Util::readChromList(param->at(GNM_PRM), fileList, "fa"); + cout << "Calculating the length, k, of the k-mer "; + cout << "based on the input genome ... " << endl; + for (int i = 0; i < fileList->size(); i++) { + ChromListMaker * maker = new ChromListMaker(fileList->at(i)); + const vector * chromList = maker->makeChromList(); + for (int h = 0; h < chromList->size(); h++) { + genomeLength += chromList->at(h)->getEffectiveSize(); + } + delete maker; + } + fileList->clear(); + delete fileList; + + double temp = log(genomeLength) / log(4.0); + + int k = floor(temp); + cout << "The recommended k is " << k << "." 
<< endl; + if (k > 15) { + cout << "Due to a memory constraint, k is set to 15."; + cout << endl; + k = 15; + } + + if (k < 12) { + cout<< "Due to a statistical consideration, k is set to 12."; + cout << endl; + k = 12; + } + cout << endl; + + string kString = Util::int2string(k); + param->insert(map::value_type(LEN_PRM, kString)); + + } else { + cerr << "The word length is required." << endl; + cerr << message << endl; + return 1; + } + } + + if(param->count(FRM_PRM) == 0){ + cout << "Using the default output format chrName:start-end" << endl; + param->insert(map::value_type(FRM_PRM, Util::int2string(Scanner::FRMT_POS))); + } else { + if (atoi(param->at(FRM_PRM).c_str()) != Scanner::FRMT_POS && atoi(param->at(FRM_PRM).c_str()) != Scanner::FRMT_BED) { + cerr << "The output format must be " << Scanner::FRMT_POS << " or "; + cerr << Scanner::FRMT_BED << ". The format received is " ; + cerr << param->at(FRM_PRM) << "." << endl; + return 1; + } + } + + if (param->count(GNM_PRM) > 0) { + Util::checkFile(param->at(GNM_PRM)); + + if (param->count(ORD_PRM) == 0) { + double k = atoi(param->at(LEN_PRM).c_str()); + int o = floor(k / 2.0) - 1; + + cout << "Using the default background order: " << o << "."; + cout << endl; + + string oString = Util::int2string(o); + param->insert(map::value_type(ORD_PRM, oString)); + } + + if (param->count(THR_PRM) == 0) { + cout << "Using the default threshold: 2." << endl; + param->insert(map::value_type(THR_PRM, string("2"))); + } else { + if (atoi(param->at(THR_PRM).c_str()) < 1) { + cerr << "The threshold cannot be less than 1."; + cerr << endl; + cerr << message << endl; + return 1; + } + } + + if (param->count(MIN_PRM) == 0) { + cout << "Using the default minimum of the observed count of k-mers: 3." 
<< endl; + param->insert(map::value_type(MIN_PRM, string("3"))); + } else { + if (atoi(param->at(MIN_PRM).c_str()) < 0) { + cerr << "The minimum of the observed count of k-mers cannot be less than 0."; + cerr << endl; + cerr << message << endl; + return 1; + } + } + + if (param->count(GAU_PRM) == 0) { + cout << "Calculating GC content ..." << endl; + + // 1: Count the gc content of the input genome + long genomeLength = 0; + long genomeGc = 0; + vector * fileList = new vector(); + Util::readChromList(param->at(GNM_PRM), fileList, "fa"); + for (int i = 0; i < fileList->size(); i++) { + ChromListMaker * maker = new ChromListMaker(fileList->at(i)); + const vector * chromList = maker->makeChromList(); + + for (int h = 0; h < chromList->size(); h++) { + genomeGc += chromList->at(h)->getGcContent(); + genomeLength += chromList->at(h)->getEffectiveSize(); + } + delete maker; + } + fileList->clear(); + delete fileList; + + // 2: Calculate the gc content of the input genome + double gc = 100.00 * genomeGc / genomeLength; + int w = 20; + if (gc < 33 || gc > 67) { + w = 40; + } + cout << "Using the default half width: " << w; + cout << " based on the GC content of " << gc << endl; + string wString = Util::int2string(w); + param->insert(map::value_type(GAU_PRM, wString)); + } + } else if (param->count(HMI_PRM) > 0) { + Util::checkFile(param->at(HMI_PRM)); + + if (param->count(SEQ_PRM) == 0) { + cerr << "The sequence file is required."; + cerr << endl; + cerr << message << endl; + return 1; + } else { + Util::checkFile(param->at(SEQ_PRM)); + } + + if (param->count(SCI_PRM) == 0) { + cerr << "The scores file is required."; + cerr << endl; + cerr << message << endl; + return 1; + } else { + Util::checkFile(param->at(SCI_PRM)); + } + + } else { + cerr << "A mode is required: training and scanning (-gnm) or "; + cerr << "scanning only (-hmi)." 
<< endl; + cerr << message << endl; + return 1; + } + + // Check optional parameters + if (param->count(TBL_PRM) > 0 && param->count(GNM_PRM) == 0) { + cerr << "Printing the k-mer table is optional with -gnm only."; + cerr << endl; + cerr << message << endl; + return 1; + } + + if (param->count(HMO_PRM) > 0 && param->count(GNM_PRM) == 0) { + cerr << "Printing the HMM is optional with -gnm only."; + cerr << endl; + cerr << message << endl; + return 1; + } + + if (param->count(SCO_PRM) > 0 && param->count(GNM_PRM) == 0) { + cerr << "Printing the scores is optional with -gnm only."; + cerr << endl; + cerr << message << endl; + return 1; + } else if (param->count(SCO_PRM) > 0 && param->count(GNM_PRM) > 0) { + Util::checkFile(param->at(SCO_PRM)); + } + + + if (param->count(CND_PRM) > 0 && param->count(GNM_PRM) == 0) { + cerr << "Printing candidate regions is optional with -gnm only."; + cerr << endl; + cerr << message << endl; + return 1; + } else if (param->count(CND_PRM) > 0 && param->count(GNM_PRM) > 0) { + Util::checkFile(param->at(CND_PRM)); + } + + + if (param->count(DIR_PRM) > 0 && param->count(GNM_PRM) == 0) { + cerr << "Processing additional sequences is optional with -gnm only."; + cerr << endl; + cerr << message << endl; + return 1; + } else if (param->count(DIR_PRM) > 0 && param->count(GNM_PRM) > 0) { + Util::checkFile(param->at(DIR_PRM)); + } + + if (param->count(MSK_PRM) > 0 && param->count(GNM_PRM) > 0) { + Util::checkFile(param->at(MSK_PRM)); + } + + if (param->count(RPT_PRM) > 0 && param->count(GNM_PRM) > 0) { + Util::checkFile(param->at(RPT_PRM)); + } + + // Print out the parameters table + typedef map myMap; + myMap::iterator sIter = param->begin(); + myMap::iterator eIter = param->end(); + cout << endl << "List of final parameters: " << endl; + while (sIter != eIter) { + cout << (*sIter).first << ": " << (*sIter).second << endl; + sIter++; + } + cout << endl; + + // Start! + drive(param); + + // Clear parameters when done. 
+ param->clear(); + delete param; + } else { + cerr << "Argument pairs of the form: -flag value are required."; + cerr << endl; + cerr << message << endl; + } + + //return EXIT_SUCCESS; + return 0; +} diff --git a/src/cluster/Makefile b/src/cluster/Makefile new file mode 100644 index 0000000..817559a --- /dev/null +++ b/src/cluster/Makefile @@ -0,0 +1,31 @@ +TARGET ?= meshclust2 +VERSION ?= 2.0.0 +CXX ?= g++ +ifeq ($(debug),yes) + CXXFLAGS += -ggdb -DDEBUG -fno-omit-frame-pointer -fopenmp +else + CXXFLAGS += -fopenmp -O3 -march=native -g +endif +CXXFLAGS += -std=c++11 -DVERSION=\"$(VERSION)\" +LDFLAGS += -lm + +SOURCES := $(shell find ./src -name '*.cpp') +OBJECTS = $(SOURCES:%.cpp=bin/%.o) +BIN_OBJECTS := $(shell find ../../bin/ -mindepth 2 -name '*.o') + +all: clean $(TARGET) + +$(TARGET): $(OBJECTS) $(BIN_OBJECTS) + $(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS) + +bin/%.o: %.cpp + mkdir -p $(@D) + $(CXX) $(CXXFLAGS) -c $< -o $@ + +clean: + $(RM) $(OBJECTS) $(TARGET) + +install: $(TARGET) + cp $(TARGET) ~/bin + +.PHONY: all clean install diff --git a/src/cluster/src/Center.h b/src/cluster/src/Center.h new file mode 100644 index 0000000..8c2acc5 --- /dev/null +++ b/src/cluster/src/Center.h @@ -0,0 +1,42 @@ +/* -*- C++ -*- + * + * Center.h + * + * Author: Benjamin T James + */ +#ifndef CENTER_H +#define CENTER_H + +#include "Point.h" + +template +struct Center { + Center(Point* c, const vector*> &pts) : center(c->clone()), points(pts), is_to_delete(false) { + } + Center(const Center &cc) : center(cc.center->clone()), points(cc.points), is_to_delete(cc.is_to_delete) {} + + + // Center(const Center& c) { + // center = c.get_clone(); + // points = c.getPoints_c(); + // is_to_delete = c.is_delete(); + // } + ~Center() { if (is_to_delete) { delete center; }} + + Point* getCenter() { return center; } + vector*> &getPoints() { return points; } + + const vector*> &getPoints_c() const { return points; }; + bool is_delete() const { return is_to_delete; } + void lazy_remove() { 
is_to_delete = true; } + size_t size() const { return points.size(); } + bool empty() const { return points.empty(); } + Point* get_clone() const { + return center->clone(); + } + Point *center; + vector*> points; + bool is_to_delete; +}; + +#endif diff --git a/src/cluster/src/ClusterFactory.cpp b/src/cluster/src/ClusterFactory.cpp new file mode 100644 index 0000000..741a325 --- /dev/null +++ b/src/cluster/src/ClusterFactory.cpp @@ -0,0 +1,1024 @@ +/* -*- C++ -*- + * + * ClusterFactory.cpp + * + * Author: Benjamin T James + */ + +#ifndef HEADER_HACK +#include "ClusterFactory.h" +#endif + +#include +#include +#include +#include +#include +#include +#include "Histogram.h" +#include "../../nonltr/KmerHashTable.h" +#include "../../nonltr/ChromListMaker.h" +#include "DivergencePoint.h" +#include "Center.h" +#include "Progress.h" +//#include + +template +T avg_distance(Point &c, const std::vector*> &vec) +{ + T dist = 0; + for (auto pt : vec) { + dist += pt->distance(c); + } + return dist / vec.size(); +} +template +Point* find_center(const std::vector*> &vec) +{ + Point* best = vec.front(); + T lowest = avg_distance(*best, vec); + for (int i = 1; i < vec.size(); i++) { + T dist = avg_distance(*vec[i], vec); + if (dist < lowest) { + best = vec[i]; + lowest = dist; + } + } + return best; +} + + + +template +void old_merge(vector*> ¢ers, map*,vector*>*> &clusters, T bandwidth) +{ + cout << "Merging points ... 
"; + cout.flush(); + vector*> new_centers; + vector*> to_delete; + for (int i = 0; i < centers.size(); i++) { + bool is_good = true; + for (int j = i + 1; j < centers.size(); j++) { + T dist = centers[i]->distance(*centers[j]); + if (dist < bandwidth) { + cout << "Merging centers " << centers[i]->get_header() << " and " << centers[j]->get_header() << endl; + for (auto p : *clusters[centers[i]]) { + clusters[centers[j]]->push_back(p); + } + delete clusters[centers[i]]; + clusters[centers[i]] = NULL; + centers[i]->set_to_delete(true); + to_delete.push_back(centers[i]); + delete centers[i]; + is_good = false; + break; + } + } + if (is_good) { + new_centers.push_back(centers[i]); + } + } + for (auto it = clusters.begin(); it != clusters.end(); it++) { + if ((*it).first->is_to_delete()) { + clusters.erase(it); + } + } + centers.clear(); + centers = new_centers; + cout << "Done" << endl; +} + +template +void sort_nn_func(std::vector *> &points, std::function&, const Point&)> func, std::function&, const Point&)> distfunc) { + if (points.empty()) { + return; + } + cout << "Sorting points... 
"; + cout.flush(); + list good; + int good_idx = 0; + for (int i = points.size() - 1; i > 0; i--) { // start at idx 1 bc we don't want to redelete it + good.push_front(i); + } + vector*> total_points; + total_points.push_back(points[0]); +// good.erase(good.begin()); + while (good.size() > 0) { + auto last = total_points.back(); + auto smallest = good.begin(); + uint64_t small_dist = std::numeric_limits::max();// / points[*smallest]->prob_under(*last); + int count = 0; + + for (auto i = good.begin(); i != good.end(); i++) { + if (func(*points[*i], *last)) { +// cout << "Breaking loop " << points[*i]->get_length() << " " << last->get_length() << " -> " << count << endl; + break; + } + uint64_t dist = distfunc(*points[*i],*last);// / points[*i]->prob_under(*last); + if (dist < small_dist) { + small_dist = dist; + smallest = i; + } + count++; + } +// cout << "Number of gaps: " << num_gaps << endl; + // if (func(*points[*smallest], *last)) { + // cout << "Gap " << points[*smallest]->get_length() << " " << last->get_length() << endl; + // } +// cout << "Sorting: " << points[*smallest]->get_header() << endl; + total_points.push_back(points[*smallest]); + // cout << points[*smallest]->get_header() << endl; + // if (total_points.size() % 100 == 0) { + // cout << "Size: " << total_points.size() << endl; + // } + good.erase(smallest); + } + assert(good.empty()); + assert(points.size() == total_points.size()); + points = total_points; + cout << "Done" << endl; +} + +template +void sort_nn_length(std::vector*> &points, double similarity) { + similarity *= 0.90; + if (points.empty()) { + return; + } + cout << "Sorting points by length... 
"; + cout.flush(); + list good; + int good_idx = 0; + for (int i = 1; i < points.size(); i++) { // start at idx 1 bc we don't want to redelete it + good.push_back(i); + } + vector*> total_points; + total_points.push_back(points[0]); +// good.erase(good.begin()); + bool working = true; + while (working && good.size() > 0) { + working = false; + auto last = total_points.back(); + auto smallest = good.begin(); + T small_dist = points[*smallest]->distance(*last);// / points[*smallest]->prob_under(*last); + for (auto i = good.begin(); i != good.end(); i++) { + double ratio = 100.0 * (double)points[*i]->get_length() / last->get_length(); + if (ratio < similarity) { + // cout << "Length ratio " << ratio << " is less than " << similarity << "." << endl; + break; + } + T dist = points[*i]->distance(*last);// / points[*i]->prob_under(*last); + if (dist < small_dist) { + small_dist = dist; + smallest = i; + } + } + total_points.push_back(points[*smallest]); + // cout << points[*smallest]->get_header() << endl; + // if (total_points.size() % 100 == 0) { + // cout << "Size: " << total_points.size() << endl; + // } + good.erase(smallest); + working = true; + } + assert(good.empty()); + assert(points.size() == total_points.size()); + points = total_points; + cout << "Done" << endl; +} + + + +template +void calculate_gaps(const vector*> &vec, queue &gaps, std::function&, const Point&)> func) +{ + for (int i = 1; i < vec.size(); i++) { + if (func(*vec[i], *vec[i-1])) { + gaps.push(i); + } + } +} + +// TODO: fix bounds +template +pair find_bound(int idx, const vector*> &vec, double sim) +{ + size_t begin_len = vec[idx]->get_length(); + int begin_idx = idx; + int end_idx = idx; + #pragma omp parallel for + for (int j = 0; j < 2; j++) { + if (j == 1) { + for (int i = idx - 1; i >= 0; i--) { + if (vec[i]->get_id() == 0) { + size_t len = vec[i]->get_length(); + if (begin_len < sim * len) { + break; + } + begin_idx = i; + } + } + } else { + for (int i = idx + 1; i < vec.size(); i++) { + 
if (vec[i]->get_id() == 0) { + size_t len = vec[i]->get_length(); + if (len < sim * begin_len) { + break; + } + end_idx = i; + } + } + } + } + if (begin_idx < end_idx) { + return make_pair(begin_idx, end_idx); + } else { + return make_pair(0, vec.size() - 1); + } +} + +template +vector > get_available_or_min(const vector*> &points, Point* p, pair bounds, const Trainer& trn, bool& used_min) +{ + vector*,int> > good; + for (int i = bounds.first; i <= bounds.second; i++) { + if (points[i]->get_id() == 0) { + good.push_back(make_pair(points[i], i)); + } + } + bool f; + vector > close;// = trn.get_close(p, good, f); + used_min = f; + return close; + // if (used_min) { + // used_min = true; + // // find min + // uintmax_t minimum = std::numeric_limits::max(); + // int min_index = -1; + // vector > v; + // for (int i = 0; i < good.size(); i++) { + // uintmax_t u = p->distance(*good[i].first); + // if (u < minimum) { + // min_index = good[i].second; + // minimum = u; + // } + // } + // //std::cout << "none found, using minimum, dist = " << 1.0 - (double)minimum / 10000 << " " << points[min_index]->get_header() << endl; + // // v.push_back(make_pair(min_index, minimum)); + // v.push_back(make_pair(close[0], p->distance(*points[close[0]]))); + // return v; + // } else + // if (!close.empty()) { + // vector > v(close.size()); + // #pragma omp parallel for + // for (int i = 0; i < close.size(); i++) { + // uintmax_t u = p->distance(*points[close[i]]); + // v.at(i) = make_pair(close[i], u); + // } + // return v; + // } else { + // vector > v; + // return v; + // } +} + +template +void mean_shift_update(vector > &part, int j, const Trainer& trn, int delta) +{ + auto center = part[j].getCenter(); + + int i_begin = std::max(0, j - delta); + int i_end = std::min(j + delta, (int)part.size()-1); + // if (i_begin == i_end) { + // return; + // } + Point* top = center->create_double(); + top->zero(); + Point* temp = top->clone(); + uintmax_t bottom = 0; + vector*, bool> > good; + for 
(int i = i_begin; i <= i_end; i++) { +// const auto& vec = part.at(centers[i]); + const auto& vec = part[i].getPoints(); + for (auto p : vec) { + good.push_back(make_pair(p, false)); + // p->set_arg_to_this_d(*temp); + // *top += *temp; + // bottom++; + } + } + trn.filter(center, good); + if (!good.empty()) { + for (auto p : good) { + p.first->set_arg_to_this_d(*temp); + *top += *temp; + bottom++; + } + *top /= bottom; + Point* next = trn.closest(top, good); + // Point *next = NULL; + // int next_dist = std::numeric_limits::max(); + // for (int i = 0; i < N; i++) { + // int dist = points[i]->distance_d(*top); + // if (dist < next_dist) { + // next_dist = dist; + // next = points[i]; + // } + // } + if (next != NULL) { + center->set(*next); + center->set_data_str(next->get_data_str()); + } else { + cerr << "mean shift: NULL" << endl; + } + } else { + cout << "GOOD: EMPTY" << endl; + } + delete top; + delete temp; +} + +template +Point* get_mean(vector*> &available, Point& last, double bandwidth) +{ + Point* top = last.create_double(); + top->zero(); + Point* temp = top->clone(); + double bottom = 0; + const int N = available.size(); + if (N == 0) { + throw "N cannot be 0, bad"; + } + bottom = available.size(); + // TODO: parallelize this loop + for (int i = 0; i < N; i++) { + available[i]->set_arg_to_this_d(*temp); + *top += *temp; + } + + if (bottom != 0) { + *top /= bottom; + } else { + cerr << "No points in vector" << endl; + throw 5; + } +#pragma omp declare reduction(cmin:std::pair*,double>: \ + omp_out = omp_in.second < omp_out.second ? 
omp_in : omp_out ) \ + initializer (omp_priv = std::make_pair((Point*)NULL, (double)std::numeric_limits::max())) \ + + std::pair*,double> result = std::make_pair((Point*)NULL, (double)std::numeric_limits::max()); + //todo: add pragma back in +#pragma omp parallel for reduction(cmin:result) + for (int i = 0; i < available.size(); i++) { + double dist = available[i]->distance_d(*top); + if (dist < result.second) { + result = std::make_pair(available[i], dist); + } + } + delete top; + delete temp; + if (result.first == NULL && !available.empty()) { + throw "not working"; + } + return result.first; +} + +template +bool merge(vector > ¢ers, const Trainer& trn, int delta, int bandwidth) +{ + int num_merge = 0; + for (int i = 0; i < centers.size(); i++) { + long ret = trn.merge(centers, i, i + 1, std::min((int)centers.size()-1, i + delta)); + if (ret > i) { + + num_merge++; + auto &to_add = centers[ret].getPoints(); + auto &to_del = centers[i].getPoints(); + to_add.insert(std::end(to_add), std::begin(to_del), std::end(to_del)); + centers[i].lazy_remove(); + } + // vector*,double> > to_merge; + // for (int j = i + 1; j < std::min((int)centers.size(), i + 1 + delta); j++) { + // to_merge.push_back(std::make_pair(centers[j].getCenter(), -1)); + // } + // Point* closest = trn.merge(centers[i].getCenter(), to_merge); + // if (closest != NULL) { + // #ifdef DEBUG + // cout << "Merged center " << centers[i]->get_header() << " and " << closest->get_header() << endl; + // #endif + // num_merge++; + // // auto& to_del = partition[centers[i]]; + // // auto& to_add = partition[closest]; + // // to_add.insert(std::end(to_add), std::begin(to_del), std::end(to_del)); + // // partition.erase(centers[i]); + // // centers[i]->set_to_delete(true); + // auto& to_del = partition[centers[i]]; + // auto& to_add = partition[closest]; + // to_add.insert(std::end(to_add), std::begin(to_del), std::end(to_del)); + // partition.erase(centers[i]); + // centers[i]->set_to_delete(true); + + // } + } + 
//cout << "Merged " << num_merge << " centers" << endl; + centers.erase(std::remove_if(centers.begin(), centers.end(), [](const Center& p) { + return p.is_delete(); + }), centers.end()); + return num_merge > 0; +} + +template +void print_output(const string& output, vector > & partition) +{ + cout << "Printing output" << endl; + std::ofstream ofs; + ofs.open(output, std::ofstream::out); + int counter = 0; + for (auto& cen : partition) { + if (cen.empty()) { + continue; + } + ofs << ">Cluster " << counter << endl; + int pt = 0; + bool cen_found = false; + for (auto p : cen.getPoints()) { + if (p->get_id() == cen.getCenter()->get_id()) { + cen_found = true; + break; + } + } + if (!cen_found) { + cout << "Center not found" << endl; + cout << "Cluster " << counter << " has center " << cen.getCenter()->get_header() << endl; + // cen.getCenter()->set(*cen.getPoints().at(0)); + } + for (auto p : cen.getPoints()) { + string s = p->get_header(); + ofs << pt << "\t" << p->get_length() << "nt, " << s << "... 
"; + if (p->get_id() == cen.getCenter()->get_id()) { + ofs << "*"; + } + ofs << endl; + pt++; + } + counter++; + } + ofs.close(); +} + +template +void sort(vector*> &points, vector*> ¢ers, int bandwidth, double sim, const Trainer& trn, string output_file, int iter, int delta) +{ + int cur = 0; + points[0]->set_id(points.size()); + cur++; + int last = 0; + vector v; + using partition = map*, vector*> >; + partition part; + centers.push_back(points.front()->clone()); + part[centers.front()].push_back(points.front()); + + while (true) { + pair bounds = find_bound(last, points, sim); + bool used_min; + auto available = get_available_or_min(points, points[last], bounds, trn, used_min); + // std::sort(available.begin(), available.end(), + // [](const pair a, const pair b) { + // return a.second < b.second; + // }); +// auto available = trn.get_close(points[last], points, bounds, used_min); + if (available.empty()) { + break; + } +// used_min = used_min && !v.empty(); + if (used_min) { + if (!v.empty()) { + auto c = points[last]->clone(); + centers.push_back(c); + for (auto idx : v) { + part[c].push_back(points[idx]); + } + v.clear(); + } + last = available.back().first; + } + for (auto pr : available) { + if (cur % 10000 == 0) { + cout << "Placed " << cur << endl; + } + points[pr.first]->set_id(cur); + v.push_back(pr.first); + cur++; + } + if (!used_min) { + last = get_mean(v, *points[last], points, bandwidth); + } + } + auto c = points[last]->clone(); + centers.push_back(c); + for (auto idx : v) { + part[c].push_back(points[idx]); + } + points[0]->set_id(0); + centers[0]->set_id(0); + + cout << "Found " << centers.size() << " initial centers" << endl; + assert(centers.size() == part.size()); + std::sort(points.begin(), points.end(), [](const Point* a, const Point* b) { + return a->get_id() < b->get_id(); + }); + std::sort(centers.begin(), centers.begin(), [](const Point* a, const Point* b) { + return a->get_id() < b->get_id(); + }); + // for (int i = 0; i < 
points.size(); i++) { + // cout << points[i]->get_header() << " "; + // if (i == 0) { + // cout << endl; + // continue; + // }; + // int last_dist = points[i]->distance(*points[i-1]); + // cout << last_dist << endl; + // } + Point* lastp = NULL; + for (auto c : centers) { + auto v = part[c]; + for (auto p : v) { + cout << p->get_header() << " "; + cout << c->get_header() << " "; + cout << p->distance(*c) << " "; + if (lastp == NULL) { + cout << endl; + } else { + cout << p->distance(*lastp) << endl; + } + lastp = p; + } + } + for (int i = 0; i < iter; i++) { + print_output(output_file + to_string(i), part); + cout << "Mean shift iteration " << i << endl; + #pragma omp parallel for + for (int j = 0; j < centers.size(); j++) { + mean_shift_update(part, centers, j, trn, delta); + } + merge(centers, part, trn, delta, bandwidth); + for (auto const& kv : part) { + if (kv.second.empty()) { + cerr << "Empty cluster " << kv.first->get_header() << endl; + throw 0; + } + } + } + for (int j = 0; j < centers.size(); j++) { + mean_shift_update(part, centers, j, trn, 0); + } + print_output(output_file, part); +} + + +/* + * Accumulates points in a center until none are close, + * then returns the next center (not cloned) + */ +template +size_t accumulate(Point** last_ptr, bvec &points, vector > ¢ers, + const Trainer& trn, double sim, double bandwidth, int total_iter) +{ + Point* last = *last_ptr; + vector*> current = {last}; + bool is_min = false; + + for (int num_iter=0; !is_min; num_iter++) { + #ifdef DEBUG + cout << num_iter << " last: " << last->get_header() << endl; + #endif + auto len = last->get_length(); + auto bounds = points.get_range(len * sim, len / sim); + auto result = trn.get_close(last, + points.iter(bounds.first), + points.iter(bounds.second), + is_min); + + if (is_min) { + Point* new_pt = get<0>(result); + // cout << "minimum point: " << new_pt->get_header() << endl; + size_t r = get<2>(result); + size_t c = get<3>(result); + #ifdef DEBUG + cout << "center 
added" << endl; + #endif + // no close points left for center, + // returned value is the next center (return this) + //points.remove_available(bounds.first, bounds.second, newvec); + if (new_pt == NULL) { + // No points left in range, try 1st point + *last_ptr = points.pop(); + } else { + // New center + *last_ptr = new_pt; + points.erase(r, c); + } + vector*> newvec; + points.remove_available(bounds.first, bounds.second, newvec); // DEBUGGING USE ONLY + if (!newvec.empty()) { + throw "this should never happen"; + } + } else { // keep adding points, find new mean + size_t prev_size = current.size(); + points.remove_available(bounds.first, bounds.second, current); + + last = get_mean(current, *last, bandwidth); + size_t added_size = current.size() - prev_size; + #ifdef DEBUG + cout << "added new points (" << added_size << ")" << endl; + #endif + if (last == NULL) { + cerr << "Last is null" << endl; + throw 100; + } + } + } +// cout << "Pushed back center " << last->get_header() << endl; + Center cc(last, current); + centers.push_back(cc); +// Center cen(last, current); +// centers.emplace_back(last, current); + // Point* center = last->clone(); + // centers.push_back(center); + // part[center] = current; + #ifdef DEBUG + for (auto p : current) { + cout << total_iter << " Cluster " << last->get_header() << ": " << p->get_header() << endl; + } + #endif + // if (points.empty()) { + // return true; + // } else { + // return false; + // } + return current.size(); +} + + +template +void ClusterFactory::MS(bvec &points, T bandwidth, double sim, const Trainer& trn, string output, int iter, int delta) +{ + vector > part; +// using partition = map*, vector*> >; +// partition part; + + Point* last = points.pop(); + //cout << "First length: " << last->get_length() << endl; + Progress pa(points.size(), "Accumulation"); + for (int num = 0; last != NULL; num++) { + size_t n = accumulate(&last, points, part, trn, sim, bandwidth, num); + pa += n; + } + pa.end(); +// points.check(); 
+ size_t total = 0; + for (auto cen : part) { + total += cen.getPoints().size(); + } + cout << "total size: " << total << endl; + Progress pu(iter, "Update"); + for (int i = 0; i < iter; i++) { + // #ifdef DEBUG + //print_output(output + to_string(i), part); + // #endif + //cout << "Mean shift iteration " << i << endl; + #pragma omp parallel for + for (int j = 0; j < part.size(); j++) { + mean_shift_update(part, j, trn, delta); + } + merge(part, trn, delta, bandwidth); + pu++; + } + + #pragma omp parallel for + for (int j = 0; j < m_centers.size(); j++) { + mean_shift_update(part, j, trn, 0); + } + pu.end(); + print_output(output, part); +} + +/* + * This uses a callback to specify the specific type of point. + * + * To call this, use like: + * + * factory.build_points("input", &ClusterFactory::get_histogram); + */ +template +std::vector*> ClusterFactory::build_points(vector fileList, std::function*(ChromosomeOneDigit *)> get_point) +{ + std::vector*> points; + std::vector*> cpoints; + unsigned fsize = fileList.size(); + std::vector*> initial_centers; + std::stringstream buffer; + buffer << "Counting " << k << "-mers"; + Progress p(fsize, buffer.str()); + for (unsigned i = 0; i < fsize; i++) { + p++; + ChromListMaker *maker = new ChromListMaker(fileList.at(i)); + const std::vector * chromList = maker->makeChromOneDigitList(); + unsigned csize = chromList->size(); +#pragma omp parallel for ordered + for (unsigned h = 0; h < csize; h++) { + ChromosomeOneDigit *chrom = dynamic_cast(chromList->at(h)); + if (chrom) { + Point *h = get_point(chrom); + if (h != NULL) { +#pragma omp ordered + { + // cout << "Header: " << h->get_header() << endl; + points.push_back(h); + } + } + } else { + throw InvalidStateException(string("Dynamic cast failed")); + } + } + delete maker; + } + return points; +// std::random_shuffle(points.begin(), points.end()); +// queue gaps; +// calculate_gaps(points, gaps, func); + // for (int i = 1; i < points.size(); i++) { + // int la = 
points[i]->get_length(); + // int lb = points[i-1]->get_length(); + // if (lb > la && 100.0 * la / lb < sim) { + // gaps.push(i); + // } + // } + + +// vector*>> p; +// vector*> tmp; +// tmp.push_back(points[0]); +// for (int j = 1; j < points.size(); j++) { + +// int la = points[j]->get_length(); +// int lb = points[j-1]->get_length(); +// assert(lb >= la); +// if (lb > la && 100.0 * la / lb < sim) { +// p.push_back(tmp); +// cout << "Gap " << tmp.size() << endl; +// tmp.clear(); +// } +// tmp.push_back(points[j]); +// } +// if (!tmp.empty()) { +// p.push_back(tmp); +// } + +// // calculate_distances(points); +// int idx = 0; +// for (auto &c : p) { +// sort_nn_func(c, func); +// for (auto v : c) { +// v->set_id(idx++); +// cpoints.push_back(v); +// } +// } + + // sort_nn_func(points, + // [&](const Point&a, const Point&b) { + // int la = a.get_length(); + // int lb = b.get_length(); + // return lb > la && 100.0 * la / lb < sim; + // }, + // [](const Point& a, const Point& b) { + // return a.distance_k1(b); + // }); + + + // // for(auto p : points){ + // // cout << p->get_header() << endl; + // // } + + + + // sort_nn_func(points, + // [&](const Point& a, const Point& b) { + // int la = a.get_length(); + // int lb = b.get_length(); + // if (lb > la && 100.0 * la / lb < sim) { + // double mono = a.distance_k1(b) * 100; + // bool q = mono < sim; + // /* + // if (q) { + // cout << "TRUE" << endl; + // } else { + // cout << "FALSE"<< endl; + // } + // */ + // return q; + // } else { + // return false; + // } + // }, + // [](const Point& a, const Point& b) { + // return a.distance(b); + // }); + // uint64_t idx = 0; + // for (auto v : points) { + // v->set_id(idx++); + + // cpoints.push_back(v); + // } + // cout << "Points: " << cpoints.size() << endl; + + + // for (int i = 0; i < points.size(); i++) { + // cout << points[i]->get_header(); + // if (i > 0) { + // cout << " " << points[i]->distance(*points[i-1]); + // } + // cout << endl; + // } + + + + // for (int i = 
0; i < points.size(); i++) { + // points[i]->set_id(i); + // cpoints.push_back(points[i]); + // assert(cpoints[i]->get_id() == i); + // } + return points; +} + + +// consider all from 'to', distances[].size must be >= to.size() +template +Point* find_nearest(const std::vector*> &to, + vector &good, const std::vector*> &from, + std::vector* distances, int& last_idx) +{ + // Step 1. Fill the closest distance list + int best_dist = 0; + Point* best_pt = NULL; + std::vector::iterator best_idx; + last_idx %= to.size(); + for (auto idx = good.begin(); idx != good.end(); idx++) { + int i = *idx; + distances[last_idx][i] = to[last_idx]->distance(*from[i]); + int dist = 0; + for (int j = 0; j < to.size(); j++) { + dist += distances[j][*idx]; + } + if (best_pt == NULL || dist < best_dist) { + best_pt = from[i]; + best_dist = dist; + best_idx = idx; + } + } + cout << "Dist: " << best_dist << endl; + last_idx++; + good.erase(best_idx); + return best_pt; +} + +template +void ClusterFactory::sort_nn(std::vector *> &points, Point* nearest_to, int arg) const +{ + + if (points.empty()) { + return; + } + cout << "Sorting points... 
"; + cout.flush(); + vector good; + int good_idx = points.size() - 1; + for (int i = 0; i < points.size(); i++) { + if (nearest_to != NULL && nearest_to == points[i]) { + good_idx = i; + } + good.push_back(i); + } + vector*> total_points; + total_points.push_back(points[good_idx]); + good.erase(good.begin() + good_idx); + bool working = true; + while (working && good.size() > 0) { + working = false; + auto last = total_points.back(); + auto smallest = good.begin(); + T small_dist = points[*smallest]->distance(*last); + for (auto i = good.begin(); i != good.end(); i++) { + T dist = points[*i]->distance(*last);// / points[*i]->prob_under(*last); + if (dist < small_dist) { + small_dist = dist; + smallest = i; + } + } + total_points.push_back(points[*smallest]); + // cout << points[*smallest]->get_header() << endl; + if (total_points.size() % 100 == 0) { + cout << "Size: " << total_points.size() << endl; + } + good.erase(smallest); + working = true; + } + assert(good.empty()); + assert(points.size() == total_points.size()); + points = total_points; + cout << "Done" << endl; +} + +template +Point *ClusterFactory::get_divergence_point(ChromosomeOneDigit *chrom) +{ + if (chrom == NULL) { + return NULL; + } + KmerHashTable table(k, 1); + KmerHashTable table_k1(1, 0); + std::vector values; + vector values_k1; + values.clear(); + fill_table(table, chrom, values); + fill_table(table_k1, chrom, values_k1); +// int tmplate = get_template(chrom->getHeader(), templates); + Point *p = new DivergencePoint(values, chrom->size()); +// cout << "mag: " << ((DivergencePoint*)p)->getPseudoMagnitude() << std::endl; + p->set_1mers(values_k1); + p->set_header(chrom->getHeader()); + p->set_length(chrom->getBase()->length()); + p->set_data_str(*chrom->getBase()); + return p; +} + + +template +Point *ClusterFactory::get_histogram(ChromosomeOneDigit *chrom) +{ + if (chrom == NULL) { + return NULL; + } + KmerHashTable table(k, 0); + std::vector values; + values.clear(); + fill_table(table, 
chrom, values); +// int tmplate = get_template(chrom->getHeader(), templates); +// Point *p = new Histogram(values); + Point *p = new DivergencePoint(values, chrom->size()); + p->set_header(chrom->getHeader()); + p->set_length(chrom->getBase()->length()); + return p; +} + +template +T ClusterFactory::find_h(const std::vector*> ¢ers) const +{ + int size = centers.size(); + T div = 0; + int num_divergence = 0; + vector divs; + for (int i = 0; i < size; i++) { + for (int j = 0; j < size; j++) { + if (j == i) { continue; } + divs.push_back(centers[i]->distance(*centers[j])); +// num_divergence++; + } + } + std::sort(divs.begin(), divs.end()); + int end = divs.size() / 50; + for (int i = 0; i < end; i++) { + div += divs[i]; + } + return div / end / 100; + if (divs.size() % 2 == 0) { + return (divs[divs.size()/2 - 1] + divs[divs.size()/2]) / 2; + } else { + return divs[divs.size()/2]; + } +} +/* +template +std::vector *> ClusterFactory::get_centers(const std::vector *> &points) +{ + std::vector*> centers; + for (typename std::vector*>::const_iterator it = points.begin(); it != points.end(); ++it) { + Point *p = *it; + if (choose_center(*p)) { + centers.push_back(p->clone()); + } + } + + return centers; +} +*/ +#ifndef HEADER_HACK +template class ClusterFactory; +template class ClusterFactory; +template class ClusterFactory; +template class ClusterFactory; +template class ClusterFactory; +template class ClusterFactory; + +#endif diff --git a/src/cluster/src/ClusterFactory.h b/src/cluster/src/ClusterFactory.h new file mode 100644 index 0000000..12180c9 --- /dev/null +++ b/src/cluster/src/ClusterFactory.h @@ -0,0 +1,82 @@ +/* -*- C++ -*- + * + * ClusterFactory.h + * + * Author: Benjamin T James + */ + +#ifndef CLUSTERFACTORY_H +#define CLUSTERFACTORY_H + + +#include +#include +#include +#include +#include "../../nonltr/ChromosomeOneDigit.h" +#include "../../nonltr/KmerHashTable.h" +#include "Point.h" +#include "Trainer.h" +#include "bvec.h" + +template +class ClusterFactory 
{ +public: + ClusterFactory(int k_len, int npp=std::numeric_limits::max()) : k(k_len), num_per_partition(npp) {} + std::vector*> build_points(vector files, std::function*(ChromosomeOneDigit*)> get_point); + Point* get_histogram(ChromosomeOneDigit *chrom); + Point* get_divergence_point(ChromosomeOneDigit *chrom); + T find_h(const std::vector*> ¢ers) const; + void sort_nn(std::vector*> &points, Point* nearest_to=NULL, int arg=3) const; + void MS(bvec &points, T bandwidth, double sim, const Trainer& trn, string output, int iter, int delta); +private: + vector lookup_table; + vector*> m_centers; + const int num_per_partition; + int k; + //void fill_table(KmerHashTable &table, ChromosomeOneDigit *chrom, std::vector& values); +}; + +template +void fill_table(KmerHashTable &table, ChromosomeOneDigit *chrom, std::vector& values) +{ + const int k = table.getK(); + auto segment = chrom->getSegment(); + const char *seg_bases = chrom->getBase()->c_str(); + for (vector *v : *segment) { + int start = v->at(0); + int end = v->at(1); + table.wholesaleIncrement(seg_bases, start, end - k + 1); + } + unsigned long tableSize = table.getMaxTableSize(); + values.reserve(values.size() + tableSize); + const V * valueArray = table.getValues(); + std::copy(&valueArray[0], &valueArray[tableSize], std::back_inserter(values)); +} +// template +// void fill_table(KmerHashTable &table, ChromosomeOneDigit *chrom, std::vector& values) +// { +// const int k = table.getK(); +// auto segment = chrom->getSegment(); +// const char *seg_bases = chrom->getBase()->c_str(); +// for (vector *v : *segment) { +// int start = v->at(0); +// int end = v->at(1); +// table.wholesaleIncrement(seg_bases, start, end - k + 1); +// } +// std::vector *keys = table.getKeys(); +// for (std::string str : *keys) { +// values.push_back(table.valueOf(str.c_str())); +// } +// keys->clear(); +// delete keys; +// } + +#ifdef HEADER_HACK +#ifndef CLUSTERFACTORY_C +#define CLUSTERFACTORY_C +#include "ClusterFactory.cpp" +#endif 
+#endif + +#endif diff --git a/src/cluster/src/DivergencePoint.cpp b/src/cluster/src/DivergencePoint.cpp new file mode 100644 index 0000000..70e4e2d --- /dev/null +++ b/src/cluster/src/DivergencePoint.cpp @@ -0,0 +1,284 @@ +/* -*- C++ -*- + * + * DivergencePoint.cpp + * + * Author: Benjamin T James + * + * Main histogram type, includes distance() which is intersection() in Feature.cpp + */ +#include "DivergencePoint.h" +#include +#include +#include +#include + + +template +double DivergencePoint::prob_under(Point &p) const +{ + const DivergencePoint& c = dynamic_cast&>(p); + double sum = 0; + const size_t s = points.size(); + double total = 0; + std::feclearexcept(FE_OVERFLOW); + std::feclearexcept(FE_UNDERFLOW); + for (int i = 0; i < s; i++) { + sum += c.points[i]; + if (i % 4 == 3) { + for (int j = i - 3; j <= i; j++) { + double prob = c.points[j] / sum; + double log_prob = log(prob); + total += (points[j] - 1) * log_prob; + if ((bool)std::fetestexcept(FE_UNDERFLOW)) { + cout << "Underflow!" 
<< endl; + } + // cond.push_back(log(prob)/log4); + } + sum = 0; + } + } + // for (size_t q = 0; q < s; q += 4) { + // double sum = 0; + // for (int i = q; i < q + 4; i++) { + // sum += c.points[i]; + // } + // for (int i = q; i < q + 4; i++) { + // double prob = c.points[i] / sum; + // double log_prob = log(prob); + // total += (points[i] - 1) * log_prob; + // } + // } + return exp(total / s); +} + +template +double DivergencePoint::distance_d(Point& p) const +{ + const DivergencePoint& c = dynamic_cast&>(p); + uint64_t dist = 0; + uint64_t mag = 0; + for (auto i = 0; i < points.size(); i++) { + dist += 2 * min(points[i],(T)c.points[i]); + mag += points[i] + c.points[i]; + } + double frac = (double)dist / mag; + return 10000.0 * (1.0 - frac * frac); +} + + +template +uint64_t DivergencePoint::distance(const Point& p) const +{ + const DivergencePoint& c = dynamic_cast&>(p); + uint64_t dist = 0; + const uint64_t mag = getPseudoMagnitude() + c.getPseudoMagnitude(); + #pragma omp simd + for (auto i = 0; i < points.size(); i++) { + dist += min(points[i], c.points[i]); + } + dist *= 2; + double frac = (double)dist / mag; + return 10000.0 * (1.0 - frac * frac); +} + +template +double DivergencePoint::distance_k1(const Point &p) const +{ + uint64_t dist = 0; + + auto a = Point::get_1mers(), b = p.get_1mers(); + uint64_t mag = 0; + for (auto i = 0; i < 4; i++) { + dist += std::min(a[i], b[i]); + mag += a[i]; + } + return (double)dist / (double)mag; + +} +template +DivergencePoint::DivergencePoint(const std::vector& pts, uint64_t len) +{ + mag = 0; + points = pts; + for (unsigned int i = 0; i < pts.size(); i++) { + mag += pts.at(i); + } +// display(); + nucl_length = len; + to_delete = false; + id = 0; +} + + +template +DivergencePoint::DivergencePoint(unsigned int size) +{ + for (unsigned int i = 0; i < size; i++) { + points.push_back(0); + } + to_delete = false; + nucl_length = 0; + id = 0; +} + +template +void DivergencePoint::operator*=(double d) +{ + unsigned int size 
= points.size(); + for (auto& pt : points) { + pt *= d; + } +} + +template +bool DivergencePoint::operator<(Point& p) const +{ + const DivergencePoint& h = dynamic_cast&>(p); + unsigned int size = std::min(points.size(),h.points.size()); + /*int boundary = 0; + for (unsigned int i = 0; i < size; i++) { + if (points.at(i) > h.points.at(i)) { + boundary++; + } else if (points.at(i) < h.points.at(i)) { + boundary--; + } + } + return boundary < 0;*/ + for (unsigned int i = 0; i < size; i++) { + if (points.at(i) >= h.points.at(i)) { + return false; + } + } + return true; +} + +template +void DivergencePoint::operator/=(double d) +{ + unsigned int size = points.size(); + for (unsigned int i = 0; i < size; i++) { + points[i] /= d; + } +// cout << endl; +} + +template +void DivergencePoint::operator+=(Point& p) +{ + const DivergencePoint& h = dynamic_cast&>(p); + unsigned int size = std::min(points.size(),h.points.size()); + for (unsigned int i = 0; i < size; i++) { + points.at(i) += h.points.at(i); + } +} + +template +uint64_t DivergencePoint::operator-(const Point& p) const +{ + return distance(p); +} + +template +void DivergencePoint::set(Point& p) +{ + const DivergencePoint& h = dynamic_cast&>(p); + points = std::vector(h.points); + set_length(h.get_length()); + to_delete = h.to_delete; + Point::set_header(h.get_header()); + set_id(h.get_id()); +} + +template +void DivergencePoint::display() const +{ + unsigned size = points.size(); + for (unsigned i = 0; i < size; i++) { + std::cout << points.at(i) << " "; + } + std::cout << std::endl; +} + +template +void DivergencePoint::zero() +{ + for (auto &i : points) { + i = 0; + } +} + +template +void DivergencePoint::addOne() +{ + for (auto& a : points) { + a++; + } +} + +template +void DivergencePoint::subOne() +{ + for (auto& a : points) { + a--; + } +} + +/* + * p(y|x) = cond_p + * q(y|x) = cond_p + */ +template +double DivergencePoint::divergence(Point& p) const +{ + const DivergencePoint& d = dynamic_cast&>(p); + T 
sum4_p = 0, sum4_q = 0; // Sum for every 4 nucleotides + double total_sum_p = 0, total_sum_q = 0; // Total running sum of all nucleotides + double outer_sum_p = 0, outer_sum_q = 0; // Prior K-mer sum + for (int i = 0; i < points.size(); i++) { // Compute divergence for P and Q simultaneously + sum4_p += points[i]; + sum4_q += d.points[i]; + if (i % 4 == 3) { //finished counting word, now compute probabilities + double inner_sum_p = 0; // Sum of p(X|Y) * log(p(X|Y) / q(X|Y)) + double inner_sum_q = 0; // Sum of q(X|Y) * log(q(X|Y) / p(X|Y)) + for (int j = i - 3; j <= i; j++) { + double conditional_p = points[j] / sum4_p; + double conditional_q = d.points[j] / sum4_q; + double lg = log(conditional_p) - log(conditional_q); + inner_sum_p += conditional_p * lg; + inner_sum_q += -1 * conditional_q * lg; + } + outer_sum_p += sum4_p * inner_sum_p; + outer_sum_q += sum4_q * inner_sum_q; + + total_sum_p += sum4_p; + total_sum_q += sum4_q; + sum4_p = 0; + sum4_q = 0; + } + } + double left = outer_sum_p / total_sum_p; + double right = outer_sum_q / total_sum_q; + return (left + right) / 2.0; +} + +template +uint64_t DivergencePoint::getPseudoMagnitude() const +{ + return mag; +} + + +template +uint64_t DivergencePoint::getRealMagnitude() const +{ + return mag - points.size(); +} + +#ifndef HEADER_HACK +template class DivergencePoint; +template class DivergencePoint; +template class DivergencePoint; +template class DivergencePoint; +template class DivergencePoint; +template class DivergencePoint; +#endif diff --git a/src/cluster/src/DivergencePoint.h b/src/cluster/src/DivergencePoint.h new file mode 100644 index 0000000..087bff1 --- /dev/null +++ b/src/cluster/src/DivergencePoint.h @@ -0,0 +1,89 @@ +/* -*- C++ -*- + * + * DivergencePoint.h + * + * Author: Benjamin T James + * + * Header for most often used k-mer histogram type + */ +#ifndef DIVERGENCE_POINT_H +#define DIVERGENCE_POINT_H +#include "Point.h" +#include +template +class DivergencePoint : public Point { +public: + 
DivergencePoint(const std::vector& pts, uint64_t len); + DivergencePoint(unsigned int size); + ~DivergencePoint() { points.clear(); } + void operator*=(double d); + void operator/=(double d); + uint64_t operator-(const Point& p) const; + bool operator<(Point& p) const; + void operator+=(Point& p); + void set(Point& p); + void display() const; + void zero(); + void addOne(); + void subOne(); + double prob_under(Point& p) const; + uint64_t getRealMagnitude() const; + uint64_t getPseudoMagnitude() const; +// T magnitude() const { return getRealMagnitude(); }; + double distance_k1(const Point& p) const; + double get_stddev() const { return s_dev; }; + DivergencePoint* clone() const { + auto d = new DivergencePoint(points, to_delete); + d->set_header(Point::get_header()); + d->set_id(get_id()); + d->set_length(get_length()); + d->set_stddev(get_stddev()); + return d; + } + DivergencePoint* create() const { + return new DivergencePoint(points.size()); + } + Point* create_double() const { + vector v; + for (auto val : points) { + v.push_back(val); + } + return new DivergencePoint(v, nucl_length); + } + void set_arg_to_this_d(Point& p) const { + DivergencePoint& c = dynamic_cast< DivergencePoint&>(p); + for (int i = 0; i < points.size(); i++) { + c.points[i] = points[i]; + } + c.set_id(id); + }; + + + bool is_to_delete() const { + return to_delete; + } + void set_to_delete(bool b) { + to_delete = b; + } + double divergence(Point& p) const; + double distance_d(Point& p) const; + uint64_t distance(const Point& p) const; + const vector& get_data() const { return points; } + void set_id(uintmax_t c_id) { id = c_id; }; + const uintmax_t get_id() const { return id; }; + + void set_length(unsigned long len) { nucl_length = len; }; + void set_stddev(double s_dev_) { s_dev = s_dev_; }; + unsigned long get_length() const { return nucl_length; }; + unsigned long size() const { return points.size(); }; + std::vector points; + +private: + uintmax_t mag; + bool to_delete; + uint64_t id; 
+ uint64_t nucl_length; + double s_dev; +}; + +#endif diff --git a/src/cluster/src/Feature.cpp b/src/cluster/src/Feature.cpp new file mode 100644 index 0000000..67baf50 --- /dev/null +++ b/src/cluster/src/Feature.cpp @@ -0,0 +1,1823 @@ +/* -*- C++ -*- + * + * Feature.cpp + * + * Author: Benjamin T James + * + * Raw feature methods are here. + * Duplicates exist of many of the functions + * exist because I was lazy and couldn't get + * anonymous functions to work with the hashing + */ +#include "Feature.h" +#include "DivergencePoint.h" +#include +#include +#include +#include +#include "../../utility/GlobAlignE.h" + + +template +Feature::Feature(const Feature& feat_) : k(feat_.get_k()) +{ + flags = feat_.get_flags(); + mins = feat_.get_mins(); + maxs = feat_.get_maxs(); + is_sims = feat_.get_sims(); + combos = feat_.get_combos(); + lookup = feat_.get_lookup(); + is_finalized = feat_.get_finalized(); + do_save = false; + auto freverse = [](int idx, int k) { + int sum = 0; + for (int i = 0; i < k; i++) { + int rem = idx % 4; + idx /= 4; + sum = 4 * sum + rem; + + } + return sum; + }; + auto freverse_complement = [](int idx, int k) { + std::vector v; + for (int i = 0; i < k; i++) { + v.push_back(3 - idx % 4); + idx /= 4; + } + int sum = 0; + for (auto val : v) { + sum = 4 * sum + val; + } + return sum; + }; + for (auto f : lookup) { + raw_funcs.push_back(get_func(f)); + } +} + + +// void * __gxx_personality_v0=0; +// void * _Unwind_Resume =0; +template +Feature Feature::operator=(const Feature& feat_) +{ + k = feat_.get_k(); + flags = feat_.get_flags(); + mins = feat_.get_mins(); + maxs = feat_.get_maxs(); + is_sims = feat_.get_sims(); + combos = feat_.get_combos(); + lookup = feat_.get_lookup(); + is_finalized = feat_.get_finalized(); + do_save = false; + auto freverse = [](int idx, int k) { + int sum = 0; + for (int i = 0; i < k; i++) { + int rem = idx % 4; + idx /= 4; + sum = 4 * sum + rem; + + } + return sum; + }; + auto freverse_complement = [](int idx, int k) { + 
std::vector v; + for (int i = 0; i < k; i++) { + v.push_back(3 - idx % 4); + idx /= 4; + } + int sum = 0; + for (auto val : v) { + sum = 4 * sum + val; + } + return sum; + }; + for (auto f : lookup) { + raw_funcs.push_back(get_func(f)); + } + return *this; +} + +template +void Feature::add_feature(uint64_t f_flags, Combo combo) +{ +// cout << "Adding combo " << f_flags << endl; + if (combo != Combo::xy && combo != Combo::x2y && combo != Combo::xy2 && combo != Combo::x2y2) { + throw "invalid combo"; + } + vector indices; + for (uint64_t f = 1; f <= f_flags; f = (f << 1)) { + // it is in the new parameter but not currently in store + if ((f_flags & f) != 0) { + if ((flags & f) == 0) { + lookup.push_back(f); + raw_funcs.push_back(get_func(f)); + //cout << "new single feature " << f << endl; + mins.push_back(std::numeric_limits::max()); + maxs.push_back(std::numeric_limits::min()); + is_sims.push_back(feat_is_sim(f)); + is_finalized.push_back(false); + flags |= f; + } + indices.push_back(index_of(f)); + } + } + combos.push_back(std::make_pair(combo, indices)); +} + +template +void Feature::finalize() +{ + for (size_t i = 0; i < is_finalized.size(); i++) { + is_finalized[i] = true; + } +} +template +void Feature::normalize_cache(vector &cache) const +{ + for (size_t i = 0; i < lookup.size(); i++) { + double val = (cache[i] - mins[i]) / (maxs[i] - mins[i]); + if (is_sims[i]) { + cache[i] = val; + } else { + cache[i] = 1 - val; + } + } +} +template +vector Feature::compute_all_raw(Point &p, Point &q) +{ + vector cache(lookup.size()); + uint64_t done = 0; +#ifdef FEATURE_OMP +#pragma omp parallel for +#endif + for (size_t i = 0; i < lookup.size(); i++) { + if ((lookup[i] & done) == 0) { +// auto rres = get_func(lookup[i])(p, q); + auto rres = raw_funcs[i](p, q); + cache[i] = rres; + } + } + return cache; +} + +template +void Feature::set_normal(uint64_t single_flag, double min_, double max_) +{ + int idx = index_of(single_flag); + mins.at(idx) = min_; + maxs.at(idx) = 
max_; + is_finalized.at(idx) = true; +} + +template +void Feature::normalize(const vector > &pairs) +{ + + for (size_t i = 0; i < lookup.size(); i++) { + double small = mins[i], big = maxs[i]; + if (lookup[i] == FEAT_ALIGN) { + mins[i] = 0; + maxs[i] = 1; + continue; + } + if (is_finalized[i]) { + continue; + } +// #ifdef FEATURE_OMP +// #pragma omp parallel for reduction(min:small), reduction(max:big) +// #endif + auto func = raw_funcs[i];// + // get_func(lookup[i]); + for (size_t j = 0; j < pairs.size(); j++) { + double val = func(*pairs[j].first, *pairs[j].second); + if (val < small) { + small = val; + } + if (val > big) { + big = val; + } + } + + mins[i] = small; + maxs[i] = big; + } +}; + +template +vector Feature::feat_names() +{ + std::vector vec; + for (int i = 0; i < combos.size(); i++) { + auto indices = combos[i].second; + std::vector names; + for (auto s : indices) { + names.push_back(feat_name(lookup[s])); + } + std::string str = ""; + auto combo = combos[i].first; + if (combo == Combo::xy) { + str = names[0]; + for (int j = 1; j < indices.size(); j++) { + str += " * " + names[j]; + } + } else if (combo == Combo::xy2 && indices.size() == 2) { + str = names[0] + " * " + names[1] + "^2"; + } else if (combo == Combo::x2y && indices.size() == 2) { + str = names[0] + "^2 * " + names[1]; + } else if (combo == Combo::x2y2) { + str = names[0] + "^2"; + for (int j = 1; j < indices.size(); j++) { + str += " * " + names[j] + "^2"; + } + } + vec.push_back(str); + } + return vec; +} + +template +std::string Feature::feat_name(uint64_t single_flag) +{ + if (single_flag == FEAT_ALIGN) { + return "align"; + } else if (single_flag == FEAT_HELLINGER) { + return "hellinger"; + } else if (single_flag == FEAT_MANHATTAN) { + return "manhattan"; + } else if (single_flag == FEAT_EUCLIDEAN) { + return "euclidean"; + } else if (single_flag == FEAT_CHI_SQUARED) { + return "chi_squared"; + } else if (single_flag == FEAT_NORMALIZED_VECTORS) { + return "normalized_vectors"; + } 
else if (single_flag == FEAT_HARMONIC_MEAN) { + return "harmonic_mean"; + } else if (single_flag == FEAT_JEFFEREY_DIV) { + return "jefferey_divergence"; + } else if (single_flag == FEAT_K_DIV) { + return "k_divergence"; + } else if (single_flag == FEAT_PEARSON_COEFF) { + return "pearson"; + } else if (single_flag == FEAT_SQCHORD) { + return "squared_chord"; + } else if (single_flag == FEAT_KL_COND) { + return "kl_conditional"; + } else if (single_flag == FEAT_MARKOV) { + return "markov"; + } else if (single_flag == FEAT_INTERSECTION) { + return "intersection"; + } else if (single_flag == FEAT_RRE_K_R) { + return "rre_k_r"; + } else if (single_flag == FEAT_D2z) { + return "d2z"; + } else if (single_flag == FEAT_SIM_MM) { + return "sim_mm"; + } else if (single_flag == FEAT_EUCLIDEAN_Z) { + return "euclidean_z"; + } else if (single_flag == FEAT_EMD) { + return "emd"; + } else if (single_flag == FEAT_SPEARMAN) { + return "spearman"; + } else if (single_flag == FEAT_JACCARD) { + return "jaccard"; + } else if (single_flag == FEAT_LENGTHD) { + return "length_difference"; + } else if (single_flag == FEAT_D2s) { + return "d2s"; + } else if (single_flag == FEAT_AFD) { + return "afd"; + } else if (single_flag == FEAT_MISMATCH) { + return "mismatch"; + } else if (single_flag == FEAT_CANBERRA) { + return "canberra"; + } else if (single_flag == FEAT_KULCZYNSKI1) { + return "kulczynski1"; + } else if (single_flag == FEAT_KULCZYNSKI2) { + return "kulczynski2"; + } else if (single_flag == FEAT_SIMRATIO) { + return "simratio"; + } else if (single_flag == FEAT_JENSEN_SHANNON) { + return "jensen_shannon"; + } else if (single_flag == FEAT_D2_star) { + return "d2_star"; + } else if (single_flag == FEAT_N2R) { + return "n2r"; + } else if (single_flag == FEAT_N2RC) { + return "n2rc"; + } else if (single_flag == FEAT_N2RRC) { + return "n2rrc"; + } else { + return "unknown"; + } +} + +template +std::function&,Point&)> Feature::get_func_(uint64_t single_flag) +{ + std::function&,Point&)> 
func = [&](Point&,Point&)->double { + cerr << "Unknown single flag " << single_flag << endl; + throw "Function not set"; + }; + if (single_flag == FEAT_ALIGN) { + func = [&](Point& a,Point& b) { + return align(a, b, atable); + }; + } else if (single_flag == FEAT_HELLINGER) { + func = hellinger; + } else if (single_flag == FEAT_MANHATTAN) { + func = manhattan; + } else if (single_flag == FEAT_EUCLIDEAN) { + func = euclidean; + } else if (single_flag == FEAT_CHI_SQUARED) { + func = chi_squared; + } else if (single_flag == FEAT_NORMALIZED_VECTORS) { + func = normalized_vectors; + } else if (single_flag == FEAT_HARMONIC_MEAN) { + func = harmonic_mean; + } else if (single_flag == FEAT_JEFFEREY_DIV) { + func = jefferey_divergence; + } else if (single_flag == FEAT_K_DIV) { + func = k_divergence; + } else if (single_flag == FEAT_PEARSON_COEFF) { + func = pearson; + } else if (single_flag == FEAT_SQCHORD) { + func = squaredchord; + } else if (single_flag == FEAT_KL_COND) { + func = kl_conditional; + } else if (single_flag == FEAT_MARKOV) { + func = markov; + } else if (single_flag == FEAT_INTERSECTION) { + func = intersection; + } else if (single_flag == FEAT_RRE_K_R) { + func = rre_k_r; + } else if (single_flag == FEAT_D2z) { + func = d2z; + } else if (single_flag == FEAT_SIM_MM) { + func = sim_mm; + } else if (single_flag == FEAT_EUCLIDEAN_Z) { + func = euclidean_z; + } else if (single_flag == FEAT_EMD) { + func = emd; + } else if (single_flag == FEAT_SPEARMAN) { + func = spearman; + } else if (single_flag == FEAT_JACCARD) { + func = jaccard; + } else if (single_flag == FEAT_LENGTHD) { + func = length_difference; + } else if (single_flag == FEAT_D2s) { + func = d2s; + } else if (single_flag == FEAT_AFD) { + func = afd; + } else if (single_flag == FEAT_MISMATCH) { + func = mismatch; + } else if (single_flag == FEAT_CANBERRA) { + func = canberra; + } else if (single_flag == FEAT_KULCZYNSKI1) { + func = kulczynski1; + } else if (single_flag == FEAT_KULCZYNSKI2) { + func = 
kulczynski2; + } else if (single_flag == FEAT_SIMRATIO) { + func = simratio; + } else if (single_flag == FEAT_JENSEN_SHANNON) { + func = [&](Point&a, Point&b) { return jensen_shannon(a, b); }; + } else if (single_flag == FEAT_D2_star) { + func = d2_star; + } else if (single_flag == FEAT_N2R) { + func = [&](Point&a, Point&b) { return n2r(a, b); }; + } else if (single_flag == FEAT_N2RC) { + func = [&](Point&a, Point&b) { return n2rc(a, b); }; + } else if (single_flag == FEAT_N2RRC) { + func = [&](Point&a, Point&b) { return n2rrc(a, b); }; + } + return func; +} + +template +std::function&,Point&)> Feature::get_func(uint64_t single_flag) +{ +// cout << "SINGLE FLAG: " << single_flag << ": " << Feature::log2(single_flag) << endl; + if (!do_save) { + return get_func_(single_flag); + } else if (single_flag == FEAT_HELLINGER) { + return [&](Point& a, Point& b) { return c_hellinger(a,b); }; + } else if (single_flag == FEAT_MANHATTAN) { + return [&](Point& a, Point& b) { return c_manhattan(a,b); }; + } else if (single_flag == FEAT_EUCLIDEAN) { + return [&](Point& a, Point& b) { return c_euclidean(a,b); }; + } else if (single_flag == FEAT_CHI_SQUARED) { + return [&](Point& a, Point& b) { return c_chi_squared(a,b); }; + } else if (single_flag == FEAT_NORMALIZED_VECTORS) { + return [&](Point& a, Point& b) { return c_normalized_vectors(a,b); }; + } else if (single_flag == FEAT_HARMONIC_MEAN) { + return [&](Point& a, Point& b) { return c_harmonic_mean(a,b); }; + } else if (single_flag == FEAT_JEFFEREY_DIV) { + return [&](Point& a, Point& b) { return c_jefferey_divergence(a,b); }; + } else if (single_flag == FEAT_K_DIV) { + return [&](Point& a, Point& b) { return c_k_divergence(a,b); }; + } else if (single_flag == FEAT_PEARSON_COEFF) { + return [&](Point& a, Point& b) { return c_pearson(a,b); }; + } else if (single_flag == FEAT_SQCHORD) { + return [&](Point& a, Point& b) { return c_squaredchord(a,b); }; + } else if (single_flag == FEAT_KL_COND) { + return [&](Point& a, Point& b) { 
return c_kl_conditional(a,b); }; + } else if (single_flag == FEAT_MARKOV) { + return [&](Point& a, Point& b) { return c_markov(a,b); }; + } else if (single_flag == FEAT_INTERSECTION) { + return [&](Point& a, Point& b) { return c_intersection(a,b); }; + } else if (single_flag == FEAT_RRE_K_R) { + return [&](Point& a, Point& b) { return c_rre_k_r(a,b); }; + } else if (single_flag == FEAT_D2z) { + return [&](Point& a, Point& b) { return c_d2z(a,b); }; + } else if (single_flag == FEAT_SIM_MM) { + return [&](Point& a, Point& b) { return c_sim_mm(a,b); }; + } else if (single_flag == FEAT_EUCLIDEAN_Z) { + return [&](Point& a, Point& b) { return c_euclidean_z(a,b); }; + } else if (single_flag == FEAT_EMD) { + return [&](Point& a, Point& b) { return c_emd(a,b); }; + } else if (single_flag == FEAT_SPEARMAN) { + return [&](Point& a, Point& b) { return c_spearman(a,b); }; + } else if (single_flag == FEAT_JACCARD) { + return [&](Point& a, Point& b) { return c_jaccard(a,b); }; + } else if (single_flag == FEAT_LENGTHD) { + return [&](Point& a, Point& b) { return length_difference(a,b); }; + } else if (single_flag == FEAT_D2s) { + return [&](Point& a, Point& b) { return c_d2s(a,b); }; + } else if (single_flag == FEAT_AFD) { + return [&](Point& a, Point& b) { return c_afd(a,b); }; + } else if (single_flag == FEAT_MISMATCH) { + return [&](Point& a, Point& b) { return c_mismatch(a,b); }; + } else if (single_flag == FEAT_CANBERRA) { + return [&](Point& a, Point& b) { return c_canberra(a,b); }; + } else if (single_flag == FEAT_KULCZYNSKI1) { + return [&](Point& a, Point& b) { return c_kulczynski1(a,b); }; + } else if (single_flag == FEAT_KULCZYNSKI2) { + return [&](Point& a, Point& b) { return c_kulczynski2(a,b); }; + } else if (single_flag == FEAT_SIMRATIO) { + return [&](Point& a, Point& b) { return c_simratio(a,b); }; + } else if (single_flag == FEAT_JENSEN_SHANNON) { + return [&](Point& a, Point& b) { return c_jensen_shannon(a,b); }; + } else if (single_flag == FEAT_D2_star) { + 
return [&](Point& a, Point& b) { return c_d2_star(a,b); }; + } else if (single_flag == FEAT_N2R) { + return [&](Point& a, Point& b) { return c_n2r(a,b); }; + } else if (single_flag == FEAT_N2RC) { + return [&](Point& a, Point& b) { return c_n2rc(a,b); }; + } else if (single_flag == FEAT_N2RRC) { + return [&](Point& a, Point& b) { return c_n2rrc(a,b); }; + } else { + throw "err"; + } + return get_func_(single_flag); +} +template +vector Feature::multi_to_log(uint64_t multi) +{ + vector ret; + for (uint64_t i = 1; i <= 33; i++) { + if (((1UL << i) & multi) != 0) { + ret.push_back(i); + } + } + return ret; +} + +template +bool Feature::feat_is_sim(uint64_t single_flag) const +{ + bool unknown = true; + bool is_sim = true; + switch (single_flag) { + case FEAT_ALIGN: + is_sim = true; + break; + case FEAT_HELLINGER: + is_sim = false; + break; + case FEAT_MANHATTAN: + is_sim = false; + break; + case FEAT_EUCLIDEAN: + is_sim = false; + break; + case FEAT_CHI_SQUARED: + is_sim = false; + break; + case FEAT_NORMALIZED_VECTORS: + is_sim = true; + break; + case FEAT_HARMONIC_MEAN: + is_sim = true; + break; + case FEAT_JEFFEREY_DIV: + is_sim = false; + break; + case FEAT_K_DIV: + is_sim = false; + break; + case FEAT_PEARSON_COEFF: + is_sim = true; + break; + case FEAT_SQCHORD: + is_sim = false; + break; + case FEAT_KL_COND: + is_sim = false; + break; + case FEAT_MARKOV: + is_sim = true; + break; + case FEAT_INTERSECTION: + is_sim = true; + break; + case FEAT_RRE_K_R: + is_sim = false; + break; + case FEAT_D2z: + is_sim = true; + break; + case FEAT_SIM_MM: + is_sim = true;//probably yes + break; + case FEAT_EUCLIDEAN_Z: + is_sim = false; + break; + case FEAT_EMD: + is_sim = false; + break; + case FEAT_SPEARMAN: + is_sim = true; + break; + case FEAT_JACCARD: + is_sim = true; + break; + case FEAT_LENGTHD: + is_sim = false; + break; + case FEAT_D2s: + is_sim = true; + break; + case FEAT_AFD: + is_sim = false; + break; + case FEAT_MISMATCH: + is_sim = false; + break; + case 
FEAT_CANBERRA: + is_sim = false; + break; + case FEAT_KULCZYNSKI1: + is_sim = false; + break; + case FEAT_KULCZYNSKI2: + is_sim = true; + break; + case FEAT_SIMRATIO: + is_sim = true; + break; + case FEAT_JENSEN_SHANNON: + is_sim = false; + break; + case FEAT_D2_star: + is_sim = true; + break; + case FEAT_N2R: + is_sim = true; + break; + case FEAT_N2RC: + is_sim = true; + break; + case FEAT_N2RRC: + is_sim = true; + break; + default: + cerr << "bad feature flag " << single_flag << " aka 2^" << log(single_flag) << endl; + + throw single_flag; + } + return is_sim; +} + + +template +double Feature::c_kulczynski2(Point& a, Point& b) { + + auto aid = a.get_id(); + auto bid = b.get_id(); + auto tup = std::tuple(aid, bid, Feature::log2(FEAT_KULCZYNSKI2)); + if (ltable.find(tup) == ltable.end()) { + double val = kulczynski2(a, b); + ltable.insert({tup, val}); + return val; + } else { + return ltable.at(tup); + } +} + +template +double Feature::kulczynski2(Point &a, Point &b) +{ + const DivergencePoint& p = dynamic_cast&>(a); + const DivergencePoint& q = dynamic_cast&>(b); + const auto N = p.points.size(); + uint64_t min_sum = 0; + double ap = (double)p.getPseudoMagnitude() / N; + double aq = (double)q.getPseudoMagnitude() / N; + for (auto i = 0; i < N; i++) { + min_sum += std::min(p.points[i], q.points[i]); + } + double coeff = N * (ap + aq) / (2 * ap * aq); + return coeff * min_sum; +} +template +double Feature::align(Point &a, Point &b, std::map, double> &atbl) +{ + auto ai = a.get_id(); + auto bi = b.get_id(); + std::pair pr = ai < bi ? 
std::make_pair(ai, bi) : std::make_pair(bi, ai); + auto res = atbl.find(pr); + if (res == atbl.end()) { + auto sa = a.get_data_str(); + auto sb = b.get_data_str(); + int la = sa.length(); + int lb = sb.length(); + GlobAlignE galign(sa.c_str(), 0, la-1, + sb.c_str(), 0, lb-1, + 1, -1, 2, 1); + double val = galign.getIdentity(); +#pragma omp critical + atbl[pr] = val; + return val; + } else { + return res->second; + } +} + +template +double Feature::c_squaredchord(Point& a, Point& b) { + + auto aid = a.get_id(); + auto bid = b.get_id(); + auto tup = std::tuple(aid, bid, Feature::log2(FEAT_SQCHORD)); + if (ltable.find(tup) == ltable.end()) { + double val = squaredchord(a, b); + ltable.insert({tup, val}); + return val; + } else { + return ltable.at(tup); + } +} + +template +double Feature::squaredchord(Point &a, Point &b) +{ + const DivergencePoint& p = dynamic_cast&>(a); + const DivergencePoint& q = dynamic_cast&>(b); + const auto N = p.points.size(); + double sum = 0; + for (auto i = 0; i < N; i++) { + sum += p.points[i] + q.points[i] - 2 * sqrt(p.points[i] * q.points[i]); + } + return sum; +} + +template +double Feature::c_intersection(Point& a, Point& b) { + + auto aid = a.get_id(); + auto bid = b.get_id(); + auto tup = std::tuple(aid, bid, Feature::log2(FEAT_INTERSECTION)); + if (ltable.find(tup) == ltable.end()) { + double val = intersection(a, b); + ltable.insert({tup, val}); + return val; + } else { + return ltable.at(tup); + } +} + +template +double Feature::intersection(Point &a, Point &b) +{ + const DivergencePoint& p = dynamic_cast&>(a); + const DivergencePoint& q = dynamic_cast&>(b); + const auto N = p.points.size(); + uintmax_t dist = 0; + uintmax_t mag = p.getPseudoMagnitude() + q.getPseudoMagnitude(); + #pragma omp simd + for (auto i = 0; i < N; i++) { + dist += 2 * std::min(p.points[i], q.points[i]); + } + return (double)dist / (double)mag; +} + +template +double Feature::c_pearson(Point& a, Point& b) { + + auto aid = a.get_id(); + auto bid = 
b.get_id(); + auto tup = std::tuple(aid, bid, Feature::log2(FEAT_PEARSON_COEFF)); + if (ltable.find(tup) == ltable.end()) { + double val = pearson(a, b); + ltable.insert({tup, val}); + return val; + } else { + return ltable.at(tup); + } +} + +template +double Feature::pearson(Point &a, Point &b) +{ + const DivergencePoint& p = dynamic_cast&>(a); + const DivergencePoint& q = dynamic_cast&>(b); + const auto N = p.points.size(); + double dap = (double)p.getPseudoMagnitude() / N; + double daq = (double)q.getPseudoMagnitude() / N; + double dot = 0, np = 0, nq = 0; + for (auto i = 0; i < N; i++) { + double dp = p.points[i] - dap; + double dq = q.points[i] - daq; + np += dp * dp; + nq += dq * dq; + dot += dp * dq; + } + return dot / sqrt(std::max(np * nq, 0.5)); +} + +template +double Feature::c_simratio(Point& a, Point& b) { + + auto aid = a.get_id(); + auto bid = b.get_id(); + auto tup = std::tuple(aid, bid, Feature::log2(FEAT_SIMRATIO)); + if (ltable.find(tup) == ltable.end()) { + double val = simratio(a, b); + ltable.insert({tup, val}); + return val; + } else { + return ltable.at(tup); + } +} + +template +double Feature::simratio(Point &a, Point &b) +{ + const DivergencePoint& p = dynamic_cast&>(a); + const DivergencePoint& q = dynamic_cast&>(b); + const auto N = p.points.size(); + uintmax_t dot = 0, norm2 = 0; + for (auto i = 0; i < N; i++) { + intmax_t diff = p.points[i] - q.points[i]; + dot += p.points[i] * q.points[i]; + norm2 += diff * diff; + } + return dot / (dot + sqrt(norm2)); +} + +template +double Feature::c_manhattan(Point& a, Point& b) { + + auto aid = a.get_id(); + auto bid = b.get_id(); + auto tup = std::tuple(aid, bid, Feature::log2(FEAT_MANHATTAN)); + if (ltable.find(tup) == ltable.end()) { + double val = manhattan(a, b); + ltable.insert({tup, val}); + return val; + } else { + return ltable.at(tup); + } +} + +template +double Feature::manhattan(Point& a, Point& b) +{ + const DivergencePoint& p = dynamic_cast&>(a); + const DivergencePoint& q = 
dynamic_cast&>(b); + auto N = p.points.size(); + int sum = 0; + #pragma omp simd + for (auto i = 0; i < N; i++) { + sum += p.points[i] > q.points[i] ? p.points[i] - q.points[i] : q.points[i] - p.points[i]; + } +// std::cout << "manhattan: " << sum << std::endl; + return sum; +} + +template +double Feature::length_difference(Point& a, Point& b) +{ + const DivergencePoint& p = dynamic_cast&>(a); + const DivergencePoint& q = dynamic_cast&>(b); + auto lp = p.get_length(); + auto lq = q.get_length(); + if (lp == 0 || lq == 0) { + cerr << "lp: " << lp << " lq: " << lq << endl; + throw 123; + } + auto ret = (lp > lq) ? (lp - lq) : (lq - lp); +// std::cout << "length difference: " << ret << std::endl; + return ret; +} + + +double neighbor(double *cp, double *cq, double ap, double aq, const size_t N) +{ + double sp = 0, sq = 0; + #pragma omp simd + for (auto i = 0; i < N; i++) { + double dp = cp[i] - ap; + double dq = cq[i] - aq; + sp += dp * dp; + sq += dq * dq; + } + sp = sqrt(sp / N); + sq = sqrt(sq / N); + double psum = 0, qsum = 0; + #pragma omp simd + for (auto i = 0; i < N; i++) { + cp[i] = (cp[i] - ap) / sp; + cq[i] = (cq[i] - aq) / sq; + psum += cp[i] * cp[i]; + qsum += cq[i] * cq[i]; + } + double total = 0; + psum = sqrt(psum); + qsum = sqrt(qsum); + #pragma omp simd + for (auto i = 0; i < N; i++) { + cp[i] /= psum; + cq[i] /= qsum; + total += cp[i] * cq[i]; + } + return total; +} + +template +double Feature::c_n2rrc(Point& a, Point& b) { + + auto aid = a.get_id(); + auto bid = b.get_id(); + auto tup = std::tuple(aid, bid, Feature::log2(FEAT_N2RRC)); + if (ltable.find(tup) == ltable.end()) { + double val = n2rrc(a, b); + ltable.insert({tup, val}); + return val; + } else { + return ltable.at(tup); + } +} + +template +double Feature::n2rrc(Point& a, Point& b) const +{ + const DivergencePoint& p = dynamic_cast&>(a); + const DivergencePoint& q = dynamic_cast&>(b); + const auto N = p.points.size(); + double *cp = new double[N]; + double *cq = new double[N]; + double ap 
= 0, aq = 0; + for (auto i = 0; i < N; i++) { + int j = reverse.at(i); + int h = reverse_complement.at(i); + cp[i] = p.points[h] + p.points[i] + p.points[j]; + cq[i] = q.points[h] + q.points[i] + q.points[j]; + ap += cp[i]; + aq += cq[i]; + } + ap /= N; + aq /= N; + double total = neighbor(cp, cq, ap, aq, N); + delete[] cp; + delete[] cq; +// std::cout << "n2rrc: " << total << std::endl; + return total; +} + +template +double Feature::c_jensen_shannon(Point& a, Point& b) { + + auto aid = a.get_id(); + auto bid = b.get_id(); + auto tup = std::tuple(aid, bid, Feature::log2(FEAT_JENSEN_SHANNON)); + if (ltable.find(tup) == ltable.end()) { + double val = jensen_shannon(a, b); + ltable.insert({tup, val}); + return val; + } else { + return ltable.at(tup); + } +} + +template +double Feature::jensen_shannon(Point &a, Point &b) const +{ + const DivergencePoint& p = dynamic_cast&>(a); + const DivergencePoint& q = dynamic_cast&>(b); + uint64_t mp = p.getPseudoMagnitude(); + uint64_t mq = q.getPseudoMagnitude(); + double sum = 0; + const auto N = p.points.size(); + #pragma omp simd reduction(+:sum) + for (auto i = 0; i < N; i++) { + double pp = (double)p.points[i] / mp; + double pq = (double)q.points[i] / mq; + double avg = 0.5 * (pp + pq); + #ifndef USETBL + double lp = // tbl[(int)(coeff * pp / avg)]; + log(pp / avg); + double lq = // tbl[(int)(coeff * pq / avg)]; + log(pq / avg); + #else + double lp = tbl[(int)(coeff * pp / avg)]; + double lq = tbl[(int)(coeff * pq / avg)]; + #endif + sum += pp * lp + pq * lq; + } + return sum / 2; +} + +template +double Feature::c_rre_k_r(Point& a, Point& b) { + + auto aid = a.get_id(); + auto bid = b.get_id(); + auto tup = std::tuple(aid, bid, Feature::log2(FEAT_RRE_K_R)); + if (ltable.find(tup) == ltable.end()) { + double val = rre_k_r(a, b); + ltable.insert({tup, val}); + return val; + } else { + return ltable.at(tup); + } +} + +template +double Feature::rre_k_r(Point& a, Point& b) +{ + const DivergencePoint& p = dynamic_cast&>(a); + 
const DivergencePoint& q = dynamic_cast&>(b); + const auto N = p.points.size(); + double op = 0, oq = 0; + const double l4 = log(4); + uint64_t sum4_p = 0, sum4_q = 0; + for (auto i = 0; i < N; i++) { + sum4_p += p.points[i]; + sum4_q += q.points[i]; + if (i % 4 == 3) { + double inner_sum_p = 0; + double inner_sum_q = 0; + for (auto j = i - 3; j <= i; j++) { + double conditional_p = (double)p.points[j] / sum4_p; + double conditional_q = (double)q.points[j] / sum4_q; + double avg = 0.5 * (conditional_p + conditional_q); + inner_sum_p += (double)(p.points[j]) + * log(conditional_p / avg) / sum4_p; + inner_sum_q += (double)(q.points[j]) + * log(conditional_q / avg) / sum4_q; + } + op += inner_sum_p; + oq += inner_sum_q; + sum4_p = 0; + sum4_q = 0; + } + } + double val = 0.5 * (op + oq); + return val; +} + + +template +double Feature::c_hellinger(Point& a, Point& b) { + auto aid = a.get_id(); + auto bid = b.get_id(); + auto tup = std::tuple(aid, bid, Feature::log2(FEAT_HELLINGER)); + if (ltable.find(tup) == ltable.end()) { + double val = hellinger(a, b); + ltable.insert({tup, val}); + return val; + } else { + return ltable.at(tup); + } +} + +template +double Feature::hellinger(Point& a, Point& b) +{ + const DivergencePoint& p = dynamic_cast&>(a); + const DivergencePoint& q = dynamic_cast&>(b); + const auto N = p.points.size(); + double ap = (double)p.getPseudoMagnitude() / N; + double aq = (double)q.getPseudoMagnitude() / N; + double sum = 0; + for (auto i = 0; i < N; i++) { + double diff = sqrt(p.points[i] / ap) - sqrt(q.points[i] / aq); + sum += diff * diff; + } + return sqrt(2 * sum); +} + +template +double Feature::c_euclidean(Point& a, Point& b) { + + auto aid = a.get_id(); + auto bid = b.get_id(); + auto tup = std::tuple(aid, bid, Feature::log2(FEAT_EUCLIDEAN)); + if (ltable.find(tup) == ltable.end()) { + double val = euclidean(a, b); + ltable.insert({tup, val}); + return val; + } else { + return ltable.at(tup); + } +} + +template +double 
Feature::euclidean(Point& a, Point& b) +{ + const DivergencePoint& p = dynamic_cast&>(a); + const DivergencePoint& q = dynamic_cast&>(b); + const auto N = p.points.size(); + uintmax_t sum = 0; + for (auto i = 0; i < N; i++) { + auto diff = p.points[i] - q.points[i]; + sum += diff * diff; + } + return sqrt(sum); +} + +template +double Feature::c_chi_squared(Point& a, Point& b) { + + auto aid = a.get_id(); + auto bid = b.get_id(); + auto tup = std::tuple(aid, bid, Feature::log2(FEAT_CHI_SQUARED)); + if (ltable.find(tup) == ltable.end()) { + double val = chi_squared(a, b); + ltable.insert({tup, val}); + return val; + } else { + return ltable.at(tup); + } +} + +template +double Feature::chi_squared(Point& a, Point& b) +{ + const DivergencePoint& p = dynamic_cast&>(a); + const DivergencePoint& q = dynamic_cast&>(b); + const auto N = p.points.size(); + double sum = 0; + for (auto i = 0; i < N; i++) { + auto diff = p.points[i] - q.points[i]; + sum += (double)(diff * diff) / (p.points[i] + q.points[i]); + } + return sum; +} + +template +double Feature::c_normalized_vectors(Point& a, Point& b) { + + auto aid = a.get_id(); + auto bid = b.get_id(); + auto tup = std::tuple(aid, bid, Feature::log2(FEAT_NORMALIZED_VECTORS)); + if (ltable.find(tup) == ltable.end()) { + double val = normalized_vectors(a, b); + ltable.insert({tup, val}); + return val; + } else { + return ltable.at(tup); + } +} + +template +double Feature::normalized_vectors(Point& a, Point& b) +{ + const DivergencePoint& p = dynamic_cast&>(a); + const DivergencePoint& q = dynamic_cast&>(b); + const auto N = p.points.size(); + uintmax_t sum = 0; + uintmax_t d1 = 0, d2 = 0; + for (auto i = 0; i < N; i++) { + sum += p.points[i] * q.points[i]; + d1 += p.points[i] * p.points[i]; + d2 += q.points[i] * q.points[i]; + } + return (double)sum / sqrt(d1 * d2); +} + +template +double Feature::c_harmonic_mean(Point& a, Point& b) { + + auto aid = a.get_id(); + auto bid = b.get_id(); + auto tup = std::tuple(aid, bid, 
Feature::log2(FEAT_HARMONIC_MEAN)); + if (ltable.find(tup) == ltable.end()) { + double val = harmonic_mean(a, b); + ltable.insert({tup, val}); + return val; + } else { + return ltable.at(tup); + } +} + +template +double Feature::harmonic_mean(Point& a, Point& b) +{ + const DivergencePoint& p = dynamic_cast&>(a); + const DivergencePoint& q = dynamic_cast&>(b); + const auto N = p.points.size(); + double sum = 0; + for (auto i = 0; i < N; i++) { + double numer = p.points[i] * q.points[i]; + sum += numer / (p.points[i] + q.points[i]); + } + return 2 * sum; +} + +template +double Feature::c_jefferey_divergence(Point& a, Point& b) { + + auto aid = a.get_id(); + auto bid = b.get_id(); + auto tup = std::tuple(aid, bid, Feature::log2(FEAT_JEFFEREY_DIV)); + if (ltable.find(tup) == ltable.end()) { + double val = jefferey_divergence(a, b); + ltable.insert({tup, val}); + return val; + } else { + return ltable.at(tup); + } +} + +template +double Feature::jefferey_divergence(Point& a, Point& b) +{ + const DivergencePoint& p = dynamic_cast&>(a); + const DivergencePoint& q = dynamic_cast&>(b); + uint64_t mp = p.getPseudoMagnitude(); + uint64_t mq = q.getPseudoMagnitude(); + double sum = 0; + const auto N = p.points.size(); + for (auto i = 0; i < N; i++) { + double pp = (double)p.points[i] / mp; + double pq = (double)q.points[i] / mq; + double diff = pp - pq; + sum += diff * log(pp / pq); + } + return sum; +} + +template +double Feature::c_k_divergence(Point& a, Point& b) { + + auto aid = a.get_id(); + auto bid = b.get_id(); + auto tup = std::tuple(aid, bid, Feature::log2(FEAT_K_DIV)); + if (ltable.find(tup) == ltable.end()) { + double val = k_divergence(a, b); + ltable.insert({tup, val}); + return val; + } else { + return ltable.at(tup); + } +} + +template +double Feature::k_divergence(Point& a, Point& b) +{ + const DivergencePoint& p = dynamic_cast&>(a); + const DivergencePoint& q = dynamic_cast&>(b); + uint64_t mp = p.getPseudoMagnitude(); + uint64_t mq = q.getPseudoMagnitude(); 
+ double sum = 0; + const auto N = p.points.size(); + for (auto i = 0; i < N; i++) { + double pp = (double)p.points[i] / mp; + double pq = (double)q.points[i] / mq; + double avg = 0.5 * (pp + pq); + sum += pp * log(pp / avg); + } + return sum; +} + +template +double Feature::c_kl_conditional(Point& a, Point& b) { + + auto aid = a.get_id(); + auto bid = b.get_id(); + auto tup = std::tuple(aid, bid, Feature::log2(FEAT_KL_COND)); + if (ltable.find(tup) == ltable.end()) { + double val = kl_conditional(a, b); + ltable.insert({tup, val}); + return val; + } else { + return ltable.at(tup); + } +} + +template +double Feature::kl_conditional(Point& a, Point& b) +{ + const DivergencePoint& p = dynamic_cast&>(a); + const DivergencePoint& q = dynamic_cast&>(b); + uint64_t sum4_p = 0, sum4_q = 0; // Sum for every 4 nucleotides + double outer_sum_p = 0, outer_sum_q = 0; // Prior K-mer sum + const auto N = p.points.size(); + for (auto i = 0; i < N; i++) { + sum4_p += p.points[i]; + sum4_q += q.points[i]; + if (i % 4 == 3) { //finished counting word, now compute probabilities + double inner_sum_p = 0; // Sum of p(X|Y) * log(p(X|Y) / q(X|Y)) + double inner_sum_q = 0; // Sum of q(X|Y) * log(q(X|Y) / p(X|Y)) + for (auto j = i - 3; j <= i; j++) { + double conditional_p = (double)p.points[j] / sum4_p; + double conditional_q = (double)q.points[j] / sum4_q; + double lg = log(conditional_p / conditional_q); + inner_sum_p += conditional_p * lg; + inner_sum_q += -1 * conditional_q * lg; + } + outer_sum_p += sum4_p * inner_sum_p; + outer_sum_q += sum4_q * inner_sum_q; + + sum4_p = 0; + sum4_q = 0; + } + } + double left = outer_sum_p / p.getPseudoMagnitude(); + double right = outer_sum_q / q.getPseudoMagnitude(); + return (left + right) / 2.0; +} + +template +double Feature::c_markov(Point& a, Point& b) { + + auto aid = a.get_id(); + auto bid = b.get_id(); + auto tup = std::tuple(aid, bid, Feature::log2(FEAT_MARKOV)); + if (ltable.find(tup) == ltable.end()) { + double val = markov(a, b); + 
ltable.insert({tup, val}); + return val; + } else { + return ltable.at(tup); + } +} + +template +double Feature::markov(Point& a, Point& b) +{ + const DivergencePoint& q = dynamic_cast&>(a); + const DivergencePoint& p = dynamic_cast&>(b); + double total = 0; // Prior K-mer sum + const auto N = p.points.size(); + for (auto i = 0; i < N; i += 4) { + uint64_t psum = 0, qsum = 0; + for (auto j = 0; j < 4; j++) { + psum += p.points[i+j]; + qsum += q.points[i+j]; + } + double lpsum = log(psum); + double lqsum = log(qsum); + for (auto j = 0; j < 4; j++) { + total += (q.points[i+j]-1) * (log(p.points[i+j]) - lpsum); + total += (p.points[i+j]-1) * (log(q.points[i+j]) - lqsum); + } + } + return total / 2; +} + +template +double Feature::c_d2z(Point& a, Point& b) { + + auto aid = a.get_id(); + auto bid = b.get_id(); + auto tup = std::tuple(aid, bid, Feature::log2(FEAT_D2z)); + if (ltable.find(tup) == ltable.end()) { + double val = d2z(a, b); + ltable.insert({tup, val}); + return val; + } else { + return ltable.at(tup); + } +} + +template +double Feature::d2z(Point& a, Point& b) +{ + const DivergencePoint& p = dynamic_cast&>(a); + const DivergencePoint& q = dynamic_cast&>(b); + double sum = 0; + const auto N = p.points.size(); + double ap = (double)p.getPseudoMagnitude() / N; + double aq = (double)q.getPseudoMagnitude() / N; + double sp = p.get_stddev(), sq = q.get_stddev(); + for (auto i = 0; i < N; i++) { + double pz = (p.points[i] - ap) / sp; + double qz = (q.points[i] - aq) / sq; + sum += pz * qz; + } + return sum; +} + +template +double d_markov(Point& a, Point& b) +{ + const DivergencePoint& q = dynamic_cast&>(b); + return log(Feature::markov(b, a) / Feature::markov(b, b)) / q.getRealMagnitude(); +} + +template +double Feature::c_sim_mm(Point& a, Point& b) { + + auto aid = a.get_id(); + auto bid = b.get_id(); + auto tup = std::tuple(aid, bid, Feature::log2(FEAT_SIM_MM)); + if (ltable.find(tup) == ltable.end()) { + double val = sim_mm(a, b); + ltable.insert({tup, val}); + 
return val; + } else { + return ltable.at(tup); + } +} + +template +double Feature::sim_mm(Point& a, Point& b) +{ + return 1 - exp(0.5 * (d_markov(a, b) + d_markov(b, a))); +} + +template +double Feature::c_euclidean_z(Point& a, Point& b) { + + auto aid = a.get_id(); + auto bid = b.get_id(); + auto tup = std::tuple(aid, bid, Feature::log2(FEAT_EUCLIDEAN_Z)); + if (ltable.find(tup) == ltable.end()) { + double val = euclidean_z(a, b); + ltable.insert({tup, val}); + return val; + } else { + return ltable.at(tup); + } +} + +template +double Feature::euclidean_z(Point& a, Point& b) +{ + const DivergencePoint& p = dynamic_cast&>(a); + const DivergencePoint& q = dynamic_cast&>(b); + double sum = 0; + const auto N = p.points.size(); + double ap = (double)p.getPseudoMagnitude() / N; + double aq = (double)q.getPseudoMagnitude() / N; + double sp = p.get_stddev(), sq = q.get_stddev(); + for (auto i = 0; i < N; i++) { + double pz = (p.points[i] - ap) / sp; + double qz = (q.points[i] - aq) / sq; + sum += (pz - qz) * (pz - qz); + } + return sqrt(sum); +} + +template +double Feature::c_emd(Point& a, Point& b) { + + auto aid = a.get_id(); + auto bid = b.get_id(); + auto tup = std::tuple(aid, bid, Feature::log2(FEAT_EMD)); + if (ltable.find(tup) == ltable.end()) { + double val = emd(a, b); + ltable.insert({tup, val}); + return val; + } else { + return ltable.at(tup); + } +} + +template +double Feature::emd(Point& a, Point& b) +{ + const DivergencePoint& p = dynamic_cast&>(a); + const DivergencePoint& q = dynamic_cast&>(b); + const auto N = p.points.size(); + uintmax_t cp = 0, cq = 0; + uintmax_t dist = 0; + for (auto i = 0; i < N; i++) { + cp += p.points[i]; + cq += q.points[i]; + dist += cp > cq ? 
cp - cq : cq - cp; + } + return (double)dist; +} + +template +std::vector tiedrank(const Point& a) +{ + const DivergencePoint& p = dynamic_cast&>(a); + const auto N = p.points.size(); + vector ip(N, 0); + std::iota(std::begin(ip), std::end(ip), 0); + std::sort(std::begin(ip), std::end(ip), [&](size_t x, size_t y) { + return p.points[x] < p.points[y]; + }); +} + +template +double Feature::c_spearman(Point& a, Point& b) { + + auto aid = a.get_id(); + auto bid = b.get_id(); + auto tup = std::tuple(aid, bid, Feature::log2(FEAT_SPEARMAN)); + if (ltable.find(tup) == ltable.end()) { + double val = spearman(a, b); + ltable.insert({tup, val}); + return val; + } else { + return ltable.at(tup); + } +} + +template +double Feature::spearman(Point& a, Point& b) +{ + const DivergencePoint& p = dynamic_cast&>(a); + const DivergencePoint& q = dynamic_cast&>(b); + const auto N = p.points.size(); + vector ip(N, 0); + vector iq(N, 0); + std::iota(std::begin(ip), std::end(ip), 0); + std::iota(std::begin(iq), std::end(iq), 0); + std::sort(std::begin(ip), std::end(ip), [&](size_t x, size_t y) { + return p.points[x] < p.points[y]; + }); + std::sort(std::begin(iq), std::end(iq), [&](size_t x, size_t y) { + return q.points[x] < q.points[y]; + }); + double expected = (N+1) / 2.0; + double cov = 0; + double sp = 0; + double sq = 0; + for (auto i = 0; i < N; i++) { + cov += (ip[i] - expected) * (iq[i] - expected); + sp += (ip[i] - expected) * (ip[i] - expected); + sq += (iq[i] - expected) * (iq[i] - expected); + } + return (N * cov) / (sp * sq); +} + +template +double Feature::c_jaccard(Point& a, Point& b) { + + auto aid = a.get_id(); + auto bid = b.get_id(); + auto tup = std::tuple(aid, bid, Feature::log2(FEAT_JACCARD)); + if (ltable.find(tup) == ltable.end()) { + double val = jaccard(a, b); + ltable.insert({tup, val}); + return val; + } else { + return ltable.at(tup); + } +} + +template +double Feature::jaccard(Point& a, Point& b) +{ + const DivergencePoint& p = dynamic_cast&>(a); + const 
DivergencePoint& q = dynamic_cast&>(b); + const auto N = p.points.size(); + uint64_t sum = 0; + for (auto i = 0; i < N; i++) { + if (p.points[i] == q.points[i] && p.points[i] > 1) { + sum++; + } + } + return (double)sum / N; +} + +template +double Feature::c_d2s(Point& a, Point& b) { + + auto aid = a.get_id(); + auto bid = b.get_id(); + auto tup = std::tuple(aid, bid, Feature::log2(FEAT_D2s)); + if (ltable.find(tup) == ltable.end()) { + double val = d2s(a, b); + ltable.insert({tup, val}); + return val; + } else { + return ltable.at(tup); + } +} + +template +double Feature::d2s(Point& a, Point& b) +{ + const DivergencePoint& p = dynamic_cast&>(a); + const DivergencePoint& q = dynamic_cast&>(b); + const auto N = p.points.size(); + const int k = (int)(log(N) / log(4)); + const auto p1 = p.get_1mers(); + const auto q1 = q.get_1mers(); + const double pmag = p.getPseudoMagnitude(); + const double qmag = q.getPseudoMagnitude(); + double sum = 0; + for (size_t i = 0; i < N; i++) { + double p1i = 1; + double q1i = 1; + size_t idx = i; + for (int j = 0; j < k; j++) { + int i1 = idx % 4; + idx /= 4; + p1i *= (double)p1[i1] / pmag; + q1i *= (double)q1[i1] / qmag; + } + double hp = p.points[i] - pmag * p1i; + double hq = q.points[i] - qmag * q1i; + if (hp != 0 && hq != 0) { + sum += hp * hq / hypot(hp, hq); + } + } + return sum; +} + +template +double Feature::c_afd(Point& a, Point& b) { + + auto aid = a.get_id(); + auto bid = b.get_id(); + auto tup = std::tuple(aid, bid, Feature::log2(FEAT_AFD)); + if (ltable.find(tup) == ltable.end()) { + double val = afd(a, b); + ltable.insert({tup, val}); + return val; + } else { + return ltable.at(tup); + } +} + +template +double Feature::afd(Point& a, Point& b) +{ + const DivergencePoint& p = dynamic_cast&>(a); + const DivergencePoint& q = dynamic_cast&>(b); + const auto N = p.points.size(); + const int k = (int)(log(N) / log(4)); + const auto p1 = p.get_1mers(); + const auto q1 = q.get_1mers(); + const auto pmag = p.getPseudoMagnitude(); 
+ const auto qmag = q.getPseudoMagnitude(); + double sum = 0; + const auto nMinusOne = N / 4; + const auto nMinusTwo = nMinusOne / 4; + int first_i = 0; + for (auto i = 0; i < N; i += nMinusTwo) { +// 16 iterations total, iterating through all 2-mers + uint64_t psum = 0, qsum = 0; + for (auto j = i; j < i + nMinusTwo; j++) { + psum += p.points[j]; + qsum += q.points[j]; + } + double x = (double)psum / p1[first_i / 4]; + double y = (double)qsum / q1[first_i / 4]; + first_i++; + + + double diff = x - y; + double unsquared = (diff * pow(1+diff, -14)); + sum += unsquared * unsquared; + } + return sum; +} + +template +double Feature::c_mismatch(Point& a, Point& b) { + + auto aid = a.get_id(); + auto bid = b.get_id(); + auto tup = std::tuple(aid, bid, Feature::log2(FEAT_MISMATCH)); + if (ltable.find(tup) == ltable.end()) { + double val = mismatch(a, b); + ltable.insert({tup, val}); + return val; + } else { + return ltable.at(tup); + } +} + +template +double Feature::mismatch(Point& a, Point& b) +{ + const DivergencePoint& p = dynamic_cast&>(a); + const DivergencePoint& q = dynamic_cast&>(b); + const auto N = p.points.size(); + uint64_t sum = 0; + #pragma omp simd + for (auto i = 0; i < N; i++) { + sum += (p.points[i] != q.points[i]); + } + return sum; +} + +template +double Feature::c_canberra(Point& a, Point& b) { + + auto aid = a.get_id(); + auto bid = b.get_id(); + auto tup = std::tuple(aid, bid, Feature::log2(FEAT_CANBERRA)); + if (ltable.find(tup) == ltable.end()) { + double val = canberra(a, b); + ltable.insert({tup, val}); + return val; + } else { + return ltable.at(tup); + } +} + +template +double Feature::canberra(Point& a, Point& b) +{ + const DivergencePoint& p = dynamic_cast&>(a); + const DivergencePoint& q = dynamic_cast&>(b); + const auto N = p.points.size(); + double sum = 0; + #pragma omp simd + for (auto i = 0; i < N; i++) { + auto numer = p.points[i] > q.points[i] ? 
p.points[i] - q.points[i] : q.points[i] - p.points[i]; + auto denom = p.points[i] + q.points[i]; + sum += (double)numer / denom; + } + return sum; +} + +template +double Feature::c_kulczynski1(Point& a, Point& b) { + + auto aid = a.get_id(); + auto bid = b.get_id(); + auto tup = std::tuple(aid, bid, Feature::log2(FEAT_KULCZYNSKI1)); + if (ltable.find(tup) == ltable.end()) { + double val = kulczynski1(a, b); + ltable.insert({tup, val}); + return val; + } else { + return ltable.at(tup); + } +} + +template +double Feature::kulczynski1(Point &a, Point &b) +{ + const DivergencePoint& p = dynamic_cast&>(a); + const DivergencePoint& q = dynamic_cast&>(b); + const auto N = p.points.size(); + double sum = 0; + for (auto i = 0; i < N; i++) { + auto numer = p.points[i] > q.points[i] ? p.points[i] - q.points[i] : q.points[i] - p.points[i]; + auto denom = std::min(p.points[i], q.points[i]); + sum += (double)numer / denom; + } + return sum; +} + +template +double Feature::c_d2_star(Point& a, Point& b) { + + auto aid = a.get_id(); + auto bid = b.get_id(); + auto tup = std::tuple(aid, bid, Feature::log2(FEAT_D2_star)); + if (ltable.find(tup) == ltable.end()) { + double val = d2_star(a, b); + ltable.insert({tup, val}); + return val; + } else { + return ltable.at(tup); + } +} + +template +double Feature::d2_star(Point& a, Point& b) +{ + const DivergencePoint& p = dynamic_cast&>(a); + const DivergencePoint& q = dynamic_cast&>(b); + const auto N = p.points.size(); + const int k = (int)(log(N) / log(4)); + const auto p1 = p.get_1mers(); + const auto q1 = q.get_1mers(); + + const auto pmag = p.getPseudoMagnitude(); + const auto qmag = q.getPseudoMagnitude(); + double sum = 0; + vector tilde(4, 0); + for (int i = 0; i < 4; i++) { + tilde[i] = (double)(p1[i] + q1[i]) / (pmag + qmag); + } + const double L = sqrt(pmag * qmag); + for (auto i = 0; i < N; i++) { + double p1i = 1; + double q1i = 1; + double tilde_i = 1; + auto idx = i; + for (int j = 0; j < k; j++) { + auto i1 = idx % 4; + idx 
/= 4; + p1i *= (double)p1[i1] / pmag; + q1i *= (double)q1[i1] / qmag; + tilde_i *= tilde[i1]; + } + double hp = p.points[i] - pmag * p1i; + double hq = q.points[i] - qmag * q1i; + sum += hp * hq / (L * tilde_i); + } + return sum; +} + +template +double Feature::c_n2r(Point& a, Point& b) { + + auto aid = a.get_id(); + auto bid = b.get_id(); + auto tup = std::tuple(aid, bid, Feature::log2(FEAT_N2R)); + if (ltable.find(tup) == ltable.end()) { + double val = n2r(a, b); + ltable.insert({tup, val}); + return val; + } else { + return ltable.at(tup); + } +} + +template +double Feature::n2r(Point& a, Point& b) const +{ + const DivergencePoint& p = dynamic_cast&>(a); + const DivergencePoint& q = dynamic_cast&>(b); + const auto N = p.points.size(); + double *cp = new double[N]; + double *cq = new double[N]; + double ap = 0, aq = 0; + for (auto i = 0; i < N; i++) { + int j = reverse.at(i); + cp[i] = p.points[i] + p.points[j]; + cq[i] = q.points[i] + q.points[j]; + ap += cp[i]; + aq += cq[i]; + } + ap /= N; + aq /= N; + double total = neighbor(cp, cq, ap, aq, N); + delete[] cp; + delete[] cq; + return total; +} + +template +double Feature::c_n2rc(Point& a, Point& b) { + + auto aid = a.get_id(); + auto bid = b.get_id(); + auto tup = std::tuple(aid, bid, Feature::log2(FEAT_N2RC)); + if (ltable.find(tup) == ltable.end()) { + double val = n2rc(a, b); + ltable.insert({tup, val}); + return val; + } else { + return ltable.at(tup); + } +} + +template +double Feature::n2rc(Point& a, Point& b) const +{ + const DivergencePoint& p = dynamic_cast&>(a); + const DivergencePoint& q = dynamic_cast&>(b); + const auto N = p.points.size(); + double *cp = new double[N]; + double *cq = new double[N]; + double ap = 0, aq = 0; + for (auto i = 0; i < N; i++) { + int h = reverse_complement.at(i); + cp[i] = p.points[h] + p.points[i]; + cq[i] = q.points[h] + q.points[i]; + ap += cp[i]; + aq += cq[i]; + } + ap /= N; + aq /= N; + double total = neighbor(cp, cq, ap, aq, N); + delete[] cp; + delete[] cq; + 
return total; +} + +template class Feature; +template class Feature; +template class Feature; +template class Feature; +template class Feature; +template class Feature; diff --git a/src/cluster/src/Feature.h b/src/cluster/src/Feature.h new file mode 100644 index 0000000..ba7f73e --- /dev/null +++ b/src/cluster/src/Feature.h @@ -0,0 +1,380 @@ +/* -*- C++ -*- + * + * Feature.h + * + * Author: Benjamin T James + * + * Class containing all features and a glue to bind them together, + * shared indivual features can be shared through hashing if sequence + * id's are set. + */ +#ifndef FEATURES_H +#define FEATURES_H + +#include "SingleFeature.h" +#include +#include +#include + +#define FEAT_ALIGN (1UL << 0) +#define FEAT_HELLINGER (1UL << 1) +#define FEAT_MANHATTAN (1UL << 2) +#define FEAT_EUCLIDEAN (1UL << 3) +#define FEAT_CHI_SQUARED (1UL << 4) +#define FEAT_NORMALIZED_VECTORS (1UL << 5) +#define FEAT_HARMONIC_MEAN (1UL << 6) +#define FEAT_JEFFEREY_DIV (1UL << 7) +#define FEAT_K_DIV (1UL << 8) +#define FEAT_PEARSON_COEFF (1UL << 9) +#define FEAT_SQCHORD (1UL << 10) +#define FEAT_KL_COND (1UL << 11) +#define FEAT_MARKOV (1UL << 12) +#define FEAT_INTERSECTION (1UL << 13) +#define FEAT_RRE_K_R (1UL << 14) +#define FEAT_D2z (1UL << 15) +#define FEAT_SIM_MM (1UL << 16) +#define FEAT_EUCLIDEAN_Z (1UL << 17) +#define FEAT_EMD (1UL << 18) +#define FEAT_SPEARMAN (1UL << 19) +#define FEAT_JACCARD (1UL << 20) +#define FEAT_LENGTHD (1UL << 21) +#define FEAT_D2s (1UL << 22) +#define FEAT_AFD (1UL << 23) +#define FEAT_MISMATCH (1UL << 24) +#define FEAT_CANBERRA (1UL << 25) +#define FEAT_KULCZYNSKI1 (1UL << 26) +#define FEAT_KULCZYNSKI2 (1UL << 27) +#define FEAT_SIMRATIO (1UL << 28) +#define FEAT_JENSEN_SHANNON (1UL << 29) +#define FEAT_D2_star (1UL << 30) +#define FEAT_N2R (1UL << 31) +#define FEAT_N2RC (1UL << 32) +#define FEAT_N2RRC (1UL << 33) + +enum class Combo { + xy, + x2y2, + xy2, + x2y +}; + +template +struct pra { + Point* first; + Point* second; + double val; + pra() {} + 
pra(const pra&f) : first(f.first), second(f.second), val(f.val) {} + pra(Point* a, Point* b, double c) : first(a), second(b), val(c) {} + pra deep_clone() const { + return pra(first->clone(), second->clone(), val); + } +}; + +/* + * Usage: + * add_feature(FEAT_LD | FEAT_INTERSECTION, COMBO_SELF); + * add_feature(FEAT_LD | FEAT_JENSONSHANNON, COMBO_SELF); + * + * normalize(some_pairs_to_normalize) + * normalize(more_pairs_to_normalize) + * finalize() + * + * add_feature(....); + * + * normalize(some_pairs_to_normalize) + * normalize(more_pairs_to_normalize) + * finalize() + * + * compute(p,q) + * for (size_t i = 0; i < feature.size(); i++) { + * cout << feature[i] << endl; + * } + */ +template +class Feature { +public: + Feature(const Feature& feat_); + Feature operator=(const Feature& feat_); + Feature(const int k_) : k(k_) { + flags = 0; + auto freverse = [](int idx, int k) { + int sum = 0; + for (int i = 0; i < k; i++) { + int rem = idx % 4; + idx /= 4; + sum = 4 * sum + rem; + + } + return sum; + }; + auto freverse_complement = [](int idx, int k) { + std::vector v; + for (int i = 0; i < k; i++) { + v.push_back(3 - idx % 4); + idx /= 4; + } + int sum = 0; + for (auto val : v) { + sum = 4 * sum + val; + } + return sum; + }; + + uint64_t k4 = 1; + for (int i = 0; i < k; i++) { + k4 *= 4; + } + for (int i = 0; i < k4; i++) { + reverse.push_back(freverse(i, k)); + reverse_complement.push_back(freverse_complement(i, k)); + } + } + void add_feature(uint64_t f_flags, Combo combo=Combo::xy); + static vector multi_to_log(uint64_t multi); + vector feat_names(); + static std::string feat_name(uint64_t single); + void finalize(); + + void remove_feature() { // Tear down features SPECIFIC to last pairing + // auto indices_to_rm = combos.back().second; + // combos.pop_back(); + // uint64_t feat_flags; + + + // TO_DEL = TO_RM & (INDICES - REST) + + vector vec = combos.back().second; + combos.pop_back(); + for (auto combo : combos) { + for (auto idx : combo.second) { + 
vec.erase(std::remove(vec.begin(), vec.end(), idx), vec.end()); + } + } + std::sort(vec.begin(), vec.end(), std::greater()); + for (int idx : vec) { + flags ^= lookup[idx]; + lookup.erase(lookup.begin() + idx); + raw_funcs.erase(raw_funcs.begin() + idx); + mins.erase(mins.begin() + idx); + maxs.erase(maxs.begin() + idx); + is_sims.erase(is_sims.begin() + idx); + is_finalized.erase(is_finalized.begin() + idx); + } + // flags ^= lookup[idx] + // lookup[IDX] + // raw_funcs[IDX] + // mins[idx] + // maxs[idx] + // is_sims[idx] + // is_finalized[idx] + } + void normalize(const vector > &pairs); + void set_normal(uint64_t single_flag, double min, double max); + vector compute(Point& p, Point& q) { + vector cache = compute_all_raw(p, q); + normalize_cache(cache); + return cache; + }; + double operator()(int col, const vector& cache) const { + auto pr = combos.at(col); + Combo combo = pr.first; + auto indices = pr.second; + if (combo == Combo::xy) { + double prod = 1; + for (auto idx : indices) { + prod *= cache[idx]; + } + return prod; + } else if (combo == Combo::x2y2) { + double prod = 1; + for (auto idx : indices) { + prod *= cache[idx] * cache[idx]; + } + return prod; + } else if (combo == Combo::xy2) { + if (indices.size() != 2) { + cerr << "index size: " << indices.size() << endl; + throw "invalid"; + } + auto i0 = indices[0]; + auto i1 = indices[1]; + return cache[i0] * cache[i1] * cache[i1]; + } else if (combo == Combo::x2y) { + if (indices.size() != 2) { + throw "invalid"; + } + auto i0 = indices[0]; + auto i1 = indices[1]; + return cache[i0] * cache[i0] * cache[i1]; + } else { + throw "invalid combo"; + } + } + size_t size() const { return combos.size(); } + void print_bounds() const { + for (size_t i = 0; i < lookup.size(); i++) { + cout << "bounds[" << i << "]: " << mins[i] << " to " << maxs[i] << endl; + } + } + static int log2(uint64_t feature_) { + for (size_t i = 0; i < 33; i++) { + if (feature_ & (1UL << i)) { + return i; + } + } + return 0; + } + static 
double hellinger(Point& p, Point& q); + double c_hellinger(Point& p, Point& q); + static double manhattan(Point& p, Point& q); + double c_manhattan(Point& p, Point& q); + static double euclidean(Point& p, Point& q); + double c_euclidean(Point& p, Point& q); + static double chi_squared(Point& p, Point& q); + double c_chi_squared(Point& p, Point& q); + static double normalized_vectors(Point& p, Point& q); + double c_normalized_vectors(Point& p, Point& q); + static double harmonic_mean(Point& p, Point& q); + double c_harmonic_mean(Point& p, Point& q); + static double jefferey_divergence(Point& p, Point& q); + double c_jefferey_divergence(Point& p, Point& q); + static double k_divergence(Point& p, Point& q); + double c_k_divergence(Point& p, Point& q); + static double pearson(Point& p, Point& q); + double c_pearson(Point& p, Point& q); + static double squaredchord(Point& a, Point& b); + double c_squaredchord(Point& a, Point& b); + static double kl_conditional(Point& a, Point& b); + double c_kl_conditional(Point& a, Point& b); + static double markov(Point& a, Point& b); + double c_markov(Point& a, Point& b); + static double intersection(Point& p, Point& q); + double c_intersection(Point& p, Point& q); + static double rre_k_r(Point& p, Point& q); + double c_rre_k_r(Point& p, Point& q); + static double d2z(Point& p, Point& q); + double c_d2z(Point& p, Point& q); + static double sim_mm(Point& p, Point& q); + double c_sim_mm(Point& p, Point& q); + static double euclidean_z(Point& p, Point& q); + double c_euclidean_z(Point& p, Point& q); + static double emd(Point& p, Point& q); + double c_emd(Point& p, Point& q); + static double spearman(Point& p, Point& q); + double c_spearman(Point& p, Point& q); + static double jaccard(Point& p, Point& q); + double c_jaccard(Point& p, Point& q); + static double length_difference(Point& p, Point& q); + static double d2s(Point& p, Point& q); + double c_d2s(Point& p, Point& q); + static double afd(Point& p, Point& q); + double c_afd(Point& 
p, Point& q); + static double mismatch(Point& p, Point& q); + double c_mismatch(Point& p, Point& q); + static double canberra(Point& p, Point& q); + double c_canberra(Point& p, Point& q); + static double kulczynski1(Point& a, Point& b); + double c_kulczynski1(Point& a, Point& b); + static double kulczynski2(Point& a, Point& b); + double c_kulczynski2(Point& a, Point& b); + static double simratio(Point& a, Point& b); + double c_simratio(Point& a, Point& b); + double jensen_shannon(Point& p, Point& q) const; + double c_jensen_shannon(Point& p, Point& q); + static double d2_star(Point& p, Point& q); + double c_d2_star(Point& p, Point& q); + double n2r(Point& p, Point& q) const; + double c_n2r(Point& p, Point& q); + double n2rc(Point& p, Point& q) const; + double c_n2rc(Point& p, Point& q); + double n2rrc(Point& p, Point& q) const; + double c_n2rrc(Point& p, Point& q); + + static double align(Point& a, Point& b, std::map, double> &atable); + std::function&,Point&)> get_func(uint64_t single_feat); + std::function&,Point&)> get_func_(uint64_t single_feat); + bool feat_is_sim(uint64_t single_flag) const; + bool get_save() const { return do_save; } + void set_save(bool save_) { + do_save = save_; + if (!save_) { + ltable.clear(); + } + } + std::vector > > get_combos() const { return combos; } + std::vector get_mins() const { return mins; }; + std::vector get_maxs() const { return maxs; }; + std::vector get_lookup() const { return lookup; }; +private: + + vector compute_all_raw(Point& p, Point& q); + void normalize_cache(vector& cache) const; + + + // double raw(uint64_t single_flag, Point& a, Point& b); + int index_of(uint64_t single_flag) const { + for (size_t i = 0; i < lookup.size(); i++) { + if (lookup[i] == single_flag) { + return i; + } + } + return -1; + } + void reset_funcs() { + raw_funcs.clear(); + for (auto f : lookup) { + raw_funcs.push_back(get_func(f)); + } + } + uint64_t get_flags() const { return flags; }; + + + std::vector get_sims() const { return 
is_sims; }; + std::vector get_finalized() const { return is_finalized; }; + + + + + + int k; int get_k() const { return k; }; + uint64_t flags; + bool do_save; + std::vector + > > combos; + + std::vector mins, maxs; + std::vector is_sims, is_finalized; + std::vector lookup; + std::vector reverse, reverse_complement; + std::vector&,Point&)> > raw_funcs; + + std::map, double> atable; + std::map, double> ltable; + +// std::map, double> * get_table() const { return ltable; } +}; + +// template +// class Feature { +// public: +// Feature(std::function)> combination, std::vector > sf) +// : features(sf), combo(combination) {} +// double operator()(Point*, Point*) const; + + +// static double manhattan(Point& p, Point& q); +// static double length_difference(Point& p, Point& q); +// static double n2rrc(Point& p, Point& q, const vector&, const vector &); +// static double rre_k_r(Point& p, Point& q); +// static double intersection(Point& p, Point& q); +// static double jenson_shannon(Point& p, Point& q); +// static double pearson(Point& p, Point& q); +// static double simratio(Point& a, Point& b); +// static double squaredchord(Point& a, Point& b); +// private: +// vector > features; +// std::function)> combo; +// }; +#endif diff --git a/src/cluster/src/GLM.cpp b/src/cluster/src/GLM.cpp new file mode 100644 index 0000000..f5ef4ba --- /dev/null +++ b/src/cluster/src/GLM.cpp @@ -0,0 +1,66 @@ +/* + * glm.cpp + * + * Created on: May 29, 2017 + * Author: Robert Geraghty, The Bioinformatics Toolsmith Laboratory, The University of Tulsa + * + * Modified by Benjamin T James + */ + +#include "GLM.h" +#include "Matrix.h" + +#include +#include +using namespace std; +// using namespace matrix; + +namespace matrix{ + +void GLM::train(Matrix& features, Matrix& labels){ + weights = features.transpose() * features; + weights = weights.pseudoInverse() * features.transpose() * labels; +} + +Matrix GLM::predict(Matrix& features) const { + Matrix labels; + labels = features * weights; + 
double log; + for(int i = 0; i < labels.getNumRow(); i++){ + log = round(1/(1 + exp(-(labels.get(i,0))))); + labels.set(i,0, log); + } + return labels; +} + +std::tuple GLM::accuracy(Matrix& oLabels, Matrix& pLabels) const { + int sum = 0; + int negSum = 0; + int negSame = 0; + int posSum = 0; + int posSame = 0; + for(int i = 0; i < oLabels.getNumRow(); i++){ + if(oLabels.get(i,0) == -1){ + negSum++; + if(oLabels.get(i,0) == pLabels.get(i, 0)){ + sum++; + negSame++; + } + }else{ + posSum++; + if(oLabels.get(i,0) == pLabels.get(i, 0)){ + sum++; + posSame++; + } + } + } + double acc = (((double)sum*100)/(oLabels.getNumRow())); + double sens = (((double)posSame*100)/(posSum)); + double spec = (((double)negSame*100)/(negSum)); + // cout << "Accuracy: " << acc << "% "; + // cout << "Sensitivity: " << sens << "% "; + // cout << "Specificity: " << spec << "% " << endl; + return make_tuple(acc, sens, spec); +} + +} diff --git a/src/cluster/src/GLM.h b/src/cluster/src/GLM.h new file mode 100644 index 0000000..d9e150b --- /dev/null +++ b/src/cluster/src/GLM.h @@ -0,0 +1,31 @@ +/* + * glm.h + * + * Created on: May 29, 2017 + * Author: Robert Geraghty, The Bioinformatics Toolsmith Laboratory, The University of Tulsa + * + * Modified by Benjamin T James + */ + +#ifndef SRC_MATRIX_GLM_H_ +#define SRC_MATRIX_GLM_H_ + +#include "Matrix.h" +#include +namespace matrix { + +class GLM { +private: + Matrix weights; + +public: + void load(Matrix weights_) { weights = weights_; } + void train(matrix::Matrix& features, matrix::Matrix& labels); + Matrix predict(matrix::Matrix& features) const; + std::tuple accuracy(matrix::Matrix& oLabels, matrix::Matrix& pLabels) const; + const Matrix& get_weights() const { return weights; }; +}; + +} + +#endif /* SRC_MATRIX_GLM_H_ */ diff --git a/src/cluster/src/HandleSeq.cpp b/src/cluster/src/HandleSeq.cpp new file mode 100644 index 0000000..041c22a --- /dev/null +++ b/src/cluster/src/HandleSeq.cpp @@ -0,0 +1,155 @@ +/** + * Author: Alex Baumgartner + * 
The Bioinformatics Toolsmith Laboratory, the University of Tulsa + * 5/15/2018 + * + * Purpose: + * The pupose of this module is to take a sequence and mutate it to returns + It also serves as a way to parse a file for all sequences + */ + +#include "HandleSeq.h" +#include +// d +HandleSeq::HandleSeq(int m) { + + mode = m & HandleSeq::BOTH; + enableTrans = m & HandleSeq::TRANSLOCATION; + enableRev = m & HandleSeq::REVERSION; + // disable = (m & HandleSeq::ATYPICAL) > 0 ? 0 : 1; +} + +pair, vector> HandleSeq::parseFile(string fileName) { + ifstream fileIn; + //Uses the file the user supplies to take in sequences + fileIn.open(fileName, ifstream::in); + if(fileIn.is_open()){ + vector sequences; + vector names; + string inString; + //Boolean to make sure that the first sequence + //has already been found, prevents a null string being written + bool foundFirst = false; + string currentLine; + while (!fileIn.eof()) { + getline(fileIn, currentLine); + //Skip the line if nothing is on it + if (currentLine.length() == 0) { + continue; + } + //If the line has a '>' symbol, the start of a new sequence + else if (currentLine.at(0) == '>' && foundFirst) { + //Push the current saved sequene onto the vector, + //then reset the strings value + sequences.push_back(inString); + names.push_back(currentLine.substr(1, currentLine.find_first_of(' '))); + inString = ""; + } + else if(currentLine.at(0) == '>' && !foundFirst){ + foundFirst = true; + names.push_back(currentLine.substr(1, currentLine.find_first_of(' '))); + } + //If this is the first >, set found first to true + else if (!foundFirst) { + foundFirst = true; + } + //Otherwise, add the current Line to + //the string of current lines + else { + inString = inString + currentLine; + } + } + //Push the last found string on + //(There is no > at the end of a .fa file) + sequences.push_back(inString); + fileIn.close(); + return {names, sequences}; + } + else{ + cout << "Could not find File" << endl; + exit(2); + } +} + +pair 
HandleSeq::mutate(string sequence, int muteRate) { + percMute = muteRate; + if (muteRate == 0) { + return std::make_pair(1, sequence); + } + auto nucls = countNucl(sequence); + //Assing the percent of each nucleotide in the sequence + int percAs = (nucls.at(0) * 100) / sequence.length(); + int percCs = (nucls.at(1) * 100) / sequence.length(); + int percGs = (nucls.at(2) * 100) / sequence.length(); + int percTs = (nucls.at(3) * 100) / sequence.length(); + int percMulti, percSing; + string * seq = new string(sequence); + int length = sequence.length(); + //If the user only wants single + if (mode == 1) { + percMulti = 0; + //Allocate all mutations to single + percSing = percMute; + } + //Or if the user only wants non single + else if (mode == 2) { + //Allocate all mutations to non-single + percSing = 0; + percMulti = percMute; + } + //Otherwise, assing a random percentage to both + else { + percMulti = rand() % percMute; + percSing = percMute - percMulti; + } + //Define a new multiple mutation + MultiMute multi(percAs, percCs, percGs, percTs, + percMulti, enableTrans, enableRev); + //Run the multiple mutations, + //get back its vector of what is valid to mutate and what isn't + vector mutes = multi.genMulti(seq); + uint64_t cnt = 0; + for (bool b : mutes) { + cnt += b ? 
1 : 0; + } + + SingMute sing(percAs, percCs, percGs, percTs, + percSing, seq, mutes); + float alignmentLength = multi.getAlignmentLength() + sing.getAlignmentLength() + length; +// cout << "alignLength: " << alignmentLength << endl; + float IBP = length - multi.getIBP() - sing.getIBP(); +// cout << "ibp: " << IBP << endl; + float alignment = IBP / alignmentLength; +// cout << "ratio: size: " << mutes.size() << " expected: " << (float)cnt / mutes.size() << " found: " << ((float)length - multi.getIBP()) / ((float)multi.getAlignmentLength() + length) << " align: " << alignment << endl; + //assign the sequence to the + //value that the seq pointer stores to + //clear the heap + delete seq; + //Return the now mutated sequence + std::string outseq = sing.getSeq(); + return make_pair(alignment, outseq); +} + +vector HandleSeq::countNucl(string sequence) { + int a = 0; + int c = 0; + int g = 0; + int t = 0; + for (int i = 0; i < sequence.length(); i++) { + if (sequence.at(i) == 'A') { + a++; + } else if (sequence.at(i) == 'C') { + c++; + } else if (sequence.at(i) == 'G') { + g++; + } else if (sequence.at(i) == 'T') { + t++; + } + } + vector values; + values.push_back(a); + values.push_back(c); + values.push_back(g); + values.push_back(t); + return values; +} diff --git a/src/cluster/src/HandleSeq.h b/src/cluster/src/HandleSeq.h new file mode 100644 index 0000000..95a7718 --- /dev/null +++ b/src/cluster/src/HandleSeq.h @@ -0,0 +1,77 @@ +/** + * Author: Alex Baumgartner + * The Bioinformatics Toolsmith Laboratory, the University of Tulsa + * 5/15/2018 + * + * Purpose: + * The pupose of this module is to take a sequence and mutate it to returns + It also serves as a way to parse a file for all sequences + */ +#ifndef HANDLESEQ_H +#define HANDLESEQ_H + +#include +#include +#include +#include +#include "MultiMute.h" +#include "SingMute.h" + +using namespace std; + +class HandleSeq { +public: + // Single — point — mutations only + static const int SINGLE = (1 << 0); + // 
Non-single point mutations only + static const int NON_SINGLE = (1 << 1); + // Single and non-single mutations + static const int BOTH = SINGLE | NON_SINGLE; +// translocations and reversions + static const int TRANSLOCATION = (1 << 2); + static const int REVERSION = (1 << 3); + static const int ATYPICAL = TRANSLOCATION | REVERSION; + static const int ALL = ATYPICAL | BOTH; + /* + constructor + + @param: + int: the mode of the program + (Single only = 1, nonsingle only = 2, both = 3) + */ + HandleSeq(int); + /* + returns a vector of all sequences in a file inputted + + @param: + std::string: file name + int: the mutation rate + + @return: + std::vector: Vector of all found sequences + */ + pair, vector> parseFile(string); + /* + Mutates a sequence based on parameters inputted in constructor, + and returns the mutated sequence + */ + pair mutate(string, int); +private: + int mode; + int percMute; + bool enableTrans, enableRev; + /* + Counts the nucleotides in a file, + and returns a vector corresponding to their values {A, C, G, T} + + @param: + std::string: the sequences + + @return: + std::vector: vector containing ints of each nucleotide count + */ + vector countNucl(string); + +}; + +#endif diff --git a/src/cluster/src/Histogram.cpp b/src/cluster/src/Histogram.cpp new file mode 100644 index 0000000..a669687 --- /dev/null +++ b/src/cluster/src/Histogram.cpp @@ -0,0 +1,195 @@ +/* -*- C++ -*- + * + * Histogram.cpp + * + * Author: Benjamin T James + * + * Artifact from early development of MeShClust + */ +#ifndef HEADER_HACK +#include "Histogram.h" +#endif + +#include +#include + +template +double Histogram::distance_k1(const Point &p) const +{ + throw "Not implemented"; + const Histogram& h = dynamic_cast&>(p); + uint64_t dist = 0; + auto size = std::min(points.size(),h.points.size()); +/* + for (unsigned int i = 0; i < size; i++) { + T l = points.at(i); + T r = h.points.at(i); + dist += (l > r) ? 
(l - r) : (r - l); + } +*/ + uint64_t avg_mag = (magnitude() + h.magnitude()) / 2.0; + for (auto i = 0; i < size; i++) { + T l = points[i]; + T r = h.points[i]; + dist += min(l, r); + } + return 1.0 - dist / avg_mag; +} +template +Histogram::Histogram(std::vector pts, char mark) +{ + for (T t : pts) { + points.push_back(t); + } + to_delete = false; +} +template +Histogram::Histogram(std::vector pts) +{ + for (T t : pts) { + points.push_back(t); + } + to_delete = false; +} + +template +Histogram::Histogram(std::vector pts, bool toDelete) +{ + for (T t : pts) { + points.push_back(t); + } + to_delete = toDelete; +} + +template +Histogram::Histogram(unsigned int size) +{ + for (unsigned int i = 0; i < size; i++) { + points.push_back(0); + } + to_delete = false; +} + +template +void Histogram::operator*=(double d) +{ + for (T &t : points) { + t *= d; + } +} + +template +bool Histogram::operator<(Point& p) const +{ + const Histogram& h = dynamic_cast&>(p); + unsigned int size = std::min(points.size(),h.points.size()); + for (unsigned int i = 0; i < size; i++) { + if (points.at(i) >= h.points.at(i)) { + return false; + } + } + return true; +} + +template +void Histogram::operator/=(double d) +{ + unsigned int size = points.size(); + for (unsigned int i = 0; i < size; i++) { + points.at(i) = points.at(i) / d; + } +} + +template +void Histogram::operator+=(Point& p) +{ + const Histogram& h = dynamic_cast&>(p); + unsigned int size = std::min(points.size(),h.points.size()); + for (unsigned int i = 0; i < size; i++) { + points.at(i) += h.points.at(i); + } +} + +template +uint64_t Histogram::operator-(const Point& p) const +{ + return distance(p); +} + +template +void Histogram::set(Point& p) +{ + const Histogram& h = dynamic_cast&>(p); + points = h.points; +} + +template +void Histogram::display() const +{ + unsigned size = points.size(); + for (unsigned i = 0; i < size; i++) { + std::cout << points.at(i) << " "; + } + std::cout << std::endl; +} + +template +void 
Histogram::addOne() +{ + for (auto &a : points) { + a++; + } +} +template +void Histogram::subOne() +{ + for (auto &a : points) { + a--; + } +} + +template +void Histogram::zero() +{ + for (typename std::vector::iterator it = points.begin(); it != points.end(); ++it) { + *it = 0; + } +} + +template +uint64_t Histogram::distance(const Point& p) const +{ +/* + // Vectors should be the same width + const Histogram& h = dynamic_cast&>(p); + T dist = 0; + unsigned int size = std::min(points.size(),h.points.size()); + for (unsigned int i = 0; i < size; i++) { + T l = points.at(i); + T r = h.points.at(i); + dist += (l > r) ? (l - r) : (r - l); + } + return dist; +*/ + throw "Not implemented"; + return 0; +} + +template +uint64_t Histogram::magnitude() const +{ + uint64_t dist = 0; + for (auto const& p : points) { + dist += p; + } + return dist; +} + +#ifndef HEADER_HACK +template class Histogram; +template class Histogram; +template class Histogram; +template class Histogram; +template class Histogram; +template class Histogram; +#endif diff --git a/src/cluster/src/Histogram.h b/src/cluster/src/Histogram.h new file mode 100644 index 0000000..1813bb4 --- /dev/null +++ b/src/cluster/src/Histogram.h @@ -0,0 +1,80 @@ +/* -*- C++ -*- + * + * Histogram.h + * + * Author: Benjamin T James + * + * Artifact from early development of MeShClust + */ +#ifndef HISTOGRAM_H +#define HISTOGRAM_H +#include +#include "Point.h" + +template +class Histogram : public Point { +public: + Histogram(std::vector pts); + Histogram(std::vector pts, char marker); + Histogram(std::vector pts, bool to_delete); + Histogram(unsigned int size); + ~Histogram() {} + void operator*=(double d); + void operator/=(double d); + uint64_t operator-(const Point& p) const; + bool operator<(Point& p) const; + void operator+=(Point& p); + void set(Point& p); + void display() const; + void zero(); + void addOne(); + void subOne(); + double distance_k1(const Point& p) const; + double prob_under(Point& p) const { return 
distance(p); }; + uint64_t distance(const Point& p) const; + uint64_t magnitude() const; + uint64_t getRealMagnitude() const { return 0; }; + double distance_d(Point& p) const { + throw "not implemented"; + return 0; + } + void set_arg_to_this_d(Point& p) const { + throw "not implemented"; + } + Point* create_double() const { + throw "not implemented"; + return NULL; + } + Histogram* clone() const { + return new Histogram(points, to_delete); + } + Histogram* create() const { + return new Histogram(points.size()); + } + bool is_to_delete() const { + return to_delete; + } + void set_to_delete(bool b) { + to_delete = b; + } + const vector& get_data() const { return points; } + void set_id(uintmax_t c_id) { id = c_id; }; + const uintmax_t get_id() const { return id; }; + void set_length(unsigned long len) { nucl_length = len; }; + unsigned long get_length() const { return nucl_length; }; + unsigned long size() const { return points.size(); }; +private: + std::vector points; + bool to_delete; + uintmax_t id; + unsigned long nucl_length; +}; + +#ifdef HEADER_HACK +#ifndef HISTOGRAM_C +#define HISTORGRAM_C +#include "Histogram.cpp" +#endif +#endif + +#endif diff --git a/src/cluster/src/Loader.cpp b/src/cluster/src/Loader.cpp new file mode 100644 index 0000000..73691b6 --- /dev/null +++ b/src/cluster/src/Loader.cpp @@ -0,0 +1,111 @@ +/* -*- C++ -*- + * + * Loader.cpp + * + * Author: Benjamin T James + * + * Class which can 'preload' chunks of sequences from a file list, + * and then count the k-mers separately, which can be done in + * multiple threads + */ +#include "Loader.h" +#include "ClusterFactory.h" +#include "DivergencePoint.h" +#include + +template +bool Loader::done() const +{ + return file_idx == files.size(); +} + +template +void Loader::preload(int tid) +{ + if (file_idx == files.size()) { + return; + } + for (uint64_t j = 0; j < chunk_size; j++) { + auto chrom = next(); + if (chrom.first == "") { + return; + } + cache_list.at(tid).emplace_back(chrom.first, 
chrom.second); + } +} + + +template +Point* Loader::get_point(std::string header, const std::string &base, uintmax_t& id, int k) +{ + KmerHashTable table(k, 1); + KmerHashTable table_k1(1, 0); + std::vector values; + vector values_k1; + values.clear(); + ChromosomeOneDigit chrom; + chrom.setHeader(header); + chrom.appendToSequence(base); + chrom.finalize(); + fill_table(table, &chrom, values); + fill_table(table_k1, &chrom, values_k1); +// int tmplate = get_template(chrom->getHeader(), templates); + Point *p = new DivergencePoint(values, chrom.size()); +// cout << "mag: " << ((DivergencePoint*)p)->getPseudoMagnitude() << std::endl; + p->set_1mers(values_k1); + p->set_header(header); + p->set_length(chrom.getBase()->length()); + p->set_data_str(*chrom.getBase()); + DivergencePoint* q = dynamic_cast*>(p); + const auto N = q->points.size(); + double aq = (double) q->getPseudoMagnitude() / N; + double sq = 0; + for (auto i = 0; i < N; i++) { + double qdiff = q->points[i] - aq; + sq += qdiff * qdiff; + } + sq = sqrt(sq / N); + q->set_stddev(sq); + p->set_id(id); + #pragma omp atomic + id++; + return p; +} + +template +std::vector*> Loader::load_next(int tid) +{ + std::vector*> points; + for (size_t i = 0; i < cache_list.at(tid).size(); i++) { + auto pr = cache_list.at(tid).at(i); + Point* p = get_point(pr.first, *pr.second, id_list.at(tid), k); + points.push_back(p); + delete pr.second; + } + cache_list.at(tid).clear(); + return points; +} + +template +std::pair Loader::next() +{ + auto n = maker->next(); + if (n.first != "") { + return n; + } + delete maker; + maker = NULL; + file_idx++; + if (file_idx >= files.size()) { + return n; + } + maker = new SingleFileLoader(files.at(file_idx)); + return maker->next(); +} + +template class Loader; +template class Loader; +template class Loader; +template class Loader; +template class Loader; +template class Loader; diff --git a/src/cluster/src/Loader.h b/src/cluster/src/Loader.h new file mode 100644 index 0000000..28da845 --- 
/dev/null +++ b/src/cluster/src/Loader.h @@ -0,0 +1,73 @@ +/* -*- C++ -*- + * + * Loader.h + * + * Author: Benjamin T James + * + * Class which can 'preload' chunks of sequences from a file list, + * and then count the k-mers separately, which can be done in + * multiple threads + */ +#ifndef LOADER_H +#define LOADER_H + +#include "Point.h" +#include "SingleFileLoader.h" +#include "ClusterFactory.h" + +template +class Loader { +public: + Loader(std::vector files_, + uint64_t total_num_points_, + uint64_t chunk_size_, + int num_threads_, + int k_, + uint64_t start_id=0) + : + chunk_size(chunk_size_), + num_threads(num_threads_), + k(k_), + files(files_) { + + maker = new SingleFileLoader(files.at(0)); + uint64_t total_id = start_id; + for (int i = 0; i < num_threads_; i++) { + id_list.push_back(total_id); + total_id += total_num_points_; + cache_list.push_back(std::vector >()); + } +// preload(); + }; + + ~Loader() { + cache_list.clear(); + id_list.clear(); + if (maker != NULL) { + delete maker; + } + } + + // single threaded + void preload(int tnum); + + bool done() const; + // multi-thread accessible + std::vector*> load_next(int tid); + + static Point* get_point(std::string header, const std::string &base, uintmax_t& id, int k); +private: + + std::pair next(); + + uint64_t chunk_size; + int num_threads, k; + + std::vector > > cache_list; + std::vector id_list; + + std::vector files; + size_t file_idx = 0; + SingleFileLoader *maker = NULL; +}; +#endif diff --git a/src/cluster/src/LogTable.cpp b/src/cluster/src/LogTable.cpp new file mode 100644 index 0000000..0a05a9d --- /dev/null +++ b/src/cluster/src/LogTable.cpp @@ -0,0 +1,41 @@ +#include "LogTable.h" + +#include +#include + +LogTable::LogTable() : coeff(1000000 / 2) +{ + uintmax_t size = 1000000; + double imax = 2; +// map = new double[size]; + double lsize = log(size); + for (uintmax_t i = 0; i < size; i++) { + map[i] = log(imax * (i + 1)) - lsize; + } + std::cout << "dmax: " << coeff << std::endl; +} 
+LogTable::LogTable(uintmax_t size, double imax) : coeff(size / imax) +{ + //map = new double[size]; + double lsize = log(size); + for (uintmax_t i = 0; i < size; i++) { + map[i] = log(imax * (i + 1)) - lsize; + } + std::cout << "dmax: " << coeff << std::endl; +} + +LogTable::~LogTable() +{ + //delete[] map; +} + +double LogTable::at(double d) const +{ + size_t idx = d * coeff; + return map[idx]; +} +double LogTable::operator[](double d) const +{ + size_t index = d * coeff; + return map[index]; +} diff --git a/src/cluster/src/LogTable.h b/src/cluster/src/LogTable.h new file mode 100644 index 0000000..6fab42e --- /dev/null +++ b/src/cluster/src/LogTable.h @@ -0,0 +1,20 @@ +#ifndef LOGTABLE_H +#define LOGTABLE_H + +#include +#include + +#define TBLSIZE 1000000 +class LogTable { +public: + LogTable(); + LogTable(uintmax_t _size, double imax=2); + ~LogTable(); + double at(double d) const; + double operator[](double d) const; +private: + double map[TBLSIZE]; + + const double coeff; +}; +#endif diff --git a/src/cluster/src/Mat.h b/src/cluster/src/Mat.h new file mode 100644 index 0000000..eb711ed --- /dev/null +++ b/src/cluster/src/Mat.h @@ -0,0 +1,73 @@ +/* -*- C++ -*- + * + * Mat.h + * + * Author: Benjamin T James + */ +#ifndef MAT_H +#define MAT_H +#include +#include +using namespace std; +template +class Mat { +public: + Mat(function func, const long size) : n(size), table_size(size*(size+1)/2), compute(func) { + if (size <= 0) { + throw "Invalid size"; + } + table = new T[table_size]; + set = new bool[table_size](); + }; + ~Mat() { + delete[] table; + delete[] set; + }; + void fill() { + unsigned long long count = 0; + #ifdef OPENMP + #pragma omp parallel for collapse(2) shared(set) + #endif + for (long i = 0; i < n; i++) { + for (long j = 0; j < n; j++) { + const auto idx = addr(i, j); + if (!set[idx]) { + auto res = compute(i, j); + table[idx] = res; + set[idx] = true; + count++; + } + if (count % 10000 == 0) { + cout << count << " / " << table_size << endl; + } + 
} + } + + }; + T& operator[](pair index) { + const unsigned long idx = addr(index.first, index.second); + if (!set[idx]) { + table[idx] = compute(index.first, index.second); + set[idx] = true; + } + return table[idx]; + }; + bool exists(int i, int j) const { + return set[addr(i, j)]; + } +private: + T* table; + bool* set; + const unsigned long table_size; + const unsigned long n; + function compute; + + unsigned long addr(unsigned long i, unsigned long j) const { + if (i <= j) { + return i * n - (i - 1) * i / 2 + j - i; + } else { + return j * n - (j - 1) * j / 2 + i - j; + } + }; +}; +#endif diff --git a/src/cluster/src/Matrix.cpp b/src/cluster/src/Matrix.cpp new file mode 100644 index 0000000..997d1c7 --- /dev/null +++ b/src/cluster/src/Matrix.cpp @@ -0,0 +1,360 @@ +/* + * matrix.cpp + * + * Created on: May 10, 2017 + * Author: Robert Geraghty, The Bioinformatics Toolsmith Laboratory, The University of Tulsa + */ + +#include "Matrix.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace std; + +namespace matrix { + +Matrix::Matrix(int r, int c) : + numRow(r), numCol(c) { + m.resize(r); + for (int i = 0; i < r; i++) { + m.at(i) = vector(c); + } +} +Matrix::Matrix() : + numRow(0), numCol(0) { + +} + +Matrix::~Matrix() { + +} + +Matrix Matrix::operator+(Matrix n) { + if (numCol == n.numCol && numRow == n.numRow) { + Matrix mat = Matrix(numRow, numCol); + for (int i = 0; i < mat.numRow; i++) { + for (int j = 0; j < mat.numCol; j++) { + mat.set(i, j, (get(i, j) + n.get(i, j))); + } + } + return mat; + } else { + cerr << "Invalid input: array dimension mismatch." 
<< endl; + throw exception(); + } +} + +Matrix Matrix::operator-(Matrix n) { + if (numCol == n.numCol && numRow == n.numRow) { + Matrix mat = Matrix(numRow, numCol); + for (int i = 0; i < mat.numRow; i++) { + for (int j = 0; j < mat.numCol; j++) { + mat.set(i, j, (get(i, j) - n.get(i, j))); + } + } + return mat; + } else { + cerr << "Invalid input: array dimension mismatch." << "\n"; + throw exception(); + } +} + +Matrix Matrix::operator*(Matrix n) { + + if (numCol == n.numRow) { + double curSum = 0; + Matrix mat = Matrix(numRow, n.numCol); +////#pragma omp parallel for collapse(2) + for (int i = 0; i < mat.numRow; i++) { + for (int j = 0; j < mat.numCol; j++) { + curSum = 0; + for (int k = 0; k < numCol; k++) { + curSum = curSum + get(i, k) * n.get(k, j); + } + mat.set(i, j, curSum); + } + } + return mat; + } else { + cerr << "Invalid input: array dimension mismatch." << endl; + throw exception(); + } +} + +Matrix Matrix::transpose() { + Matrix temp = Matrix(numCol, numRow); + for (int i = 0; i < numRow; i++) { + for (int j = 0; j < numCol; j++) { + temp.set(j, i, get(i, j)); + } + } + return temp; + +} + +Matrix Matrix::gaussJordanInverse() { + if (numRow == numCol) { //Checks if matrix is square + Matrix invert = Matrix(numRow, numCol); + Matrix temp = Matrix(numRow, numCol); + double pivotVal; + + temp.m = m; + + for (int i = 0; i < numRow; i++) {//Creates identity Matrix, which will become inverse matrix + invert.set(i, i, 1); + } + + for (int i = 0; i < numRow; i++) { + if (get(i, i) != 1) { //Checks if the pivot point is 1 + if (get(i, i) != 0) {//Check if the pivot point is 0, if not it performs a type 2 row operation to set the pivot point to 1 + pivotVal = get(i, i); + for (int j = 0; j < numCol; j++) { + set(i, j, (get(i, j) / pivotVal)); + invert.set(i, j, (invert.get(i, j) / pivotVal)); + } + } else {//If the pivot point is zero, it performs a type 1 row operation + bool properSwap = false; + int row = i + 1; + double valSwap; + double valSwap2; + 
while (!properSwap && row < numRow) { + if (get(row, i) != 0) { + properSwap = true; + } else { + row++; + } + } + if (properSwap) { + for (int j = 0; j < numCol; j++) { + valSwap = get(i, j); + valSwap2 = invert.get(i, j); + set(i, j, get(row, j)); + invert.set(i, j, (invert.get(row, j))); + set(row, j, valSwap); + invert.set(row, j, valSwap2); + } + } else {//If it cannot perform a type 1 row swap with a non zero pivot value, the Inverse does not exist. + cout << "Inverse does not exist\n"; + throw 0; + m = temp.m; + return temp; + } + pivotVal = get(i, i); + for (int j = 0; j < numCol; j++) {//Now perform a type 2 row operation to set the new pivot point to 1 + set(i, j, (get(i, j) / pivotVal)); + invert.set(i, j, (invert.get(i, j) / pivotVal)); + } + } + } + for (int below = i + 1; below < numRow; below++) { //Iterate through the elements below the pivot, performing type 3 row operations to set each to 0 + if (get(below, i) != 0) { + pivotVal = get(below, i); + for (int j = 0; j < numCol; j++) { + set(below, j, (get(below, j) - (pivotVal * get(i, j)))); + invert.set(below, j, + (invert.get(below, j) + - (pivotVal * invert.get(i, j)))); + } + } + } + } + // cout << "\n\n"; + for (int i = numRow - 1; i >= 0; i--) { //Now perform the same step as the last except on the elements above the pivot. + for (int above = 0; above < i; above++) { + if (get(above, i) != 0) { + pivotVal = get(above, i); + for (int j = 0; j < numCol; j++) { + set(above, j, (get(above, j) - (pivotVal * get(i, j)))); + invert.set(above, j, + (invert.get(above, j) + - (pivotVal * invert.get(i, j)))); + } + } + } + } + for (int i = 0; i < numRow; i++) {//Now check to make sure the original matrix is an identity matrix. 
+ for (int j = 0; j < numCol; j++) { + if (i == j && get(i, j) != 1) { + cout << "Inverse does not exist\n"; + throw 0; + m = temp.m; + return temp; + } + if (i != j && get(i, j) != 0) { + cout << "Inverse does not exist\n"; + throw 0; + m = temp.m; + return temp; + } + } + } + m = temp.m; //Reset the original matrix + return invert; + } + cerr << "Invalid dimensions" << endl; + throw exception(); +} + +Matrix Matrix::pseudoInverse() { + if (numRow >= numCol) { + Matrix temp = transpose(); + Matrix transByOrig = temp * *this; + Matrix psuedoInv = (transByOrig.gaussJordanInverse()) * temp; + return psuedoInv; + } else { + Matrix temp = transpose(); + Matrix origByTrans = *this * temp; + Matrix psuedoInv = temp * (origByTrans.gaussJordanInverse()); + return psuedoInv; + } +} + +double Matrix::get(int r, int c) const { + return m.at(r).at(c); +} + +void Matrix::set(int r, int c, double val) { + m.at(r).at(c) = val; + //m[r][c] = val; +} + +void Matrix::print() { + for (int i = 0; i < numRow; i++) { + for (int j = 0; j < numCol; j++) { + cout << right << fixed; + cout << "[" << setprecision(4) << setw(7) << get(i, j) << "] "; + } + cout << endl; + } + cout << endl; +} + +void Matrix::printToFile(string fileName) { + ofstream outSequence(fileName.c_str()); + + for (int i = 0; i < numRow; i++) { + for (int j = 0; j < numCol; j++) { + outSequence << right << fixed; + outSequence << "[" << setprecision(4) << setw(7) << get(i, j) + << "] "; + } + outSequence << endl; + } + outSequence << endl; + + outSequence.close(); +} + +void Matrix::randFill(double low, double high) { + double x; + for (int i = 0; i < numRow; i++) { + for (int j = 0; j < numCol; j++) { + x = ((double) rand() * (high - low)) / (double) RAND_MAX + low; + set(i, j, x); + } + } +} + +void Matrix::userFill() { + double val; + for (int i = 0; i < numRow; i++) { + for (int j = 0; j < numCol; j++) { + cout << "input value for cell (" << i << ", " << j << ")?\n"; + cin >> val; + cout << endl; + set(i, j, val); + 
} + } +} + +void Matrix::fileFill(string filename) { + ifstream infile(filename.c_str()); + if (!infile) { + cerr << "file read fail" << endl; + throw exception(); + } + string line; + int i = -1; + while (getline(infile, line)) { + i++; + if (i >= numRow) { + addRow(0); + } + double num; + istringstream iss(line); + int j = -1; + while (iss >> num) { + j++; + if (j >= numCol) { + addCol(0); + } + //cout << num << endl; + set(i, j, num); + } + j = 0; + } + i = 0; +} + +void Matrix::addRow(double val) { + numRow++; + vector temp = vector(numCol, val); + m.push_back(temp); +} + +void Matrix::addCol(double val) { + numCol++; + for (int i = 0; i < numRow; i++) { + m.at(i).push_back(val); + } +} + +void Matrix::normalize(double a, double b) { + for (int j = 0; j < numCol; j++) { + int min = get(0, j); + int max = min; + for (int i = 1; i < numRow; i++) { + if (get(i, j) < min) { + min = get(i, j); + } else if (get(i, j) > max) { + max = get(i, j); + } + } + for (int i = 0; i < numRow; i++) { + set(i, j, (b - a) * ((get(i, j) - min) / (max - min)) + a); + } + } +} + +void Matrix::rowToVector(int row, vector& v) { + if (row >= numRow || row < 0) { + cerr << "Invalid Row (rowToVector)" << endl; + throw exception(); + } else { + v = m.at(row); + } +} + +void Matrix::colToVector(int col, vector& v) { + if (col >= numCol || col < 0) { + cerr << "Invalid Column (colToVector)" << endl; + throw exception(); + } else { + for (int j = 0; j < numRow; j++) { + v.push_back(m.at(j).at(col)); + } + } +} + +int Matrix::getNumRow() const { + return numRow; +} + +} diff --git a/src/cluster/src/Matrix.h b/src/cluster/src/Matrix.h new file mode 100644 index 0000000..46a73a6 --- /dev/null +++ b/src/cluster/src/Matrix.h @@ -0,0 +1,52 @@ +/* + * matrix.h + * + * Created on: May 10, 2017 + * Author: Robert Geraghty, The Bioinformatics Toolsmith Laboratory, The University of Tulsa + */ + + +#ifndef MATRIX_H_ +#define MATRIX_H_ + +#include +#include + +namespace matrix { + +class Matrix +{ 
+private: + std::vector > m; + int numRow; + int numCol; + + +public: + + Matrix(int r, int c); + Matrix(); + ~Matrix(); + Matrix operator+(Matrix n); + Matrix operator-(Matrix n); + Matrix operator*(Matrix n); + Matrix transpose(); + Matrix gaussJordanInverse(); + Matrix pseudoInverse(); + void userFill(); + double determinant(); + double get(int r, int c) const; + void set(int r, int c, double val); + void addRow(double); + void addCol(double); + void print(); + void printToFile(std::string); + void randFill(double low, double high); + void fileFill(std::string filename); + void normalize(double a, double b); + void rowToVector(int, std::vector&); + void colToVector(int, std::vector&); + int getNumRow() const; +}; +} +#endif /* MATRIX_H_ */ diff --git a/src/cluster/src/MultiMute.cpp b/src/cluster/src/MultiMute.cpp new file mode 100644 index 0000000..73ee242 --- /dev/null +++ b/src/cluster/src/MultiMute.cpp @@ -0,0 +1,455 @@ +/** + * Author: Alex Baumgartner + * The Bioinformatics Toolsmith Laboratory, the University of Tulsa + * 5/15/2018 + * + * Purpose: + * The pupose of this module is to perform non single mutations on sequences + */ + +#include "MultiMute.h" +#include +#include "Random.h" + +MultiMute::MultiMute(int a, int c, int g, int t, int alloc, bool enableTrans, bool enableRev) +{ + percAs = a; + percCs = c; + percGs = g; + percTs = t; + //Set all sub allocations to 0 if the total allocation is 0 + maxTrans = 0; + maxInsert = 0; + maxReverse = 0; + maxDup = 0; + maxDel = 0; + total_alloc = alloc; +// max_block_size = std::max(2, std::min(50, alloc / 10)); // Max mutation block size +// cout << "max block size: " << max_block_size << endl; + if (alloc == 0) { + return; + } + //Arbitrary, if only 1 percent is allocated overall, it is allocated to Insert + else if (alloc == 1) { + maxTrans = 0; + maxReverse = 0; + maxInsert = 1; + maxDup = 0; + maxDel = 0; + } else if (enableTrans) { + if (alloc > 1) { + maxTrans = rand() % alloc; + alloc -= maxTrans; + } 
+ } else if (enableRev) { + if (alloc > 1) { + maxReverse = rand() % alloc; + alloc -= maxReverse; + } + } + + if (alloc > 1) { + maxDel = (rand() % alloc); + alloc -= maxDel; + } + if (alloc > 0) { + maxDup = rand() % alloc; + alloc -= maxDup; + } else { + maxDup = 0; + if (alloc == 1) { + alloc--; + maxDel++; + } + } + maxInsert = alloc; + +// cout << "Max Rev " << maxReverse << " maxDel " << maxDel << " maxTrans " << maxTrans << " maxInsert " << maxInsert << " maxDup " << maxDup << endl; +} + +int MultiMute::getAlignmentLength() { + return alignmentLength; +} + +int MultiMute::getIBP() { + return IBP; +} + +std::string abbreviated_vec(const vector& v) +{ + std::ostringstream oss; + char last = ' '; + int block_num = 1; + for (auto s : v) { + for (auto c : s) { + if (c == last) { + block_num++; + } else if (last != ' ') { + oss << block_num << last; + block_num = 1; + last = c; + } else { + last = c; + } + } + } + return oss.str(); +} + +int get_num_blocks(const vector& v) +{ + cout << "Abbrev: " << abbreviated_vec(v) << endl; + char last = '\0'; + int n_blocks = 0; + for (auto s : v) { + for (char c : s) { + if (c == 'S') { + last = '\0'; + } else if (c != last) { + last = c; + n_blocks++; + } + } + } + return n_blocks; +} +vector MultiMute::genMulti(string * sequence) +{ + seq = sequence; +// + double mut_bp = total_alloc * seq->length() / 100.0; + max_block_size = std::max((int64_t)2, std::min((int64_t)50, (int64_t)round(mut_bp / 10))); // Max mutation block size + //Calculate the number of nucleotides allocated to each type of mutation + maxNonMutations = (int64_t) ((float) ((100 - maxReverse - maxTrans - maxInsert - maxDup - maxDel) / 100.0) * seq->length()); + maxReverse = (int64_t) ((float) (maxReverse / 100.0) * seq->length()); + maxTrans = (int64_t) ((float) (maxTrans / 100.0) * seq->length()); + maxInsert = (int64_t) ((float) (maxInsert / 100.0) * seq->length()); + maxDel = (int64_t) ((float) (maxDel / 100.0) * seq->length()); + maxDup = (int64_t) 
((float) (maxDup / 100.0) * seq->length()); + +// cout << "maxNonMutations: " << maxNonMutations << " maxReverse: " << maxReverse << " maxTrans: " << maxTrans << " maxInsert: " << maxInsert << " maxDel: " << maxDel << " maxDup: " << maxDup << endl; + //calculate alignment length and identical base pairs + alignmentLength = maxInsert + maxDup; + IBP = maxDel; + //Initialize and size vectors + int total = maxNonMutations + (2 * maxReverse) + maxTrans + maxInsert + maxTrans + maxDel + maxDup; + insertions = new vector(); + insertions->reserve(maxTrans + maxInsert); + mutationStrings = new vector(); + mutationStrings->reserve(total); + //Push 'S', which means that that is an index that wont be mutated, onto the vector + for (int i = 0; i < maxNonMutations; i++) { + mutationStrings->push_back("S"); + } + + reverse(mutationStrings); +//#pragma omp critical + { + insert(mutationStrings); + translocate(mutationStrings); + duplicate(mutationStrings); + deleteNucl(mutationStrings); + } + //Make sure no palindromes exist + checkForAllPalindromes(mutationStrings); + + //Generate a char vector from the now shuffled mutations vector + auto mutationChars = genCharVector(mutationStrings); + getTranslocations(mutationChars); + //Performs all mutations on the sequence + auto ret = formatString(seq->length() + maxTrans + maxInsert + maxDup, mutationChars); + delete mutationStrings; + delete mutationChars; + delete insertions; + return ret; +} + +void MultiMute::reverse(vector * toAddTo) +{ + //Keep forming strings until the allocation of reverse is used up + int size; + //cout << "maxReverse: " << maxReverse << endl; + while (maxReverse > 0) { + //Automatically make it 2 to avoid modulus error + if (maxReverse == 2) { + size = 2; + } else { + size = (rand() % (maxReverse - 2)) + 2; + //Add 1 to size if the remaining reverse allocation would be 1 + if (maxReverse - size == 1) { + size++; + } + } + //cout << "Reverse size: " << size << endl; + //Add a string of the randomized size to 
the vector + string toAdd(size, 'R'); + toAddTo->push_back(toAdd); + maxReverse -= size; + } +} + +void MultiMute::translocate(vector * toAddTo) +{ + int size; + //Keep forming strings until the allocation of Translocate is used up + while (maxTrans > 0) { + //Automatically make it 2 to avoid modulus error + if (maxTrans == 2) { + size = 2; + } else { + size = rng.randMod(std::min(max_block_size, maxTrans - 2)) + 2; + //Add 1 to size if the remaining reverse allocation would be 1 + if (maxTrans - size == 1) { + size++; + } + } + //Add a string of the randomized size to the vector, and an I for where to translocate to + //cout << "maxTrans=" << maxTrans << " Translocate: " << size << endl; + string toAdd(size, 'T'); + toAddTo->push_back(toAdd); + toAddTo->push_back("I"); + maxTrans -= size; + } +} + +void MultiMute::insert(vector * toAddTo) +{ + int size; + //Keep forming strings until the allocation of insert is used up + while (maxInsert > 0) { + //Automatically make it 2 to avoid modulus error + if (maxInsert == 2) { + size = 2; + } else { +// size = (rand() % (maxInsert - 2)) + 2; + size = rng.randMod(std::min(max_block_size, maxInsert - 2)) + 2; + //Add 1 to size if the remaining reverse allocation would be 1 + if (maxInsert - size == 1) { + size++; + } + } + // cout << "maxInsert=" << maxInsert << " insert " << size << endl; + //Add an I for where to insert, and add a generated string to the insetions vector + toAddTo->push_back("I"); + insertions->push_back(genInsert(size)); + maxInsert -= size; + } +} + +void MultiMute::deleteNucl(vector * toAddTo) +{ + int size; + //Keep forming strings until the allocation of deletion is used up + while (maxDel > 0) { + //Automatically make it 2 to avoid modulus error + if (maxDel == 2) { + size = 2; + } else { + size = rng.randMod(std::min(max_block_size, maxDel - 2)) + 2; + //size = (rand() % (maxDel - 2)) + 2; + //Add 1 to size if the remaining reverse allocation would be 1 + if (maxDel - size == 1) { + size++; + } + } 
+ //Add a string of X's to show what nucleotides will be deleted +// cout << "maxDelete=" << maxDel << " delete " << size << endl; + string toAdd(size, 'X'); + toAddTo->push_back(toAdd); + maxDel -= size; + } +} + +void MultiMute::duplicate(vector * toAddTo) +{ + int size; + //Keep forming strings until the allocation of duplicate is used up + while (maxDup > 0) { + //Automatically make it 2 to avoid modulus error + if (maxDup == 2) { + size = 2; + } else { + size = rng.randMod(std::min(max_block_size, maxDup - 2)) + 2; +// size = (rand() % (maxDup - 2)) + 2; + //Add 1 to size if the remaining reverse allocation would be 1 + if (maxDup - size == 1) { + size++; + } + } +// cout << "maxDup=" << maxDup << " duplicate " << size << endl; + //Add a string of D's for duplicate to the vector + string toAdd(size, 'D'); + toAddTo->push_back(toAdd); + maxDup -= size; + } +} + +bool MultiMute::checkPalindrome(int start, int end) +{ + bool equal = false; + for (; start < end; start++, end--) { + if (seq->at(start) != seq->at(end)) { + equal = true; + } + } + return equal; +} + +string MultiMute::genInsert(int size) +{ + string toInsert; + toInsert.reserve(size); + int value; + //Keep adding characters based on the original distribution of nucleotides + for (int i = 0; i < size; i++) { + value = rand() % (percAs + percCs + percGs + percTs); + if (value < percAs) { + toInsert.push_back('A'); + } else if (value < percAs + percCs) { + toInsert.push_back('C'); + } else if (value < percAs + percCs + percGs) { + toInsert.push_back('G'); + } else { + toInsert.push_back('T'); + } + } + return toInsert; +} + +vector MultiMute::formatString(int maxSize, vector * mutationsChars) +{ + string temp; + temp.reserve(maxSize); + //vector that stores what indexes have/have not been mutated + vector validCharacters; + validCharacters.reserve(mutationsChars->size() * 2); + unsigned seed = 0; + // Use of shuffle to randomize the order + shuffle(insertions->begin(), insertions->end(), 
default_random_engine(seed)); + int j = 0; + int i = 0; + //Goes through until the end of the sequence or the end of the chars vector is reached (should always be seq first) + for (; i < seq->length() && j < mutationsChars->size();) { + //If it is a non-mutation character, simply add the current character, increment both positions + if (mutationsChars->at(j) == 'S') { + temp.push_back(seq->at(i)); + i++; + j++; + validCharacters.push_back(true); + } + //If it is an I, get the next insertion string and append it to the back of the mutaton string, as long as the insertion vector still has stuffing + else if (mutationsChars->at(j) == 'I') { + if (insertions->size() > 0) { + temp.append(insertions->back()); + insertions->pop_back(); + } + //Increment only the char vector + j++; + } + //For duplications, it will add each charceter, and then read a string of the added characters in the same order + else if (mutationsChars->at(j) == 'D') { + string temp2; + temp2.reserve(seq->length() - i); + for (; j < mutationsChars->size() && mutationsChars->at(j) == 'D' && i < seq->length(); j++, i++) { + temp2.push_back(seq->at(i)); + temp.push_back(seq->at(i)); + validCharacters.push_back(false); + validCharacters.push_back(false); + } + //I and J are not incremented because they are incremented in the loop + temp.append(temp2); + } + //Otherwise, skip over the nuleotide + else { + i++; + j++; + } + } + //Add any extra insertions of there are any + if (insertions->size() > 0) { + for (int k = 0; k < insertions->size(); k++) { + temp.append(insertions->at(k)); + } + } + //Reassign the string pointer + seq->erase(); + seq->reserve(temp.length()); + seq->append(temp); + return validCharacters; +} + + +void MultiMute::getTranslocations(vector * toParseFrom) { + for (int i = 0, j = 0; i < seq->length() && j < toParseFrom->size();) { + //If a T is found, the string of nucleotides with corresponding T's is copied and added to the insertion vector + if (toParseFrom->at(j) == 'T') { + string 
temp; + temp.reserve(seq->length() - i); + for (;j < toParseFrom->size() && toParseFrom->at(j) == 'T' && i < seq->length(); i++, j++) { + temp.push_back(seq->at(i)); + } + insertions->push_back(temp); + } + //Skip over the I's + else if (toParseFrom->at(j) == 'I') { + j++; + } + //Otherwise, increment both + else { + j++; + i++; + } + } +} + +vector * MultiMute::genCharVector(vector * toParseFrom) { + vector * charVector = new vector(); + charVector->reserve(seq->length()); + string temp; + //For every index + for (int i = 0; i < toParseFrom->size(); i++) { + temp = toParseFrom->at(i); + //Add each character in the string at the index, add it to the new character vector + for (int j = 0; j < temp.length(); j++) { + charVector->push_back(temp.at(j)); + } + } + return charVector; +} + +void MultiMute::checkForAllPalindromes(vector * toParseFrom) { + int insertionChanges = 0; + for (int i = 0, j = 0; i < seq->length() && j < toParseFrom->size();) { + //If it is not a reversal + if (toParseFrom->at(j).at(0) != 'R') { + //If it is an insertion character, only increment the vector integer + if (toParseFrom->at(j).at(0) == 'I') { + j++; + } + //Otherwise, increment the string iterator by the length of the current string in the vector, + //then increment the vector integer + else { + i += toParseFrom->at(j).length(); + j++; + + } + } else { + //If it is not a palindrome, incremtn as in the if statement + if (checkPalindrome(i, i + toParseFrom->at(j).length() - 1)) { + i += toParseFrom->at(j).length(); + j++; + } + //Otherwise, replace the reverse with a transversal + else { + string temp(toParseFrom->at(j).length(), 'T'); + toParseFrom->at(j) = temp; + insertionChanges++; + } + } + } + //Insert enough I's randomly for the amount of transversals that replaced reversals + for (int i = 0; i < insertionChanges; i++) { + int index = rand() % toParseFrom->size(); + toParseFrom->insert(toParseFrom->begin() + index, "I"); + } +} diff --git a/src/cluster/src/MultiMute.h 
b/src/cluster/src/MultiMute.h new file mode 100644 index 0000000..8d27d6e --- /dev/null +++ b/src/cluster/src/MultiMute.h @@ -0,0 +1,142 @@ +/** + * Author: Alex Baumgartner + * The Bioinformatics Toolsmith Laboratory, the University of Tulsa + * 5/15/2018 + * + * Purpose: + * The pupose of this module is to perform non single mutations on sequences + */ + +#ifndef MULTIMUTE_H +#define MULTIMUTE_H + +#include +#include +#include +#include +#include +#include "Random.h" + +using namespace std; + +class MultiMute { +public: + /* + Constructor, creates values + and assignes allocations based on inputted data + + @param: + int: percentage of A's + int: percentage of C's + int: percentage of G's + int: percentage of T's + int: The total allocation for non-single mutations + int: bool to exclude Translocate and reverse, 1 for disable, any other umber for include + */ + MultiMute(int, int, int, int, int, bool, bool); + /* + Takes in a string pointer, + and mutates it based on the allocation given to the constructor. 
+ Returns a vector of all valid and invalid indexes + + @param: + std::string *: pointer to the string to be mutated + + @return: + std::vector: vector of mutations, + false means that index has been mutated + */ + std::vector genMulti(std::string *); + int getAlignmentLength(); + int getIBP(); + + private: + int percAs; + int percCs; + int percGs; + int percTs; + int64_t maxReverse; + int64_t maxInsert; + int64_t maxTrans; + int64_t maxDel; + int64_t maxDup; + int64_t maxNonMutations; + int64_t alignmentLength; + int64_t IBP; + int64_t total_alloc; + Random rng; + + int64_t max_block_size; + std::vector * insertions; + std::vector * mutationStrings; + std::string * seq; + /* + Takes in a vector + */ + void reverse(vector *); + /* + Translocates a random, nonmutaded part of the sequence, + no larger than its max allocation + */ + void translocate(vector *); + /* + Inserts at random, nonmutaded part of the sequence, + no larger than its max allocation + */ + void insert(vector *); + /* + Deletes a random, nonmutaded part of the sequence, + no larger than its max allocation + */ + void deleteNucl(vector *); + /* + Duplicates a random, nonmutaded part of the sequence, + no larger than its max allocation + to an index directly after the duplicated string + */ + void duplicate(vector *); + /* + Checks inclusively, [first, last], if a portion is valid + + @param: + int: The starting index (first) + int: The ending index (last) + + @return: + bool: true if all indexes in range are valid + */ + bool checkPalindrome(int, int); + void checkForAllPalindromes(vector *); + /* + Marks all indexes in the range as invalid + + @param: + int: first index to be marked false + int: last index tobe marked false + */ + vector formatString(int, vector *); + + /* + Generates a randomized string based on the inputed size + @param: + int: size of string to generate + @return + string: randomized string + */ + std::string genInsert(int); + /* + Adds all translocations to the insertions array 
+ @param: + vector *: pointer to a char vector with mutation characters + */ + void getTranslocations(vector *); + /* + converts a vector of strings into a vector of chars + @param: + vector *: the vector to be converted + @return: + vector *: the vector of characters + */ + vector * genCharVector(vector *); +}; +#endif diff --git a/src/cluster/src/NearestNeighbor.h b/src/cluster/src/NearestNeighbor.h new file mode 100644 index 0000000..a59b87b --- /dev/null +++ b/src/cluster/src/NearestNeighbor.h @@ -0,0 +1,52 @@ +/* -*- C++ -*- + * + * NearestNeighbor.h + * + * Author: Benjamin T James + */ +#ifndef NEARESTNEIGHBOR_H +#define NEARESTNEIGHBOR_H +// #include +// #include "Point.h" +// template +// class NearestNeighbor { +// public: +// NearestNeighbor(const vector*> &pts) : points(pts) { +// const int dim = pts[0]->get_data().size(); +// const int maxPts = pts.size(); +// dataPts = annAllocPts(maxPts, dim); +// queryPt = annAllocPt(dim); +// for (int nPts = 0; nPts < maxPts; nPts++) { +// auto vec = pts[nPts]->get_data(); +// for (int i = 0; i < vec.size(); i++) { +// dataPts[nPts][i] = vec[i]; +// } +// } +// kd_tree = new ANNkd_tree(dataPts, maxPts, dim); +// nnIdx = new ANNidx[1]; +// dists = new ANNdist[1]; +// }; +// ~NearestNeighbor() { +// delete[] nnIdx; +// delete[] dists; +// delete kd_tree; +// annClose(); +// }; +// void find_nearest_neighbor(Point ¢er) const { +// auto vec = center.get_data(); +// for (int i = 0; i < vec.size(); i++) { +// queryPt[i] = vec[i]; +// } +// kd_tree->annkSearch(queryPt, 1, nnIdx, dists); +// ANNidx idx = nnIdx[0]; +// center.set(*points[idx]); +// }; +// private: +// ANNkd_tree *kd_tree = NULL; +// ANNpointArray dataPts; +// ANNpoint queryPt; +// ANNidxArray nnIdx; +// ANNdistArray dists; +// const vector*> &points; +// }; +#endif diff --git a/src/cluster/src/Point.h b/src/cluster/src/Point.h new file mode 100644 index 0000000..a70bc20 --- /dev/null +++ b/src/cluster/src/Point.h @@ -0,0 +1,83 @@ +/* -*- C++ -*- + * + * 
Point.h + * + * Author: Benjamin T James + * + * For some reason this class was made pure virtual + * in early development of MeShClust, making Histogram + * and DivergencePoint both derivatives that essentially + * did the same thing + */ +#ifndef POINT_H +#define POINT_H + +#include +#include "../../nonltr/ChromosomeOneDigit.h" + +/* + * Pure virtual class that defines behavior for + * points. Has clone() and create() that allow for + * polymorphic behavior + */ +template +class Point { +public: + virtual ~Point() { data.clear(); }; + virtual void operator*=(double d) = 0; + virtual void operator/=(double d) = 0; + virtual bool operator<(Point& p) const = 0; + virtual uint64_t operator-(const Point& p) const = 0; + virtual void operator+=(Point& p) = 0; + virtual void set(Point& p) = 0; + virtual void display() const = 0; + virtual uint64_t distance(const Point& p) const = 0; + virtual double distance_d(Point& p) const = 0; + virtual Point* clone() const = 0; + virtual Point* create() const = 0; + + virtual void zero() = 0; + virtual void addOne() = 0; + virtual double distance_k1(const Point& p) const = 0; + virtual double prob_under(Point& center) const = 0; + virtual void subOne() = 0; + virtual uint64_t getRealMagnitude() const = 0; +// virtual T magnitude() const = 0; + virtual bool is_to_delete() const = 0; + virtual void set_to_delete(bool b) = 0; + + virtual Point* create_double() const = 0; + virtual void set_arg_to_this_d(Point& p) const = 0; + + virtual const vector& get_data() const = 0; + + void set_header(const std::string c) { header = c; }; + const std::string get_header() const { return header; }; + + void set_data_str(const std::string& c) { data = c; }; + const std::string & get_data_str() const { return data; }; + + void set_1mers(const vector &vec) { + for (auto i = 0; i < 4; i++) { + one_mers[i] = vec[i]; + } + } + vector get_1mers() const { + vector vec; + for (auto i = 0; i < 4; i++) { + vec.push_back(one_mers[i]); + } + return vec; + } + 
virtual unsigned long size() const = 0; + virtual void set_id(uintmax_t c_id) = 0;//{ id = c_id; }; + virtual const uintmax_t get_id() const = 0;//{ return id; }; + virtual void set_length(unsigned long len) = 0; + virtual unsigned long get_length() const = 0; +private: + uint64_t one_mers[4]; + std::string header; + std::string data; +}; + +#endif diff --git a/src/cluster/src/Predictor.cpp b/src/cluster/src/Predictor.cpp new file mode 100644 index 0000000..8ff4755 --- /dev/null +++ b/src/cluster/src/Predictor.cpp @@ -0,0 +1,837 @@ +/* -*- C++ -*- + * + * Predictor.cpp + * + * Author: Benjamin T James + * + * Predictor implementation class + * train(vector<>...) is entry point, generates "semi-synthetic" sequences + * train() actually trains applicable GLM's. + * close() and similarity() are callable once trained + */ +#include "Predictor.h" +#include "Loader.h" +#include "Matrix.h" +#include "ClusterFactory.h" +#include "HandleSeq.h" +#include "Progress.h" +#include "Random.h" +#include + +template +void Predictor::save(std::string file) +{ + std::ofstream out(file); + out << "k: " << k << endl; + out << "mode: " << (unsigned int)mode << endl; + out << "max_features: " << max_num_feat << endl; + out << "ID: " << id << endl; + if (mode & PRED_MODE_CLASS) { + write_to(out, feat_c, c_glm); + } + if (mode & PRED_MODE_REGR) { + write_to(out, feat_r, r_glm); + } +} + +template +Predictor::Predictor(const std::string filename) +{ + std::ifstream in(filename); + std::string buf; + unsigned mode_ = 0; + in >> buf >> k; + cout << buf << k << endl; + in >> buf >> mode_; + mode = mode_; + cout << buf << mode << endl; + in >> buf >> max_num_feat; + cout << buf << max_num_feat << endl; + in >> buf >> id; + cout << buf << id << endl; + is_trained = true; + is_training = false; + if (mode & PRED_MODE_CLASS) { + auto pr = read_from(in, k); + c_glm = pr.first; + feat_c = pr.second; + } + if (mode & PRED_MODE_REGR) { + auto pr = read_from(in, k); + r_glm = pr.first; + feat_r = 
pr.second; + } +} + +template +void Predictor::write_to(std::ofstream &out, Feature* feat, matrix::GLM glm) +{ + auto combos = feat->get_combos(); + auto lookup = feat->get_lookup(); + auto mins = feat->get_mins(); + auto maxs = feat->get_maxs(); + out << std::endl << "n_combos: " << combos.size() << std::endl; + out << glm.get_weights().get(0, 0) << endl; + for (int j = 0; j < combos.size(); j++) { + auto cmb = combos[j]; + unsigned int val = 0; + uint64_t flags = 0; + for (auto i : cmb.second) { + flags |= lookup[i]; + } + switch (cmb.first) { + case Combo::xy: + val = 0; + break; + case Combo::xy2: + val = 1; + break; + case Combo::x2y: + val = 2; + break; + case Combo::x2y2: + val = 3; + break; + } + out << val << " "; + out << flags << " "; + out << glm.get_weights().get(j+1, 0) << std::endl; + } + out << std::endl << "n_singles: " << lookup.size() << std::endl; + for (int j = 0; j < lookup.size(); j++) { + out << lookup[j] << " "; + out << mins[j] << " "; + out << maxs[j] << std::endl; + } +} + + +template +pair*> Predictor::read_from(std::ifstream& in, int k_) +{ + matrix::GLM glm; + int c_num_raw_feat, c_num_combos; + Feature *feat = new Feature(k_); + std::string buf; + in >> buf >> c_num_combos; + cout << buf << "\"" << c_num_combos << "\"" << endl; + matrix::Matrix weights(c_num_combos+1, 1); + double d_; + in >> d_; + weights.set(0, 0, d_); + for (int i = 0; i < c_num_combos; i++) { + int cmb; + in >> cmb; + cout << (int)cmb << endl; + uint64_t flags; + in >> flags; + cout << flags << endl; + double d; + in >> d; + cout << "[" << 0 << "," << i << "] " << d << endl; + weights.set(i+1, 0, d);//push_back(d); + Combo cmb_ = Combo::xy; + switch (cmb) { + case 0: + cmb_ = Combo::xy; + break; + case 1: + cmb_ = Combo::xy2; + break; + case 2: + cmb_ = Combo::x2y; + break; + case 3: + cmb_ = Combo::x2y2; + break; + default: + cerr << "error reading weights file" << endl; + break; + } + feat->add_feature(flags, cmb_); + } + + in >> buf >> c_num_raw_feat; + cout 
<< buf << "\"" << c_num_raw_feat << "\"" << endl; + for (int i = 0; i < c_num_raw_feat; i++) { + uint64_t single_flag; + double min_, max_; + in >> single_flag; + cout << single_flag << endl; + in >> min_; + cout << min_ << endl; + in >> max_; + cout << max_ << endl; + feat->set_normal(single_flag, min_, max_); + } + feat->finalize(); + glm.load(weights); + return {glm, feat}; +} + +template +void Predictor::add_feats(std::vector >& vec, uint64_t feat_flags) +{ + for (uint64_t i = 1; i <= feat_flags; i *= 2) { + if ((i & feat_flags) == 0) { + continue; + } + for (uint64_t j = 1; j <= i; j *= 2) { + if ((j & feat_flags) == 0) { + continue; + } + vec.emplace_back(i | j, Combo::xy); + vec.emplace_back(i | j, Combo::x2y2); + if (i != j) { + vec.emplace_back(i | j, Combo::x2y); + vec.emplace_back(i | j, Combo::xy2); + } + } + } +} +template +void Predictor::check() +{ + // if (!is_trained && training.size() >= threshold && !is_training) { + // omp_set_lock(&lock); + // is_training = true; + // train(); + // is_training = false; + // omp_unset_lock(&lock); + // } +} +template +double Predictor::similarity(Point* a, Point* b) +{ + if (!is_trained) { +// double d = Selector::align(a, b); + cerr << "alignment: we don't do that here" << endl; + throw "Bad"; + // return d; + // if (!is_training) { + // omp_set_lock(&lock); + // if (training.size() < testing.size() && training.size() < threshold) { + // training.push_back(pra(a, b, d)); + // } else if (training.size() >= testing.size() && testing.size() < threshold) { + // testing.push_back(pra(a, b, d)); + // } + // omp_unset_lock(&lock); + // } + return 0; + + } else { + return predict(a, b); + } +} + +template +bool Predictor::close(Point *a, Point *b) +{ + if (!is_trained) { +// double d = Selector::align(a, b); + cerr << "alignment shouldn't be used here" << endl; + throw "bad"; + // if (!is_training) { + // omp_set_lock(&lock); + // if (training.size() < testing.size() && training.size() < threshold) { + // 
training.push_back(pra(a, b, d)); + // } else if (training.size() >= testing.size() && testing.size() < threshold) { + // testing.push_back(pra(a, b, d)); + // } + // omp_unset_lock(&lock); + // } +// return d > id; + return false; + } + bool val = p_close(a, b); + if ((mode & PRED_MODE_REGR) && val) { + // val = p_predict(a, b) > id; + // if (!val) { + // cout << "FIXED" << endl; + // } + } + return val; +} + +template +double Predictor::p_predict(Point* a, Point* b) +{ + auto cache = feat_r->compute(*a, *b); + auto weights = r_glm.get_weights(); + double sum = weights.get(0, 0); + for (int col = 0; col < feat_r->size(); col++) { + double val = (*feat_r)(col, cache); + sum += weights.get(col+1, 0) * val; + } + if (sum < 0) { + sum = 0; + } else if (sum > 1) { + sum = 1; + } + return sum; +} +template +double Predictor::predict(Point* a, Point* b) +{ + if ((mode & PRED_MODE_CLASS) && !p_close(a, b)) { + return 0; + } + return p_predict(a, b); +} + +template +bool Predictor::p_close(Point* a, Point* b) +{ + auto weights = c_glm.get_weights(); + double sum = weights.get(0, 0); + auto cache = feat_c->compute(*a, *b); + for (int col = 1; col < weights.getNumRow(); col++) { + double d = (*feat_c)(col-1, cache); + sum += weights.get(col, 0) * d; + } + return sum > 0.0; +} + + +template +std::pair generate_feat_mat(const vector > &data, Feature& feat, double cutoff) +{ + bool classify = (cutoff >= 0); + int nrows = data.size(); + int ncols = feat.size()+1; + matrix::Matrix feat_mat(nrows, ncols); + matrix::Matrix labels(nrows, 1); + #pragma omp parallel for + for (int row = 0; row < data.size(); row++) { + auto kv = data.at(row); + vector cache; + // #pragma omp critical + // { + cache = feat.compute(*kv.first, *kv.second); + // } + feat_mat.set(row, 0, 1); + if (classify) { + labels.set(row, 0, kv.val >= cutoff ? 
1 : -1); + } else { + labels.set(row, 0, kv.val); + } + for (int col = 1; col < ncols; col++) { + double val = feat(col-1, cache); + feat_mat.set(row, col, val); + } + } + return std::make_pair(feat_mat, labels); +} + +template +void Predictor::train(const vector *> &points, const vector* > &queries, uintmax_t &_id, size_t num_sample) +{ + if (is_trained) { return; } + + num_sample = min(num_sample, points.size()); + + vector*> f_points_tr, f_points_test; + size_t total_size = points.size();// + queries.size(); + for (int i = 0; i < num_sample; i++) { + int i1 = floor((double)i * total_size / (2 * num_sample)); + int i2 = floor((i + 1) * (double)total_size / (2 * num_sample)); + f_points_tr.push_back(points.at(i1)); + f_points_test.push_back(points.at(i2)); + } + // size_t q_sample = min(num_sample / 10, queries.size()); + // while (10 * f_points_tr.size() <= 11 * num_sample) { + // for (int i = 0; i < q_sample; i++) { + // int i1 = floor((double)i * queries.size() / (2 * q_sample)); + // int i2 = floor((i + 1) * (double)queries.size() / (2 * q_sample)); + // f_points_tr.push_back(queries.at(i1)); + // f_points_test.push_back(queries.at(i2)); + // } + // } + training.clear(); + testing.clear(); + if (mode & PRED_MODE_CLASS) { + + std::vector > pos_buf, neg_buf; + cout << "mutating sequences" << endl; + size_t counter = 0; + // struct timespec start, stop; + // clock_gettime(CLOCK_MONOTONIC, &start); + #pragma omp parallel for + for (size_t i = 0; i < f_points_tr.size(); i++) { + auto p = f_points_tr[i]; + mutate_seqs(p, 5, pos_buf, neg_buf, 100 * id, 100, _id); + mutate_seqs(p, 5, pos_buf, neg_buf, min_id, 100 * id, _id); + #pragma omp critical + cout << "Generated " << ++counter << " / " << f_points_tr.size() << endl; + } + // clock_gettime(CLOCK_MONOTONIC, &stop); + // printf("took %lu\n", stop.tv_sec - start.tv_sec); + + counter = 0; + size_t buf_size = std::min(pos_buf.size(), neg_buf.size()); + cout << "training +: " << pos_buf.size() << endl; + cout << 
"training -: " << neg_buf.size() << endl; + std::random_shuffle(pos_buf.begin(), pos_buf.end()); + std::random_shuffle(neg_buf.begin(), neg_buf.end()); + for (size_t i = 0; i < buf_size; i++) { + training.push_back(pos_buf[i].deep_clone()); + training.push_back(neg_buf[i].deep_clone()); + } + for (auto p : pos_buf) { + delete p.first; + delete p.second; + } + for (auto p : neg_buf) { + delete p.first; + delete p.second; + } + pos_buf.clear(); + neg_buf.clear(); + #pragma omp parallel for + for (size_t i = 0; i < f_points_test.size(); i++) { + auto p = f_points_test[i]; + mutate_seqs(p, 5, pos_buf, neg_buf, 100 * id, 100, _id); + mutate_seqs(p, 5, pos_buf, neg_buf, min_id, 100 * id, _id); +#pragma omp critical + cout << "Generated " << ++counter << " / " << f_points_test.size() << endl; + } + buf_size = std::min(pos_buf.size(), neg_buf.size()); + cout << "testing +: " << pos_buf.size() << endl; + cout << "testing -: " << neg_buf.size() << endl; + std::random_shuffle(pos_buf.begin(), pos_buf.end()); + std::random_shuffle(neg_buf.begin(), neg_buf.end()); + for (size_t i = 0; i < buf_size; i++) { + testing.push_back(pos_buf[i].deep_clone()); + testing.push_back(neg_buf[i].deep_clone()); + } + for (auto p : pos_buf) { + delete p.first; + delete p.second; + } + for (auto p : neg_buf) { + delete p.first; + delete p.second; + } + } else { + for (auto p : f_points_tr) { + mutate_seqs(p, 10, training, training, min_id, 100, _id); + } + for (auto p : f_points_test) { + mutate_seqs(p, 10, testing, testing, min_id, 100, _id); + } + } + + + train(); +} +template +std::pair regression_train(const vector > &data, Feature& feat) +{ + auto pr = generate_feat_mat(data, feat, -1); + matrix::GLM glm; + glm.train(pr.first, pr.second); + auto result1 = pr.first * glm.get_weights(); + auto diff1 = result1 - pr.second; + double sum = 0; + for (int i = 0; i < diff1.getNumRow(); i++) { + sum += fabs(diff1.get(i, 0)); + } + sum /= diff1.getNumRow(); + return {sum, glm}; +} + +template 
+std::pair class_train(vector > &data, Feature& feat, double cutoff) +{ + // vector > above, below; + + // for (auto d : data) { + // if (d.val > cutoff) { + // above.push_back(d); + // } else { + // below.push_back(d); + // } + // } + // size_t sz = std::min(above.size(), below.size()); + // data.clear(); + // for (size_t i = 0; i < sz; i++) { + // data.push_back(above[i]); + // data.push_back(below[i]); + // } + auto pr = generate_feat_mat(data, feat, cutoff); + matrix::GLM glm; + glm.train(pr.first, pr.second); + matrix::Matrix p = glm.predict(pr.first); + for (int row = 0; row < p.getNumRow(); row++) { + if (p.get(row, 0) == 0) { + p.set(row, 0, -1); + } + } + double acc = get<0>(glm.accuracy(pr.second, p)); + return {acc, glm}; +} + +template +double regression_test(const vector >& data, Feature& feat, const matrix::GLM& glm, std::string prefix="") +{ + auto pr = generate_feat_mat(data, feat, -1); + auto result1 = pr.first * glm.get_weights(); + auto diff1 = result1 - pr.second; + double sum = 0; + for (int i = 0; i < diff1.getNumRow(); i++) { + sum += fabs(diff1.get(i, 0)); + } + if (prefix != "") { + for (int row = 0; row < result1.getNumRow(); row++) { + cout << prefix << ";" << data[row].first->get_header() << ";" << data[row].second->get_header() << ";" << result1.get(row, 0) << ";" << pr.second.get(row, 0) << ";" << diff1.get(row, 0) << endl; + } + } + sum /= diff1.getNumRow(); + return sum; +} + +template +void print_wrong(matrix::Matrix oLabels, matrix::Matrix pLabels) +{ + for(int i = 0; i < oLabels.getNumRow(); i++){ + if(oLabels.get(i,0) == pLabels.get(i, 0)){ + cout << ""; + } + } +} + +template +tuple class_test(const vector >& data, Feature& feat, const matrix::GLM& glm, double cutoff, std::string prefix="") +{ + auto pr = generate_feat_mat(data, feat, cutoff); + matrix::Matrix p = glm.predict(pr.first); + for (int row = 0; row < p.getNumRow(); row++) { + if (p.get(row, 0) == 0) { + p.set(row, 0, -1); + } + if (prefix != "") { + cout << prefix << 
";" << data[row].first->get_header() << ";" << data[row].second->get_header() << ";" << data[row].val << ";" << p.get(row, 0) << ";" << pr.second.get(row, 0) << endl; + } + } +// print_wrong(pr.second, p); + return glm.accuracy(pr.second, p); +} + +template +void Predictor::filter(std::vector > &vec, std::string prefix) +{ + std::vector > > bins; + std::vector limits; + size_t num_bins = 10; + size_t smallest_bin_size = vec.size(); + for (size_t i = 0; i < num_bins; i++) { + limits.push_back(id + i * (1 - id) / num_bins); + bins.push_back(std::vector >()); + } + limits.push_back(1); + for (auto p : vec) { + for (size_t i = 1; i < limits.size(); i++) { + if (p.val <= limits[i] && p.val > limits[i-1]) { + bins[i-1].push_back(p); + if (prefix != "") { + cout << prefix << " bin " << i - 1 << " " << p.val << endl; + } + break; + } + } + } + size_t bin_size = 0; + for (auto &v : bins) { + bin_size += v.size(); + // smallest_bin_size = std::min(smallest_bin_size, v.size()); + std::random_shuffle(v.begin(), v.end()); + } + smallest_bin_size = bin_size / bins.size(); + vec.clear(); + + for (auto &v : bins) { + for (size_t i = 0; i < std::min(v.size(), smallest_bin_size); i++) { + vec.push_back(v[i]); + } + } + cout << "new vector size: " << vec.size() << " divided into " << bins.size() << " equal parts" << endl; +} + +double rand_between(double mute, double rng, double low, double high) +{ + Random r; + double r_d = r.random(); + + double mn = std::max(mute - rng, low); + double mx = std::min(mute + rng, high); + return r_d * (mx - mn) + mn; +} + +template +void Predictor::mutate_seqs(Point* p, size_t num_seq, vector > &pos_buf, vector > &neg_buf, double id_begin, double id_end, uintmax_t& _id) +{ + HandleSeq h(mut_type); + ClusterFactory factory(k); + double inc = (id_end - id_begin) / num_seq; + std::string bin_seq = p->get_data_str(); + std::string seq; + for (auto c : bin_seq) { + switch (c) { + case 0: + seq += 'A'; + break; + case 1: + seq += 'C'; + break; + case 2: + 
seq += 'G'; + break; + case 3: + seq += 'T'; + break; + case 'N': + seq += 'C'; + break; + default: + cout << "Invalid character " << c << endl; + cout << "from sequence " << bin_seq << endl; + throw 3; + } + } + for (size_t i = 0; i < num_seq; i++) { + double iter_id = id_begin + inc * (i + 0.5); + double actual_id = rand_between(iter_id, inc, id_begin, id_end); + int mut = round(100 - actual_id); + auto newseq = h.mutate(seq, mut); + std::string chrom; + std::string header = p->get_header(); + Point* new_pt = Loader::get_point(header, newseq.second, _id, k); + pra pr; + pr.first = p->clone(); + pr.second = new_pt; + pr.val = newseq.first; +#pragma omp critical + { + if (pr.val > id) { + pos_buf.push_back(pr); + } else { + neg_buf.push_back(pr); + } + } + } +} +template +void Predictor::train() +{ + Feature feat(k); + feat.set_save(true); + + uint64_t max_feat = 0; + for (uint64_t i = 0; i < possible_feats.size(); i++) { + if (possible_feats.at(i).first > max_feat) { + max_feat |= possible_feats.at(i).first; + } + } + for (uint64_t i = 1; i <= max_feat; i *= 2) { + if (i & max_feat) { + feat.add_feature(i, Combo::xy); + } + } + feat.normalize(training); + feat.normalize(testing); + feat.finalize(); + + + + // cout << "Class Training:" << endl; + // for (auto p : training) { + // cout << p.val << " "; + // } + // cout << "Class Testing:" << endl; + // for (auto p : testing) { + // cout << p.val << " "; + // } + if (mode & PRED_MODE_CLASS) { + train_class(&feat); + if (mode & PRED_MODE_REGR) { + // vector*> f_points_tr, f_points_test; + // for (int i = 0; i < 10; i++) { + // f_points_tr.push_back(training[rand()%training.size()].first); + // f_points_test.push_back(training[rand()%training.size()].first); + // } + // training.clear(); + // testing.clear(); + // for (auto p : f_points_tr) { + // mutate_seqs(p, 50, training, 100 * id, 100); + // mutate_seqs(p, 50, training, 60, 100 * id); + // } + // for (auto p : f_points_test) { + // mutate_seqs(p, 50, testing, 100 
* id, 100); + // mutate_seqs(p, 50, testing, 60, 100 * id); + // } + // filter(); + auto func = [&](pra pr) { + return pr.val <= id; + }; + training.erase(std::remove_if(training.begin(), training.end(), func), training.end()); + testing.erase(std::remove_if(testing.begin(), testing.end(), func), testing.end()); + filter(training);//, "training"); + filter(testing);//, "testing"); + + } + } + if (mode & PRED_MODE_REGR) { + train_regr(&feat); + } + cout << "Training size: " << training.size() << endl; + cout << "Testing size: " << testing.size() << endl; + // for (auto p : training) { + // cout << p.val << " "; + // } + cout << endl; + feat.set_save(false); + training.clear(); + testing.clear(); + possible_feats.clear(); + is_trained = true; +} + +template +void Predictor::train_class(Feature* feat) +{ + auto c_size = feat->get_combos().size(); + for (int i = 0; i < c_size; i++) { + feat->remove_feature(); + } + vector used_list; + double abs_best_acc = 0; +// cout << "possible feats at one step: " << possible_feats.size() << endl; + Progress prog(possible_feats.size() * max_num_feat, "Feature selection:"); + + std::ostringstream oss; + for (auto num_feat = 1; num_feat <= max_num_feat; num_feat++) { + double best_class_acc = abs_best_acc; + uintmax_t best_idx = -1, cur_idx = 1; + auto best_class_feat = possible_feats.front(); + for (uint64_t i = 0; i < possible_feats.size(); i++) { + if (std::find(used_list.begin(), used_list.end(), i) != used_list.end()) { + continue; + } + auto rfeat = possible_feats[i]; + feat->add_feature(rfeat.first, rfeat.second); + feat->normalize(training); + feat->finalize(); + auto name = feat->feat_names().back(); + auto pr = class_train(training, *feat, id); + auto class_ac = class_test(testing, *feat, pr.second, id); + feat->remove_feature(); + prog++; +// cout << "Feature: " << cur_idx++ << "/" << possible_feats.size() - used_list.size() << " " << num_feat << "/" << max_num_feat << " " << name << " acc: " << get<0>(class_ac) << " sens: 
" << get<1>(class_ac) << " spec: " << get<2>(class_ac) << endl; + if (get<0>(class_ac) > best_class_acc) { + best_class_acc = get<0>(class_ac); + best_class_feat = rfeat; + best_idx = i; + } + } + if (best_class_acc > abs_best_acc || num_feat <= min_num_feat) { + feat->add_feature(best_class_feat.first, best_class_feat.second); + feat->normalize(training); + feat->finalize(); + abs_best_acc = best_class_acc; + used_list.push_back(best_idx); + oss << "Feature added: " << best_class_feat.first << " " << (int)best_class_feat.second << endl; + oss << "Accuracy: " << best_class_acc << endl; + possible_feats.erase(std::remove(possible_feats.begin(), possible_feats.end(), best_class_feat), possible_feats.end()); + } + } + prog.end(); + cout << oss.str(); + feat_c = new Feature(*feat); + feat_c->set_save(false); + auto pr = class_train(training, *feat_c, id); + cout << "Training ACC: " << pr.first << endl; + c_glm = pr.second; + auto train_results = class_test(training, *feat_c, c_glm, id);//, "train"); + cout << "Training ACC: " << get<0>(train_results) << " " << get<1>(train_results) << " " << get<2>(train_results) << endl; + auto test_results = class_test(testing, *feat_c, c_glm, id);//, "test"); + double class_acc = get<0>(test_results); + cout << "Testing ACC: " << class_acc << " " << get<1>(test_results) << " " << get<2>(test_results) << endl; + + cout << "Features: "<< endl; + for (auto line : feat_c->feat_names()) { + cout << "\t" << line << endl; + } +} +template +void Predictor::train_regr(Feature* feat) +{ + auto c_size = feat->get_combos().size(); + for (int i = 0; i < c_size; i++) { + feat->remove_feature(); + } + vector used_list; + double abs_best_regr = 1000000; + for (auto num_feat = 1; num_feat <= max_num_feat; num_feat++) { + double best_regr_err = abs_best_regr; + uintmax_t best_idx = -1, cur_idx = 1; + auto best_regr_feat = possible_feats.front(); + for (uint64_t i = 0; i < possible_feats.size(); i++) { + if (std::find(used_list.begin(), 
used_list.end(), i) != used_list.end()) { + continue; + } + auto rfeat = possible_feats[i]; + feat->add_feature(rfeat.first, rfeat.second); + feat->normalize(training); + feat->finalize(); + auto pr = regression_train(training, *feat); + auto name = feat->feat_names().back(); + double regr_mse = regression_test(testing, *feat, pr.second); + feat->remove_feature(); + + cout << "Feature: " << cur_idx++ << "/" << possible_feats.size() - used_list.size() << " " << num_feat << "/" << max_num_feat << " " << name << " err: " << regr_mse << endl; + if (regr_mse < best_regr_err) { + best_regr_err = regr_mse; + best_regr_feat = rfeat; + best_idx = i; + } + } + if (best_regr_err < abs_best_regr) { + feat->add_feature(best_regr_feat.first, best_regr_feat.second); + feat->normalize(training); + feat->finalize(); + abs_best_regr = best_regr_err; + used_list.push_back(best_idx); + //possible_feats.erase(std::remove(possible_feats.begin(), possible_feats.end(), best_regr_feat), possible_feats.end()); + } + } + feat_r = new Feature(*feat); + feat_r->set_save(false); + auto pr = regression_train(training, *feat_r); + r_glm = pr.second; + double tr_regr_mse = regression_test(testing, *feat_r, r_glm); // "training" + cout << "Training Mean Error: " << pr.first << endl; + double regr_mse = regression_test(testing, *feat_r, r_glm);//, "testing"); + cout << "Testing Mean Error: " << regr_mse << endl; + cout << "Features: "<< endl; + for (auto line : feat_r->feat_names()) { + cout << "\t" << line << endl; + } + // auto w = r_glm.get_weights(); + // for (int r = 0; r < w.getNumRow(); r++) { + // for (int c = 0; c < w.getNumCol(); c++) { + // cout << w.get(r, c) << " "; + // } + // cout << endl; + // } + // for (auto combo : feat.get_combos()) { + // cout << combo.first << " " << + // } + +} + +template class Predictor; +template class Predictor; +template class Predictor; +template class Predictor; +template class Predictor; +template class Predictor; diff --git 
a/src/cluster/src/Predictor.h b/src/cluster/src/Predictor.h new file mode 100644 index 0000000..bf35036 --- /dev/null +++ b/src/cluster/src/Predictor.h @@ -0,0 +1,78 @@ +/* -*- C++ -*- + * + * Predictor.h + * + * Author: Benjamin T James + * + * Main class for training and prediction + * Does bulk training, but can be adapted for on-line training + */ + +#ifndef PREDICTOR_H +#define PREDICTOR_H + +#include "GLM.h" +#include "Point.h" +#include "Feature.h" +#include +#include +#define PRED_MODE_CLASS 1 +#define PRED_MODE_REGR 2 + +#define PRED_FEAT_FAST (FEAT_EUCLIDEAN | FEAT_MANHATTAN | FEAT_INTERSECTION | FEAT_KULCZYNSKI2 | FEAT_SIMRATIO | FEAT_NORMALIZED_VECTORS | FEAT_PEARSON_COEFF | FEAT_EMD | FEAT_LENGTHD ) +#define PRED_FEAT_DIV (FEAT_JEFFEREY_DIV | FEAT_JENSEN_SHANNON) +#define PRED_FEAT_ALL (FEAT_HELLINGER|FEAT_MANHATTAN|FEAT_EUCLIDEAN|FEAT_CHI_SQUARED|FEAT_NORMALIZED_VECTORS|FEAT_HARMONIC_MEAN|FEAT_JEFFEREY_DIV|FEAT_K_DIV|FEAT_PEARSON_COEFF|FEAT_SQCHORD|FEAT_KL_COND|FEAT_MARKOV|FEAT_INTERSECTION|FEAT_RRE_K_R|FEAT_D2z|FEAT_SIM_MM|FEAT_EUCLIDEAN_Z|FEAT_EMD|FEAT_SPEARMAN|FEAT_JACCARD|FEAT_LENGTHD|FEAT_D2s|FEAT_AFD|FEAT_MISMATCH|FEAT_CANBERRA|FEAT_KULCZYNSKI1|FEAT_KULCZYNSKI2|FEAT_SIMRATIO|FEAT_JENSEN_SHANNON|FEAT_D2_star|FEAT_N2R|FEAT_N2RC|FEAT_N2RRC) + +template +class Predictor { +public: + Predictor(int k_, double id_, uint8_t mode_, uint64_t feats, int mut_type_, int min_num_feat_=3, int max_num_feat_=5, double min_id_=0.35) : k(k_), id(id_), is_trained(false), is_training(false), mode(mode_), max_num_feat(max_num_feat_), mut_type(mut_type_), min_num_feat(min_num_feat_), min_id(min_id_ * 100) { + add_feats(possible_feats, feats); + feat_c = NULL; + feat_r = NULL; + omp_init_lock(&lock); + }; + Predictor(const std::string filename); + ~Predictor() { + possible_feats.clear(); + omp_destroy_lock(&lock); + if (feat_c) { + delete feat_c; + } + if (feat_r) { + delete feat_r; + } + training.clear(); + testing.clear(); + } + void train(const std::vector* >& vec, 
const std::vector* >& vecq, uintmax_t& _id, size_t num_sample); + double similarity(Point* a, Point* b); + bool close(Point* a, Point* b); + void save(std::string file); + void check(); + uint8_t get_mode() const { return mode; } + pair*, matrix::GLM> get_class() { return std::make_pair(new Feature(*feat_c), c_glm); } +private: + static void add_feats(std::vector >& vec, uint64_t flags); + static pair*> read_from(std::ifstream &in, int k_); + static void write_to(std::ofstream &out, Feature* f, matrix::GLM glm); + void filter(std::vector > &s, std::string prefix=""); + void train(); + void train_class(Feature* feat); + void train_regr(Feature* feat); + void train_class_regr(Feature* feat); + double predict(Point* a, Point* b); + bool p_close(Point* a, Point* b); + double p_predict(Point* a, Point* b); + void mutate_seqs(Point* p, size_t num_seq, vector > &,vector > & , double id_begin, double id_end, uintmax_t& _id); + Feature *feat_c, *feat_r; + matrix::GLM c_glm, r_glm; + vector > training, testing; + bool is_trained, is_training; + int min_num_feat, max_num_feat, k, mut_type; + uint8_t mode; + double id, min_id; + vector > possible_feats; + omp_lock_t lock; +}; +#endif diff --git a/src/cluster/src/Progress.cpp b/src/cluster/src/Progress.cpp new file mode 100644 index 0000000..e16ef06 --- /dev/null +++ b/src/cluster/src/Progress.cpp @@ -0,0 +1,65 @@ +#include "Progress.h" +#include +#include + +Progress::Progress(long num, std::string prefix_) +{ + pmax = num; + ended = 0; + pcur = 0; + prefix = prefix_; + last = ""; + barWidth = 70 - (prefix.size()+1); + print(); +} + +void Progress::print() +{ + std::ostringstream oss; + double prog = (double)pcur / pmax; + oss << prefix << " ["; + int pos = barWidth * prog; + for (int i = 0; i < barWidth; i++) { + if (i < pos) { + oss << "="; + } else if (i == pos) { + oss << ">"; + } else { + oss << " "; + } + } + oss << "] " << int(prog * 100.0) << " %\r"; + if (oss.str() != last) { + last = oss.str(); + std::cout << last; + 
std::cout.flush(); + } +} + +void Progress::end() +{ + if (!ended) { + pcur = pmax; + print(); + std::cout << std::endl; + } + ended = true; +} + +void Progress::operator++() +{ + pcur++; + print(); +} +void Progress::operator++(int) +{ + print(); + pcur++; +} + + +void Progress::operator+=(size_t num) +{ + pcur += num; + print(); +} diff --git a/src/cluster/src/Progress.h b/src/cluster/src/Progress.h new file mode 100644 index 0000000..f59d948 --- /dev/null +++ b/src/cluster/src/Progress.h @@ -0,0 +1,29 @@ +/* -*- C++ -*- + * + * Progress.h + * + * Author: Benjamin T James + */ +#include +#ifndef PROGRESS_H +#define PROGRESS_H + +class Progress { +public: + Progress(long num, std::string prefix_); + ~Progress() { end(); } + void end(); + void operator++(); + void operator++(int); + void operator+=(size_t); +private: + void print(); + long pmax; + long pcur; + bool ended; + std::string prefix; + int barWidth; + + std::string last; +}; +#endif diff --git a/src/cluster/src/Random.h b/src/cluster/src/Random.h new file mode 100644 index 0000000..3131b34 --- /dev/null +++ b/src/cluster/src/Random.h @@ -0,0 +1,22 @@ +#ifndef RANDOM_H // -*- C++ -*- +#define RANDOM_H +#include + +class Random { + std::mt19937 rng; +public: + Random() : rng(std::random_device()()) {} + + template + T randMod(T max) { + std::uniform_int_distribution distribution(0, max-1); + return distribution(rng); + } + + double random() { + std::uniform_real_distribution distribution(0.0, 1.0); + return distribution(rng); + } +}; + +#endif diff --git a/src/cluster/src/Runner.cpp b/src/cluster/src/Runner.cpp new file mode 100644 index 0000000..b53449d --- /dev/null +++ b/src/cluster/src/Runner.cpp @@ -0,0 +1,397 @@ +/* -*- C++ -*- + * + * Runner.cpp + * + * Author: Benjamin T James + */ +#include +#include +#include +#include +#include +#include "../../nonltr/ChromListMaker.h" +#include "../../utility/AffineId.h" +#include "Runner.h" +#include "Trainer.h" +#include "ClusterFactory.h" +#include "bvec.h" 
+#include "Progress.h" +#ifdef _OPENMP +#include +#endif +Runner::Runner(int argc, char **argv) +{ + get_opts(argc, argv); + if (k == -1) { + auto pr = find_k(); + k = pr.first; + } + // if (similarity < 0.6) { + // align = true; + // } + if (sample_size == 0) { + sample_size = 300; + } + srand(10); +} + +int Runner::run() +{ + largest_count = 0; + Progress progress(files.size(), "Reading in sequences"); + for (auto i = 0; i < files.size(); i++) { + auto f = files.at(i); + ChromListMaker maker(f); + auto chromList = maker.makeChromOneDigitList(); + + progress++; +// cout << "Reading in sequences from " << f << "..." << endl; + uint64_t local_largest_count = 0; +#pragma omp parallel for reduction(max:local_largest_count) + for (int i = 0; i < chromList->size(); i++) { + std::vector values; + KmerHashTable table(k, 1); + ChromosomeOneDigit *chrom = dynamic_cast(chromList->at(i)); + fill_table(table, chrom, values); + uint64_t l_count = *std::max_element(values.begin(), values.end()); + if (l_count > local_largest_count) { + local_largest_count = l_count; + } + } + if (local_largest_count > largest_count) { + largest_count = local_largest_count; + } + } + progress.end(); + + + if (largest_count <= std::numeric_limits::max()) { + cout << "Using 8 bit histograms" << endl; + return do_run(); + } else if (largest_count <= std::numeric_limits::max()) { + cout << "Using 16 bit histograms" << endl; + return do_run(); + } else if (largest_count <= std::numeric_limits::max()){ + cout << "Using 32 bit histograms" << endl; + return do_run(); + } else if (largest_count <= std::numeric_limits::max()) { + cout << "Using 64 bit histograms" << endl; + return do_run(); + } else { + throw "Too big sequence"; + } +} + +void usage(std::string progname) +{ + std::cout << "Usage: " << progname << " --id 0.x [OPTIONS] *.fasta" << std::endl << std::endl; + #ifndef VERSION + #define VERSION "(undefined)" + #endif + std::cout << "Version " << VERSION << " compiled on " << __DATE__ << " " << 
__TIME__; + #ifdef _OPENMP + std::cout << " with OpenMP " << _OPENMP; + #else + std::cout << " without OpenMP"; + #endif + std::cout << std::endl; + std::cout << "See README for detailed options" << std::endl << std::endl; +} + + +void Runner::get_opts(int argc, char **argv) +{ + for (int i = 1; i < argc; i++) { + string arg = argv[i]; + if (arg == "--id" && i + 1 < argc) { + try { + std::string opt = argv[i+1]; + similarity = std::stod(opt); + if (similarity <= 0 || similarity >= 1) { + throw std::invalid_argument(""); + } + } catch(std::exception e) { + cerr << "Similarity must be between 0 and 1" << endl; + exit(EXIT_FAILURE); + } + i++; + } else if (arg == "--min-id" && i + 1 < argc) { + try { + std::string opt = argv[i+1]; + min_id = std::stod(opt); + if (min_id <= 0 || min_id >= 1) { + throw std::invalid_argument(""); + } + } catch(std::exception e) { + cerr << "Similarity must be between 0 and 1" << endl; + exit(EXIT_FAILURE); + } + i++; + } else if ((arg == "-k" || arg == "--kmer") && i + 1 < argc) { + k = strtol(argv[i+1], NULL, 10); + if (errno) { + perror(argv[i+1]); + exit(EXIT_FAILURE); + } else if (k <= 0) { + fprintf(stderr, "K must be greater than 0.\n"); + exit(EXIT_FAILURE); + } + i++; + } else if ((arg == "-o" || arg == "--output") && i + 1 < argc) { + output = string(argv[i+1]); + i++; + } else if ((arg == "-s" || arg == "--sample") && i + 1 < argc) { + sample_size = strtol(argv[i+1], NULL, 10); + if (errno) { + perror(argv[i+1]); + exit(EXIT_FAILURE); + } else if (sample_size <= 0) { + fprintf(stderr, "Sample size must be greater than 0.\n"); + exit(EXIT_FAILURE); + } + i++; + // } else if ((arg == "-p" || arg == "--pivot") && i + 1 < argc) { + // pivots = strtol(argv[i+1], NULL, 10); + // if (errno) { + // perror(argv[i+1]); + // exit(EXIT_FAILURE); + // } else if (sample_size <= 0) { + // fprintf(stderr, "Points per pivot must be greater than 0.\n"); + // exit(EXIT_FAILURE); + // } + // i++; + } else if ((arg == "--mut-type") && i + 1 < argc) 
{
	// Select which mutation kinds the synthetic training pairs may use.
	std::string opt = argv[i+1];
	if (opt == "all") {
		mut_type = HandleSeq::BOTH | HandleSeq::ATYPICAL;
	} else if (opt == "both") {
		mut_type = HandleSeq::BOTH;
	} else if (opt == "snp" || opt == "single") {
		mut_type = HandleSeq::SINGLE;
	} else if (opt == "nonsingle-typical") {
		mut_type = HandleSeq::NON_SINGLE;
	} else if (opt == "nonsingle-all") {
		mut_type = HandleSeq::NON_SINGLE | HandleSeq::ATYPICAL;
	} else if (opt == "all-but-reversion") {
		mut_type = HandleSeq::BOTH | HandleSeq::TRANSLOCATION;
	} else if (opt == "all-but-translocation") {
		mut_type = HandleSeq::BOTH | HandleSeq::REVERSION;
	} else {
		cerr << "Options for mutation type are \"single\", \"nonsingle-typical\", \"both\" (for single and nonsingle-typical), \"nonsingle-all\", and \"all\" (single, nonsingle, and atypical nonsingle)." << endl;
		exit(1);
	}
	i++;
} else if ((arg == "--feat" || arg == "-f") && i + 1 < argc) {
	// Select which distance/similarity feature set the predictor draws from.
	std::string opt = argv[i+1];
	if (opt == "fast") {
		feat_type = PRED_FEAT_FAST;
	} else if (opt == "slow") {
		feat_type = PRED_FEAT_FAST | PRED_FEAT_DIV;
	} else if (opt == "extraslow") {
		feat_type = PRED_FEAT_ALL;
	} else {
		cerr << "Options for feature sets are \"fast\", \"slow\", and \"extraslow\"." << endl;
		exit(1);
	}
	i++;
} else if ((arg == "--min" || arg == "--min-feat") && i + 1 < argc) {
	// Minimum number of features the trainer must keep (must be > 0).
	try {
		std::string opt = argv[i+1];
		int xx = std::stoi(opt);
		if (xx <= 0) {
			throw std::invalid_argument("");
		}
		min_n_feat = xx;
	// fix: catch by const reference instead of by value -- catching
	// std::exception by value slices derived exception types and copies
	// needlessly (C++ Core Guidelines E.15).
	} catch (const std::exception &) {
		cerr << "Minimum number of features must be greater than 0." << endl;
		exit(1);
	}

	i++;
} else if ((arg == "--max" || arg == "--max-feat") && i + 1 < argc) {
	// Maximum number of features the trainer may select (must be > 0).
	try {
		std::string opt = argv[i+1];
		int xx = std::stoi(opt);
		if (xx <= 0) {
			throw std::invalid_argument("");
		}
		max_n_feat = xx;
	// fix: catch by const reference (see note on the --min branch above).
	} catch (const std::exception &) {
		cerr << "Maximum number of features must be greater than 0."
<< endl; + exit(1); + } + + i++; + } else if ((arg == "-t" || arg == "--threads") && i + 1 < argc) { + try { + std::string opt = argv[i+1]; + int threads = std::stoi(opt); + if (threads <= 0) { + throw std::invalid_argument(""); + } + #ifdef _OPENMP + omp_set_num_threads(threads); + #endif + } catch (std::exception e) { + cerr << "Number of threads must be greater than 0." << endl; + exit(1); + } + + i++; + + } else if ((arg == "-d" || arg == "--delta") && i + 1 < argc) { + delta = strtol(argv[i+1], NULL, 10); + if (errno) { + perror(argv[i+1]); + exit(EXIT_FAILURE); + } else if (delta <= 0) { + fprintf(stderr, "Delta must be greater than 0.\n"); + exit(EXIT_FAILURE); + } + i++; + } else if ((arg == "-i" || arg == "--iter" || arg == "--iterations") && i + 1 < argc) { + iterations = strtol(argv[i+1], NULL, 10); + if (errno) { + perror(argv[i+1]); + exit(EXIT_FAILURE); + } else if (iterations <= 0) { + fprintf(stderr, "Iterations must be greater than 0.\n"); + exit(EXIT_FAILURE); + } + i++; + } else { + struct stat st; + stat(argv[i], &st); + if (S_ISREG(st.st_mode)) { + files.push_back(argv[i]); + } else { + usage(*argv); + exit(EXIT_FAILURE); + } + } + } + if (files.empty()) { + usage(*argv); + exit(EXIT_FAILURE); + } + if (min_n_feat > max_n_feat) { + cerr << "Minimum number of features (" << min_n_feat << ") cannot be greater than maximum number of features (" << max_n_feat << ")" << endl; + exit(1); + } +} + +pair Runner::find_k() +{ + unsigned long long count = 0, length = 0, largest_count = 0; + uint64_t longest_seq = 0; + uintmax_t num_sequences = 0; + for (auto f : files) { + ChromListMaker maker(f); + auto chromList = maker.makeChromOneDigitList(); + unsigned long long l = 0; + for (int i = 0; i < chromList->size(); i++) { + ChromosomeOneDigit *chrom = dynamic_cast(chromList->at(i)); + auto sz = chrom->size(); + l += sz; + if (sz > longest_seq) { + longest_seq = sz; + } + num_sequences++; + + } + l /= chromList->size(); + length += l; + } + length /= 
files.size(); + int newk = ceil(log(length) / log(4)) - 1; + cout << "avg length: " << length << endl; + cout << "Recommended K: " << newk << endl; + return make_pair(newk, longest_seq); +} + + +double global_mat[4][4] = {{1, -1, -1, -1}, + {-1, 1, -1, -1}, + {-1, -1, 1, -1}, + {-1, -1, -1, 1}}; +double global_sigma = -2; +double global_epsilon = -1; + +template +int Runner::do_run() +{ + using pvec = vector *>; + using pmap = map*, pvec*>; + + ClusterFactory factory(k); + auto points = factory.build_points(files, [&](nonltr::ChromosomeOneDigit *p){ return factory.get_divergence_point(p); }); + Trainer tr(points, sample_size, largest_count, similarity, pivots, global_mat, global_sigma, global_epsilon, align ? 0 : k); + tr.train(min_n_feat, max_n_feat, feat_type, mut_type, min_id); + vector lengths; + for (Point* p : points) { + if (!align) { + p->set_data_str(""); + } + lengths.push_back(p->get_length()); + } + // Initializing BVec + bvec bv(lengths, 1000); + lengths.clear(); + // Inserting points into BVec + uint64_t idx = 0; + for (Point* p : points) { + p->set_id(idx++); + bv.insert(p); + } + bv.insert_finalize(); +// cout << "bv size: " << bv.report() << endl; + // Point* mid = points[points.size()/2]; + // auto rng = bv.get_range(mid->get_length() * 0.99, + // mid->get_length() / 0.99); + // auto begin = bv.iter(rng.first); + // auto end = bv.iter(rng.second); + // size_t before = bv.report(); + // for (int i = 0; i < 1; i++) { + // bool is_min = false; + // Point* p = tr.get_close(mid, begin, end, is_min); + // size_t after = bv.report(); + // if (is_min) { + // string expr = (after + 1 == before) ? 
"true" : "false"; + // if (expr == "false") { + // throw expr; + // } + // cout << expr << endl; + // cout << "is min" << endl; + // } else { + // cout << "is not min" << endl; + // } + // } + factory.MS(bv, bandwidth, similarity, tr, output, iterations, delta); + return 0; +} + + +template +void Runner::print_output(const map*, vector*>*> &partition) const +{ + cout << "Printing output" << endl; + std::ofstream ofs; + ofs.open(output, std::ofstream::out); + int counter = 0; + for (auto const& kv : partition) { + if (kv.second->size() == 0) { + continue; + } + ofs << ">Cluster " << counter << endl; + int pt = 0; + for (auto p : *kv.second) { + string s = p->get_header(); + ofs << pt << "\t" << p->get_length() << "nt, " << s << "... " << endl; +// string fa = am.get(p->get_id()); +// ofs << writefa(fa) << endl; + pt++; + } + counter++; + } + ofs.close(); +} diff --git a/src/cluster/src/Runner.h b/src/cluster/src/Runner.h new file mode 100644 index 0000000..6e04ebf --- /dev/null +++ b/src/cluster/src/Runner.h @@ -0,0 +1,44 @@ +/* -*- C++ -*- + * + * Runner.h + * + * Author: Benjamin T James + */ +#ifndef RUNNER_H +#define RUNNER_H + +#include +#include +#include "Point.h" +#include "HandleSeq.h" +#include "Predictor.h" +using namespace std; + +class Runner { +public: + Runner(int argc, char** argv); + ~Runner() {}; + int run(); +private: + template int do_run(); + template void print_output(const map*, vector*>*> &m) const; + int k = -1; + int bandwidth; + double similarity = 0.90; + long largest_count = 0; + int iterations = 15; + int delta = 5; + bool align = false; + int sample_size = 0; + int pivots = 40; + int min_n_feat = 3; + int max_n_feat = 5; + int mut_type = HandleSeq::SINGLE; + uint64_t feat_type = PRED_FEAT_FAST; + double min_id = 0.35; + std::vector files; + string output = "output.clstr"; + void get_opts(int argc, char** argv); + pair find_k(); +}; +#endif diff --git a/src/cluster/src/SingMute.cpp b/src/cluster/src/SingMute.cpp new file mode 100644 
index 0000000..45f1610 --- /dev/null +++ b/src/cluster/src/SingMute.cpp @@ -0,0 +1,116 @@ +#include "SingMute.h" +#include +#include "Random.h" + + + + +void generate_unique_set(size_t cmd_size, std::set& ret, int num_elts, const std::set& bad_set_1, const std::set& bad_set_2, const std::vector &valid, Random& rng) +{ + while (ret.size() <= num_elts) { + long idx = rng.randMod(cmd_size); + if (valid[idx] && + ret.find(idx) == ret.end() && + bad_set_1.find(idx) == bad_set_1.end() && + bad_set_2.find(idx) == bad_set_2.end()) { + + ret.insert(idx); + } + } +} +char SingMute::randNucl() +{ + char character; + int value = rng.randMod(percAs + percCs + percGs + percTs); + if (value < percAs) { + character = 'A'; + } else if (value < percAs + percCs) { + character = 'C'; + } else if (value < percAs + percCs + percGs) { + character = 'G'; + } else { + character = 'T'; + } + return character; +} +void SingMute::init(const std::vector &valid) +{ + maxInsert = 0; + maxDel = 0; + maxSwitch = 0; + if (num_mut == 1) { + maxInsert = 1; + maxDel = 0; + maxSwitch = 0; + } else if (num_mut == 0) { + out_seq = *seq; + return; + } else { + maxSwitch = rng.randMod(num_mut); + num_mut -= maxSwitch; + + if (maxSwitch % 2 == 1 && num_mut >= 1) { + maxSwitch++; + num_mut--; + } else if (num_mut == 0) { + maxSwitch--; + num_mut++; + } + if (num_mut > 1) { + maxInsert = rng.randMod(num_mut); + num_mut -= maxInsert; + } else { + maxInsert = num_mut; + num_mut -= maxInsert; + } + maxDel = num_mut; + } + size_t seq_len = seq->length(); + + maxDel *= seq_len / 100.0; + maxInsert *= seq_len / 100.0; + maxSwitch *= seq_len / 100.0; + alignmentLength = maxInsert; + IBP = maxDel + maxSwitch; + + + std::vector command_str(seq_len, 'S'); + + std::set s_ins, s_del, s_switch; + generate_unique_set(command_str.size(), s_ins, maxInsert, s_del, s_switch, valid, rng); + generate_unique_set(command_str.size(), s_del, maxDel, s_ins, s_switch, valid, rng); + generate_unique_set(command_str.size(), s_switch, 
maxSwitch, s_ins, s_del, valid, rng); + for (auto idx : s_ins) { + command_str[idx] = 'I'; + } + for (auto idx : s_del) { + command_str[idx] = 'D'; + } + for (auto idx : s_switch) { + command_str[idx] = 'W'; + } + out_seq = ""; + out_seq.reserve(maxInsert + seq_len - maxDel + 1); + + for (long i = 0; i < seq_len; i++) { + auto cmd = command_str.at(i); + switch (cmd) { + case 'I': { + out_seq += randNucl(); + out_seq += seq->at(i); + break; + } + case 'S': { + out_seq += seq->at(i); + break; + } + case 'D': { + break; + } + case 'W': { + out_seq += randNucl(); + break; + } + } + } +} diff --git a/src/cluster/src/SingMute.h b/src/cluster/src/SingMute.h new file mode 100644 index 0000000..c659afd --- /dev/null +++ b/src/cluster/src/SingMute.h @@ -0,0 +1,48 @@ + +#ifndef SINGMUTE_H +#define SINGMUTE_H + +#include +#include +#include "Random.h" + +class SingMute { +public: + /* + Constructor, creates values + and assignes allocations based on inputted data + + @param: + int: percentage of A's + int: percentage of C's + int: percentage of G's + int: percentage of T's + int: The total allocation for non-single mutations + */ + SingMute(int pa, int pc, int pg, int pt, uintmax_t tt, const std::string* s, const std::vector &valid_) : percAs(pa), + percCs(pc), percGs(pg), percTs(pt), num_mut(tt), seq(s) { + init(valid_); + } + long getAlignmentLength() { return alignmentLength; } + long getIBP() { return IBP; } + void init(const std::vector &valid); + std::string& getSeq() { return out_seq; }; + private: + uintmax_t num_mut; + int percAs; + int percCs; + int percGs; + int percTs; + + long maxDel; + long maxInsert; + long maxSwitch; + + long alignmentLength; + long IBP; + const std::string * seq; + std::string out_seq; + char randNucl(); + Random rng; +}; +#endif diff --git a/src/cluster/src/SingleFeature.cpp b/src/cluster/src/SingleFeature.cpp new file mode 100644 index 0000000..bdc441c --- /dev/null +++ b/src/cluster/src/SingleFeature.cpp @@ -0,0 +1,50 @@ +#include 
"SingleFeature.h" + +template +void SingleFeature::normalize(const vector*,Point*> > &pairs) +{ + for (auto p : pairs) { + double d; + if (rc.empty()) { + d = raw(p.first, p.second); + } else { + d = rraw(p.first, p.second, rc, rv); + } + if (!min_set || d < min) { + min = d; + min_set = true; + } + if (!max_set || d > max) { + max = d; + max_set = true; + } + } +} + +template +double SingleFeature::operator()(Point *a, Point *b) const +{ + double d; + if (rc.empty()) { + d = raw(a, b); + } else { + d = rraw(a, b, rc, rv); + } +// std::cout << "Raw: " << d << std::endl; + double f = (d - min) / (max - min); +// std::cout << "Normalized: " << f << std::endl; + f = std::min(1.0, std::max(0.0, f)); + if (is_sim) { + return f; + } else { + return 1.0 - f; + } +} + + +template class SingleFeature; +template class SingleFeature; +template class SingleFeature; +template class SingleFeature; +template class SingleFeature; +template class SingleFeature; diff --git a/src/cluster/src/SingleFeature.h b/src/cluster/src/SingleFeature.h new file mode 100644 index 0000000..efa882c --- /dev/null +++ b/src/cluster/src/SingleFeature.h @@ -0,0 +1,26 @@ +#ifndef SINGLEFEATURE_H +#define SINGLEFEATURE_H + +#include "Point.h" +#include + +template +class SingleFeature { +public: + SingleFeature(std::function*, Point*)> f, bool is_sim_=true) + : raw(f), is_sim(is_sim_), min_set(false), max_set(false) {} + SingleFeature(std::function*, Point*, const vector&, const vector&)> f, vector rrv, vector rrc, bool is_sim_=true) + : rraw(f), is_sim(is_sim_), min_set(false), max_set(false), rv(rrv), rc(rrc) {} + void normalize(const vector*,Point*> > &pairs); + double operator()(Point*, Point*) const; + double min, max; +private: + std::function*, Point*)> raw; + std::function*, Point*, const vector&, const vector&)> rraw; + vector rv, rc; + const bool is_sim; + bool max_set, min_set; + +}; + +#endif diff --git a/src/cluster/src/SingleFileLoader.cpp b/src/cluster/src/SingleFileLoader.cpp new file 
mode 100644 index 0000000..e62715f --- /dev/null +++ b/src/cluster/src/SingleFileLoader.cpp @@ -0,0 +1,84 @@ +/* -*- C++ -*- + * + * SingleFileLoader.cpp + * + * Author: Benjamin T James + * + * Reads sequences one by one from a file + */ +#include "SingleFileLoader.h" +#include +#include + +std::istream& safe_getline(std::istream& is, std::string& t) +{ + t.clear(); + std::istream::sentry se(is, true); + std::streambuf* sb = is.rdbuf(); + for(;;) { + int c = sb->sbumpc(); + switch (c) { + case '\n': + return is; + case '\r': + if (sb->sgetc() == '\n') { + sb->sbumpc(); + } + return is; + case std::streambuf::traits_type::eof(): + if (t.empty()) { + is.setstate(std::ios::eofbit); + } + return is; + default: + t += (char)c; + } + } +} + + +SingleFileLoader::SingleFileLoader(std::string filename) +{ + in = new std::ifstream(filename); + is_first = true; +} +std::pair SingleFileLoader::next() +{ + std::pair ret = std::make_pair("", (std::string*)NULL); + if (!in->good()) { + return ret; + } + clock_t begin = clock(); + ret.second = new std::string(""); + if (is_first) { + safe_getline(*in, buffer); + is_first = false; + } + do { + if (buffer[0] == '>') { + if (ret.first != "") { + return ret; + } + ret.first = buffer; + } else if (buffer[0] == ' ' || buffer[0] == '\t') { + bool all_spaces = true; + for (auto c : buffer) { + if (c != ' ' && c != '\t') { + all_spaces = false; + } + } + if (!all_spaces) { + std::ostringstream oss; + oss << ret.first << buffer; + std::string new_header = oss.str(); + ret.first = new_header; + } + } else { + ret.second->append(buffer); + } + safe_getline(*in, buffer); + } while (in->good()); + double diff = clock() - begin; +// std::cout << "next(): " << diff / CLOCKS_PER_SEC << std::endl; + return ret; +} diff --git a/src/cluster/src/SingleFileLoader.h b/src/cluster/src/SingleFileLoader.h new file mode 100644 index 0000000..d6b3c5d --- /dev/null +++ b/src/cluster/src/SingleFileLoader.h @@ -0,0 +1,29 @@ +/* -*- C++ -*- + * + * 
SingleFileLoader.h + * + * Author: Benjamin T James + * + * A way of reading in 1 sequence at a time + * from FASTA, sequence is heap allocated + */ +#ifndef SINGLEFILELOADER_H +#define SINGLEFILELOADER_H + +#include + +class SingleFileLoader { +public: + SingleFileLoader(std::string file); + ~SingleFileLoader() { + if (in != NULL) { + delete in; + } + } + std::pair next(); +private: + std::ifstream *in; + std::string buffer; + bool is_first; +}; +#endif diff --git a/src/cluster/src/SingleMute.cpp b/src/cluster/src/SingleMute.cpp new file mode 100644 index 0000000..1f435f7 --- /dev/null +++ b/src/cluster/src/SingleMute.cpp @@ -0,0 +1,221 @@ +/** + * Author: Alex Baumgartner + * The Bioinformatics Toolsmith Laboratory, the University of Tulsa + * 5/15/2018 + * + * Purpose: + * The pupose of this module is to perform single mutations on sequences + */ + +#include "SingleMute.h" +#include + +int intRandMod_(int max) { + static thread_local std::mt19937 generator; + std::uniform_int_distribution distribution(0, max-1); + return distribution(generator); +} + +SingleMute::SingleMute(int a, int c, int g, int t, int alloc) { + percAs = a; + percCs = c; + percGs = g; + percTs = t; + //If allocation is 0, all sub allocations are 0 + if (alloc == 0) { + maxDel = 0; + maxInsert = 0; + maxSwitch = 0; + } + //Arbitrary, if only 1 percent is allocated, then only insert gets an allocation + else if (alloc == 1) { + maxSwitch = 0; + maxDel = 0; + maxInsert = 1; + } + //Otherwise, allocations are assigned randomly + else { + //Max switch gets a random allocation, + //but allocation has to be even + //(don't want to switch something with itself) + maxSwitch = intRandMod_(alloc);//rand() % alloc; + alloc -= maxSwitch; + //If alloc is odd, + //and there is still percent that can be allocated + if (maxSwitch % 2 == 1 && alloc >= 1) { + //Make allocation 1 less, + //and switch allocation one more (now even) + maxSwitch++; + alloc--; + } + //Otherwise, make allocation one larger, + 
//switch allocation one less (even) + else if (alloc == 0) { + maxSwitch--; + alloc++; + } + //If alloc is greater than 1 (must be for % purposes), + //calculate random value for inerst allocation + if (alloc > 1) { + maxInsert = intRandMod_(alloc);//rand() % alloc; + alloc -= maxInsert; + } else { + maxInsert = alloc; + alloc -= maxInsert; + } + //Max delete is assigned whatever is left + maxDel = alloc; + } +} + +int SingleMute::getAlignmentLength(){ + return alignmentLength; +} + +int SingleMute::getIBP(){ + return IBP; +} + +void SingleMute::genSing(string * sequence, vector mutes) { + seq = sequence; + //Assign vector of mutes to inputted vector + validIndexes = new vector(); + validIndexes->reserve(mutes.size()); +// n_valid_indices = mutes.size(); + //Adds all valid indexes to the validIndexes vector + for(int i = 0; i < mutes.size(); i++){ + if(mutes.at(i)){ + validIndexes->push_back(i); + } + } + n_valid_indices = validIndexes->size(); + float tempFloat; + //Calculate number of characters each mutation can mutate + tempFloat = maxDel / 100.0; + maxDel = (int) (tempFloat * seq->length()); + tempFloat = maxInsert / 100.0; + maxInsert = (tempFloat * seq->length()); + tempFloat = maxSwitch / 100.0; + maxSwitch = (tempFloat * seq->length()); + //Calculates Alignment length and identical base pairs + alignmentLength = maxInsert; + IBP = maxDel + maxSwitch; + //Vectors to keep track of where insertions and deletions need to be made + insertions = new vector(); + insertions->reserve(maxInsert); + deletions = new vector(); + deletions->reserve(maxDel); + //Since switch makes 2 invalid, + //switchNucl is run maxSwitch/2 times + for (int i = 0; i < maxSwitch; i++) { + switchNucl(); + } + //Insert maxInsert times + for (int i = 0; i < maxInsert; i++) { + insert(); + } + //Delete maxDel nucleotides + for (int i = 0; i < maxDel; i++) { + deleteNucl(); + } + //perfroms deletions and insertions + performInsertAndDelete(); +} + +void SingleMute::insert() { + //Calculate 
the index to insert at + int index = intRandMod_(n_valid_indices);//rand() % validIndexes->size(); + insertions->push_back(validIndexes->at(index)); + std::swap(validIndexes->at(index), validIndexes->at(n_valid_indices-1)); + n_valid_indices--; + //Remove that as a valid index +// validIndexes->erase(validIndexes->begin() + index, validIndexes->begin() + index + 1); +} + +void SingleMute::deleteNucl() { + //Choose a valid index to delete + int index = intRandMod_(n_valid_indices);//rand() % validIndexes->size(); + deletions->push_back(validIndexes->at(index)); + std::swap(validIndexes->at(index), validIndexes->at(n_valid_indices-1)); + n_valid_indices--; + //Remove from the +// validIndexes->erase(validIndexes->begin() + index, validIndexes->begin() + index + 1); +} + +void SingleMute::switchNucl() { + //Pick a random valid index + int index = intRandMod_(n_valid_indices);//rand() % validIndexes->size(); + char character = seq->at(validIndexes->at(index)); + int value; + //Keep generating characters until one different than the one we are trying to switch is found + while(character == seq->at(validIndexes->at(index))){ + value = intRandMod_(percAs + percCs + percGs + percTs); + if (value < percAs) { + character = 'A'; + } else if (value < percAs + percCs) { + character = 'C'; + } else if (value < percAs + percCs + percGs) { + character = 'G'; + } else { + character = 'T'; + } + } + //Switch that character + seq->at(validIndexes->at(index)) = character; + std::swap(validIndexes->at(index), validIndexes->at(n_valid_indices-1)); + n_valid_indices--; + //Remove the chosen index as a valid index +// validIndexes->erase(validIndexes->begin() + index, validIndexes->begin() + index + 1); +} + +void SingleMute::performInsertAndDelete(){ + //sorts the vectors based + std::sort(insertions->begin(), insertions->end()); + std::sort(deletions->begin(), deletions->end()); + //Goes through both vectors untill all have been processed + for(int i = insertions->size() - 1, j = 
deletions->size() - 1; i >= 0 && j >= 0;){ + //If i is -1, all insertions have been processed + if(i == -1){ + removeNucl(deletions->at(j)); + j--; + } + //If i is -1, all deletions have been processed + else if(j == -1){ + insertNucl(insertions->at(i)); + i--; + } + else{ + //If the index of the current next insertion is higher than the next deletion, insert, else delete + if(insertions->at(i) > deletions->at(j)){ + insertNucl(insertions->at(i)); + i--; + } + else{ + removeNucl(deletions->at(j)); + j--; + } + } + } +} + +void SingleMute::removeNucl(int index){ + seq->erase(index, 1); +} + +void SingleMute::insertNucl(int index){ + string character; + //Use a weighted die to + //calculate which character to insert + int value = intRandMod_(percAs + percCs + percGs + percTs); + if (value < percAs) { + character = "A"; + } else if (value < percAs + percCs) { + character = "C"; + } else if (value < percAs + percCs + percGs) { + character = "G"; + } else { + character = "T"; + } + //insert at that index + seq->insert(index, character); +} diff --git a/src/cluster/src/SingleMute.h b/src/cluster/src/SingleMute.h new file mode 100644 index 0000000..b0bf93d --- /dev/null +++ b/src/cluster/src/SingleMute.h @@ -0,0 +1,89 @@ +/** + * Author: Alex Baumgartner + * The Bioinformatics Toolsmith Laboratory, the University of Tulsa + * 5/15/2018 + * + * Purpose: + * The pupose of this module is to perform single mutations on sequences + */ + +#ifndef SINGLEMUTE_H +#define SINGLEMUTE_H + +#include +#include +#include +#include + +using namespace std; + +class SingleMute { +public: + /* + Constructor, creates values + and assignes allocations based on inputted data + + @param: + int: percentage of A's + int: percentage of C's + int: percentage of G's + int: percentage of T's + int: The total allocation for non-single mutations + */ + SingleMute(int, int, int, int, int); + /* + Takes a string and mutates it based + on the allocation given in the constructor + + @param: + std::string 
*: pointer to the sequence to be mutated + std::vector : boolean vector of valid and invalid indexes + */ + void genSing(std::string *, std::vector); + + int getAlignmentLength(); + + int getIBP(); + + ~SingleMute(){delete validIndexes; delete insertions; delete deletions;}; + + private: + int percAs; + int percCs; + int percGs; + int percTs; + int maxDel; + int maxInsert; + int maxSwitch; + int alignmentLength; + int IBP; + std::vector * validIndexes; + size_t n_valid_indices = 0; + std::vector * deletions; + std::vector * insertions; + std::string * seq; + /* + Inserts a sequence randomly in the list + at a valid index + */ + void insert(); + /* + Deletes a random nucleotide + that has not been previously mutated + */ + void deleteNucl(); + /* + Switches two random nucleotides + that have not been mutated previously + */ + void switchNucl(); + /* + Performs necessary insertions and deletions in the string based on the insertion and deletion vectors + */ + void performInsertAndDelete(); + + void removeNucl(int); + + void insertNucl(int); +}; +#endif diff --git a/src/cluster/src/Trainer.cpp b/src/cluster/src/Trainer.cpp new file mode 100644 index 0000000..432d624 --- /dev/null +++ b/src/cluster/src/Trainer.cpp @@ -0,0 +1,930 @@ +#include "Trainer.h" +#include "HandleSeq.h" +#include "Loader.h" +#include "ClusterFactory.h" +#include +#include +#include +#include +#include "../../utility/GlobAlignE.h" +#include "../../utility/AffineId.h" +#include "needleman_wunsch.h" +#include "Predictor.h" +#include "GLM.h" +#include "Feature.h" +#include "Progress.h" +#include + +template +double Trainer::align(Point *a, Point* b) const +{ + auto sa = a->get_data_str(); + auto sb = b->get_data_str(); + int la = sa.length(); + int lb = sb.length(); + + // needleman_wunsch nw(sa, sb, 2, -3, 5, 2); + // return nw.identity(nw.align()); + GlobAlignE galign(sa.c_str(), 0, la-1, + sb.c_str(), 0, lb-1, + 1, -1, 2, 1); + + return galign.getIdentity(); + +} + + +template 
+std::tuple*,double,size_t,size_t> Trainer::get_close(Point *p, bvec_iterator istart, bvec_iterator iend, bool &is_min_r) const +{ + int ncols = weights.getNumRow(); +#pragma omp declare reduction(pmax:std::tuple*,double,size_t,size_t>: \ + omp_out = get<1>(omp_in) > get<1>(omp_out) ? omp_in : omp_out ) \ + initializer (omp_priv=std::make_tuple((Point*)NULL,-1,0,0)) + + std::tuple*, + double, + size_t, + size_t> result = std::tuple*, double, size_t, size_t>(NULL, + -1, + 0, + 0); + bool has_found = false; + + #ifdef DEBUG + cout << "begin " << istart.r << " " << istart.c << " end " << iend.r << " " << iend.c << endl; + for (auto data : *istart.col) { + cout << "\t" << data.size() << endl; + } + #endif +// #pragma omp parallel for reduction(pmin:result), reduction(||:has_found) +// for (bvec_iterator i = istart; i <= iend; i++) { +// if (i <= iend) { +// Point* pt = (*i).first; +// double sum = weights.get(0, 0); +// double dist = 0; +// for (int col = 1; col < ncols; col++) { +// if (col == 1) { +// dist = ff.at(col-1)(pt, p); +// sum += weights.get(col, 0) * dist; +// } else { +// sum += weights.get(col, 0) * ff.at(col-1)(pt, p); +// } +// } +// double res = round(1.0 / (1 + exp(-sum))); + +// // set second to true if result is not 1.0 +// // which means it will be removed +// result = std::make_pair(pt, dist); +// has_found = (res != 1.0); +// (*i).second = (res != 1.0); +// } +// } + bool is_min = true; +#pragma omp parallel for reduction(pmax:result), reduction(&&:is_min) + for (bvec_iterator i = istart; i <= iend; ++i) { + Point* pt = (*i).first; + double sum = weights.get(0, 0); + double dist = 0; + auto cache = feat->compute(*pt, *p); + for (int col = 1; col < ncols; col++) { + if (col == 1) { + dist = (*feat)(col-1, cache); + sum += weights.get(col, 0) * dist; + } else { + sum += weights.get(col, 0) * (*feat)(col-1, cache); + } + } + double res = round(1.0 / (1 + exp(-sum))); + //cout << "res: " << res << " " << dist << endl; +// set second to true if 
result is not 1.0 + // which means it will be removed + result = (dist > std::get<1>(result)) ? std::make_tuple(pt, dist, i.r, i.c) : result; + is_min = is_min && (res != 1.0); +// has_found = has_found || (res != 1.0); + if (res == 1.0) { + *i = std::make_pair(pt, true); +// (*i).second = true; + } + } + +// is_min = !has_found; + is_min_r = is_min; +// return get<0>(result); + return result; + +} + +template +long Trainer::merge(vector > ¢ers, long current, long begin, long last) const +{ +#pragma omp declare reduction(ldpmax:std::pair: \ + omp_out = omp_in.second > omp_out.second ? omp_in : omp_out ) \ + initializer (omp_priv=std::make_pair(0, std::numeric_limits::min())) + std::pair best = std::make_pair(0, std::numeric_limits::min()); + Point* p = centers[current].getCenter(); +#pragma omp parallel for reduction(ldpmax:best) + for (long i = begin; i <= last; i++) { + double sum = weights.get(0, 0); + double dist = 0; + Point* cen = centers[i].getCenter(); + auto cache = feat->compute(*cen, *p); + for (int col = 1; col < weights.getNumRow(); col++) { + double d = (*feat)(col-1, cache); + if (col == 1) { + dist = d; + } + sum += weights.get(col, 0) * d; + } + double res = round(1.0 / (1 + exp(-sum))); + + if (res == 1) { + best = best.second > dist ? 
// Deterministic RNG functor for std::random_shuffle: seeding with a
// fixed value in the constructor makes every shuffle reproducible
// run-to-run.
struct rng {
	rng() {
		srand(0);
	}
	// Uniform draw in [0, upper), as required by random_shuffle.
	int operator()(int upper) const {
		return rand() % upper;
	}
};
+ auto scmp = [](const pair*,Point*>,double> a, const pair*,Point*>, double> b) { + return a.first.first->get_header().compare(b.first.first->get_header()) < 0 + || + (a.first.first->get_header() == b.first.first->get_header() && a.first.second->get_header().compare(b.first.second->get_header()) < 0); + }; + + // todo: convert to std::map + std::set*,Point*>, double>, decltype(scmp)> buf_pos(scmp), buf_neg(scmp); + std::vector*,Point*>, double> > buf_vpos, buf_vneg; +// std::sort(vec.begin(), vec.end(), cmp); + // cout << "Before Pair: " << vec[0].first->get_header() << ", " << vec[0].second->get_header() << endl; + // cout << "Before Pair: " << vec[vec.size()-1].first->get_header() << ", " << vec[vec.size()-1].second->get_header() << endl; + + rng gen; + random_shuffle(vec.begin(), vec.end(), gen); + // cout << "Pair: " << vec[0].first->get_header() << ", " << vec[0].second->get_header() << endl; + // cout << "Pair: " << vec[vec.size()-1].first->get_header() << ", " << vec[vec.size()-1].second->get_header() << endl; + vector scores(vec.size()); + Progress p(vec.size(), "Alignment"); +#pragma omp parallel for schedule(dynamic) + for (int i = 0; i < vec.size(); i++) { + double algn = align(vec[i].first, vec[i].second); + bool is_pos = algn >= cutoff; +#pragma omp critical + { + scores[i] = algn; + p++; + if (is_pos) { + buf_pos.insert(make_pair(vec[i], algn)); + //cout << vec[i].first->get_header() << " " << vec[i].second->get_header() << " " << algn << endl; + } else { + buf_neg.insert(make_pair(vec[i], algn)); + } + +#ifdef DEBUG + cout << vec[i].first->get_header() << " WITH " << vec[i].second->get_header() << " " << algn << endl; + #endif + + } + } + p.end(); + std::sort(scores.begin(), scores.end()); + std::cout << "positive=" << buf_pos.size() << " negative=" << buf_neg.size() << endl; + if (buf_pos.empty() || buf_neg.empty()) { + std::cout << "Identity value does not match sampled data: "; + if (buf_pos.empty()) { + std::cout << "Too many sequences below 
identity"; + } else { + std::cout << "Too many sequences above identity"; + } + std::cout << std::endl; + exit(0); + } + size_t m_size = std::min(buf_pos.size(), buf_neg.size()); + + std::cout << "resizing positive" << std::endl; + for (auto p : buf_pos) { + buf_vpos.push_back(p); + } + for (auto p : buf_neg) { + buf_vneg.push_back(p); + } + auto bp = resize_vec(buf_vpos, m_size); + std::cout << "resizing negative" << std::endl; + auto bn = resize_vec(buf_vneg, m_size); + auto ret = make_pair(bp, bn); + std::cout << "positive=" << ret.first.size() << " negative=" << ret.second.size() << endl; + return ret; + +} +template +void Trainer::filter(Point *p, vector *, bool> > &vec) const +{ + for (auto& pt : vec) { + double sum = weights.get(0, 0); + auto cache = feat->compute(*pt.first, *p); + for (int col = 1; col < weights.getNumRow(); col++) { + sum += weights.get(col, 0) * (*feat)(col-1, cache); + } + double res = round(1.0 / (1 + exp(-sum))); + pt.second = (res != 1); + } + vec.erase(std::remove_if(vec.begin(), vec.end(), [](pair*, bool> p) { + return p.second; + }), vec.end()); +} + +template +Point* Trainer::closest(Point *p, vector *, bool> > &vec) const +{ + Point* best_pt = NULL; + double best_dist = 0; + for (auto& pt : vec) { + double sum = weights.get(0, 0); + double dist = pt.first->distance_d(*p); + if (best_pt == NULL || dist < best_dist) { + best_dist = dist; + best_pt = pt.first; + } + } + return best_pt; +} + +template +std::pair Trainer::generate_feat_mat(pair *, Point *> >, vector *, Point *> > > &data, int ncols) +{ + int nrows = data.first.size() + data.second.size(); + matrix::Matrix feat_mat(nrows, ncols); + matrix::Matrix labels(nrows, 1); +#pragma omp parallel for + for (int i = 0; i < data.first.size(); i++) { + auto kv = data.first[i]; + int row = i; + auto cache = feat->compute(*kv.first, *kv.second); + for (int col = 0; col < ncols; col++) { + + if (col == 0) { + feat_mat.set(row, col, 1); + } else { +// double val = ff[col-1](kv.first, 
kv.second); + ////#pragma omp critical + double val = (*feat)(col-1, cache); + feat_mat.set(row, col, val); + } + + } + ////#pragma omp critical + labels.set(row, 0, 1); + } +#pragma omp parallel for + for (int i = 0; i < data.second.size(); i++) { + auto kv = data.second[i]; + int row = data.first.size() + i; + auto cache = feat->compute(*kv.first, *kv.second); + for (int col = 0; col < ncols; col++) { + + if (col == 0) { + feat_mat.set(row, col, 1); + } else { +// double val = ff[col-1](kv.first, kv.second); + ////#pragma omp critical + double val = (*feat)(col-1, cache); + feat_mat.set(row, col, val); + } + + } + ////#pragma omp critical + labels.set(row, 0, -1); + } + return std::make_pair(feat_mat, labels); +} +template +double Trainer::train_n(pair *, Point *> >, vector *, Point *> > > &data, int ncols) +{ + std::cout << "done" << endl; + cout << "Training on " << ncols << " columns" << endl; + int nrows = data.first.size() + data.second.size(); + + matrix::Matrix feat_mat(nrows, ncols); + matrix::Matrix labels(nrows, 1); + double avg_label = 0; +#pragma omp parallel for + for (int i = 0; i < data.first.size(); i++) { + auto kv = data.first[i]; + int row = i; + auto cache = feat->compute(*kv.first, *kv.second); + for (int col = 0; col < ncols; col++) { + + if (col == 0) { + feat_mat.set(row, col, 1); + } else { +// double val = ff[col-1](kv.first, kv.second); + ////#pragma omp critical + double val = (*feat)(col-1, cache); + feat_mat.set(row, col, val); + } + + } + ////#pragma omp critical + labels.set(row, 0, 1); + } +#pragma omp parallel for + for (int i = 0; i < data.second.size(); i++) { + auto kv = data.second[i]; + int row = data.first.size() + i; + auto cache = feat->compute(*kv.first, *kv.second); + for (int col = 0; col < ncols; col++) { + + if (col == 0) { + feat_mat.set(row, col, 1); + } else { +// double val = ff[col-1](kv.first, kv.second); + ////#pragma omp critical + double val = (*feat)(col-1, cache); + feat_mat.set(row, col, val); + } + + } + 
// Draw a uniform random value from the window [mute - rng, mute + rng],
// clamped to the absolute bounds [low, high].  When the clamped window
// collapses to a single value, that value is returned exactly.
double random_between(double mute, double rng, double low, double high)
{
	const double lo = std::max(low, mute - rng);
	const double hi = std::min(high, mute + rng);
	const double t = (double)rand() / RAND_MAX; // t in [0, 1]
	return lo + t * (hi - lo);
}
pos_buf.push_back(pr); + } else { + neg_buf.push_back(pr); + } + } +} + +template +std::pair*,Point*> >, + vector*,Point*> > >, + std::pair*,Point*> >, + vector*,Point*> > > > +Trainer::new_get_labels(std::vector*> &points, size_t num_sample, double id, uintmax_t &_id) +{ + std::sort(points.begin(), points.end(), [](const Point* a, + const Point* b) -> bool { + return a->get_length() < b->get_length(); + }); + std::pair*,Point*> >, + vector*,Point*> > > training, testing; + num_sample = min(num_sample, points.size()); + vector*> f_points_tr, f_points_test; + size_t total_size = points.size(); + for (int i = 0; i < num_sample; i++) { + int i1 = floor((double)i * total_size / (2 * num_sample)); + int i2 = floor((i + 1) * (double)total_size / (2 * num_sample)); + f_points_tr.push_back(points.at(i1)); + f_points_test.push_back(points.at(i2)); + } + std::vector > pos_buf, neg_buf; + cout << "mutating sequences" << endl; + for (auto p : f_points_tr) { + mutate_seqs(p, 5, pos_buf, neg_buf, 100 * id, 100, _id); + mutate_seqs(p, 5, pos_buf, neg_buf, 40, 100 * id, _id); + } + size_t buf_size = std::min(pos_buf.size(), neg_buf.size()); + cout << "training +: " << pos_buf.size() << endl; + cout << "training -: " << neg_buf.size() << endl; + std::vector > > bins; + size_t num_bins; + for (int i = 0; i < 10; i++) { + double max_identity = id * 100 + (100 - 100.0 * id) * (i+1) / 10.0; + double min_identity = id * 100 + (100 - 100.0 * id) * i / 10.0; + cout << "I = " << i << " " << min_identity << " -> " << max_identity << endl; + bins.push_back(std::vector >()); + for (auto p : pos_buf) { + if (p.val > min_identity && p.val < max_identity) { + bins[i].push_back(p); + } + } + for (auto p : neg_buf) { + if (p.val > min_identity && p.val < max_identity) { + bins[i].push_back(p); + } + } + } + std::random_shuffle(pos_buf.begin(), pos_buf.end()); + std::random_shuffle(neg_buf.begin(), neg_buf.end()); + for (size_t i = 0; i < buf_size; i++) { + cout << "TR: P " << pos_buf[i].val << 
endl; + cout << "TR: N " << neg_buf[i].val << endl; + if (pos_buf[i].val > id) { + training.first.emplace_back(pos_buf[i].first, pos_buf[i].second); + } else { + training.second.emplace_back(pos_buf[i].first, pos_buf[i].second); + } + if (neg_buf[i].val > id) { + training.first.emplace_back(neg_buf[i].first, neg_buf[i].second); + } else { + training.second.emplace_back(neg_buf[i].first, neg_buf[i].second); + } + } + pos_buf.clear(); + neg_buf.clear(); + for (auto p : f_points_test) { + mutate_seqs(p, 5, pos_buf, neg_buf, 100 * id, 100, _id); + mutate_seqs(p, 5, pos_buf, neg_buf, 40, 100 * id, _id); + } + buf_size = std::min(pos_buf.size(), neg_buf.size()); + cout << "testing +: " << pos_buf.size() << endl; + cout << "testing -: " << neg_buf.size() << endl; + std::random_shuffle(pos_buf.begin(), pos_buf.end()); + std::random_shuffle(neg_buf.begin(), neg_buf.end()); + for (size_t i = 0; i < buf_size; i++) { + cout << "TE: P " << pos_buf[i].val << endl; + cout << "TE: N " << neg_buf[i].val << endl; + if (pos_buf[i].val > id) { + testing.first.emplace_back(pos_buf[i].first, pos_buf[i].second); + } else { + testing.second.emplace_back(pos_buf[i].first, pos_buf[i].second); + } + if (neg_buf[i].val > id) { + testing.first.emplace_back(neg_buf[i].first, neg_buf[i].second); + } else { + testing.second.emplace_back(neg_buf[i].first, neg_buf[i].second); + } + } + return make_pair(training, testing); +} +template +void Trainer::train(int min_n_feat, int max_n_feat, uint64_t feat_type, int mut_type, double min_id, double acc_cutoff) +{ + + if (k != 0) { + std::cout << "Splitting data" << endl; + uintmax_t _id = points.size(); + Predictor pred(k, cutoff, PRED_MODE_CLASS, feat_type, + mut_type, min_n_feat, max_n_feat, min_id); + pred.train(points, points, _id, n_points); + delete feat; + auto pr = pred.get_class(); + feat = pr.first; + glm = pr.second; + weights = glm.get_weights(); + return; + } else { + feat->add_feature(FEAT_ALIGN, Combo::xy); +// 
feat->normalize(training.first); + feat->finalize(); + weights = matrix::Matrix(2, 1); + weights.set(0, 0, -1 * cutoff); + weights.set(1, 0, 1); + return; + } +} + +template +vector*, Point*> > Trainer::split() +{ + // n_points total per side + // max_pts_from_one on each side + auto cmp = [](const pair*,Point*> a, const pair*,Point*> b) { + return a.first->get_header().compare(b.first->get_header()) < 0 +|| + (a.first->get_header() == b.first->get_header() && a.second->get_header().compare(b.second->get_header()) < 0); + }; + set*, Point*>, decltype(cmp)> pairs(cmp); +// vector*, Point*> > pairs; + const size_t total_num_pairs = n_points * 2; + int aerr = 0; + int bandwidth = (1.0 - cutoff) * 10000; + vector*> indices; + std::sort(points.begin(), points.end(), [](const Point* a, + const Point* b) -> bool { + return a->get_length() < b->get_length(); + }); + Point *begin_pt = points[points.size()/2]; + + std::sort(points.begin(), points.end(), [&](const Point* a, + const Point* b) -> bool { + return a->distance(*begin_pt) < b->distance(*begin_pt); + }); + int num_iterations = ceil(((double)n_points) / max_pts_from_one) - 1; + for (int i = 0; i <= num_iterations; i++) { + int idx = i * (points.size()-1) / num_iterations; + indices.push_back(points[idx]); + } + cout << "Point pairs: " << indices.size() << endl; + size_t to_add_each = max_pts_from_one / 2; + Progress prog(indices.size(), "Sorting data"); +#pragma omp parallel for schedule(dynamic) + for (int i = 0; i < indices.size(); i++) { + vector*> pts = points; + Point* p = indices[i]; + std::sort(pts.begin(), pts.end(), [&](const Point* a, + const Point* b) { + return a->distance(*p) < b->distance(*p); + }); + // do binary search with alignment + size_t offset = pts.size() / 4; + size_t pivot = offset; + double closest_algn = 20000; + size_t best_pivot = 2 * offset; + for (pivot = 2 * offset; offset > 0; offset /= 2) { + double algn = align(p, pts[pivot]); + // cout << "Pivot: " << pivot << " point: " << 
pts[pivot]->get_header() << " sim: " << align(p, pts[pivot]) << endl; + if (fabs(algn - cutoff) < closest_algn) { + closest_algn = fabs(algn - cutoff); + best_pivot = pivot; + } + if (algn < cutoff) { + pivot -= offset; + } else if (algn > cutoff) { + pivot += offset; + } else { + break; + } + } +// cout << "Pivot: " << pivot << " point: " << pts[pivot]->get_header() << " sim: " << align(p, pts[pivot]) << endl; + // before: [0, pivot) size: to_add_each + // after: [pivot, size) size: to_add_each + double before_inc = (double)pivot / to_add_each; + double after_inc = ((double)(pts.size() - pivot)) / to_add_each; +#pragma omp critical + { + prog++; + if (before_inc < 1) { + aerr = 1; + } else if (after_inc < 1) { + aerr = -1; + } + } + double before_start = 0; + double after_start = pivot; + double top_start = 0; + size_t size_before = pairs.size(); + vector*,Point*> > buf; + // Adds points above cutoff by adding before_inc + for (int i = 0; i < to_add_each; i++) { + int idx = round(before_start); + int dist = pts[idx]->distance(*p); + // cout << p->get_header() << " " << pts[idx]->get_header() << " " << dist << endl; + auto pr = p->get_header().compare(pts[idx]->get_header()) < 0 ? make_pair(p, pts[idx]) : make_pair(pts[idx], p); + buf.push_back(pr); + before_start += before_inc; + } + // Adds points before cutoff by adding after_inc + for (int i = 0; i < to_add_each && round(after_start) < pts.size(); i++) { + int idx = round(after_start); + int dist = pts[idx]->distance(*p); + // cout << p->get_header() << " " << pts[idx]->get_header() << " " << dist << endl; + auto pr = p->get_header().compare(pts[idx]->get_header()) < 0 ? 
make_pair(p, pts[idx]) : make_pair(pts[idx], p); + buf.push_back(pr); + after_start += after_inc; + } +#pragma omp critical + { + // Adds buffer to total pairs + // for (auto p : buf) { +// pairs.push_back(p); +// } + pairs.insert(std::begin(buf), std::end(buf)); + } +// cout << "added " << pairs.size() - size_before << " pairs" << endl; + } + prog.end(); + if (aerr < 0) { + cerr << "Warning: Alignment may be too small for sampling" << endl; + } else if (aerr > 0) { + cerr << "Warning: Alignment may be too large for sampling" << endl; + } + int i = 0; + for (auto a : pairs) { + cout << "Before Pair: " << a.first->get_header() << ", " << a.second->get_header() << endl; + if (++i == 4) { + break; + } + } + return std::vector*,Point*> >(pairs.begin(), pairs.end()); +} +template +std::pair*, Point*>, double>, + std::map*, Point*>, double> > +Trainer::split_old() { + using train_map = std::map*, Point*>, double>; + std::pair split; + int bandwidth = (1.0 - cutoff) * 10000; + size_t last_cutoff = points.size() / 2; + while (split.first.size() < n_points) { + Point *p = points[last_cutoff]; + std::sort(points.begin(), points.end(), [&](const Point* a, + const Point* b) -> bool { + return a->distance(*p) < b->distance(*p); + }); + int b_cutoff = points.size() / 2; + for (int offset = b_cutoff; offset >= 1; offset /= 2) { + int dist = p->distance(*points[b_cutoff]); + if (dist < bandwidth) { + b_cutoff += offset; + } else if (dist > bandwidth) { + b_cutoff -= offset; + } else { + break; + } + } + size_t cutoff_index = points.size(); + const size_t count = split.first.size(); + + if (b_cutoff >= max_pts_from_one) { + double ratio = (double)b_cutoff / max_pts_from_one; + double sum = 0; + for (size_t q = 0; q < max_pts_from_one; q++) { + size_t i = round(sum); + if (i >= points.size()) { + cerr << "this shouldn't happen" << endl; + throw "this shouldn't happen"; + } + double alignment = align(p, points[i]); + if (alignment < cutoff) { + cutoff_index = i + 10; + break; + } + 
/*
 * Greatest common divisor of two non-negative integers via Euclid's
 * algorithm.  For b <= 0 the first argument is returned unchanged, so
 * callers may safely pass zeros (gcd(x, 0) == x).
 */
int gcd(int a, int b)
{
	if (b <= 0) {
		return a;
	}
	return gcd(b, a % b);
}

/*
 * GCD of the entries of v, skipping zero entries after the first so
 * they do not collapse the divisor.
 *
 * Fix: the original indexed v[0] unconditionally, which is undefined
 * behavior on an empty vector; an empty input now returns 1 (a safe
 * common divisor for the callers that scale by it).
 */
int gcd_vec(std::vector<int> v)
{
	if (v.empty()) {
		return 1;
	}
	int ret = v[0];
	for (size_t i = 1; i < v.size(); i++) {
		if (v[i] == 0) {
			continue;
		}
		ret = gcd(ret, v[i]);
	}
	return ret;
}
+ scaled.push_back(round(scale_factor * fabs(mat[i][j]))); + } + } + double common_div = gcd_vec(scaled); + sigma = signs[0] * scaled[0] / common_div; + epsilon = signs[1] * scaled[1] / common_div; + int count = 2; + for (int i = 0; i < 4; i++) { + for (int j = 0; j < 4; j++) { + mat[i][j] = signs[count] * scaled[count] / common_div; + count++; + } + } +} + +template +void Trainer::init(double (&matrix)[4][4], double sig, double eps) +{ + scale(matrix, sig, eps); + for (int i = 0; i < 4; i++) { + for (int j = 0; j < 4; j++) { + mat[i][j] = (int)matrix[i][j]; + } + } + sigma = (int)sig; + eps = (int)eps; + // sf.emplace_back([](Point* a, Point *b) { + // return Feature::manhattan(*a, *b); + // }, false); + // sf.emplace_back([](Point* a, Point *b) { + // return Feature::length_difference(*a, *b); + // }, false); + // sf.emplace_back([](Point* a, Point *b) { + // return Feature::rree_k_r(*a, *b); + // }, false); + // sf.emplace_back([](Point* a, Point* b) { + // return Feature::length_difference(*a, *b); + // }, false); + // sf.emplace_back([](Point* a, Point* b) { + // return Feature::intersection(*a, *b); + // }, true); + // sf.emplace_back([](Point* a, Point* b) { + // return Feature::jenson_shannon(*a, *b); + // }, false); + // sf.emplace_back([](Point* a, Point* b) { + // return Feature::simratio(*a, *b); + // }, true); + // sf.emplace_back([](Point* a, Point* b) { + // return Feature::squaredchord(*a, *b); + // }, false); + // sf.emplace_back([](Point* a, Point* b) { + // return Feature::manhattan(*a, *b); + // }, false); + // sf.emplace_back([](Point* a, Point* b) { + // return Feature::pearson(*a, *b); + // }, true); + +} +template class Trainer; +template class Trainer; +template class Trainer; +template class Trainer; +template class Trainer; +template class Trainer; diff --git a/src/cluster/src/Trainer.h b/src/cluster/src/Trainer.h new file mode 100644 index 0000000..8801172 --- /dev/null +++ b/src/cluster/src/Trainer.h @@ -0,0 +1,67 @@ +/* -*- C++ -*- */ 
+#ifndef TRAINER_H +#define TRAINER_H + +#include "Point.h" +#include "GLM.h" +#include "Feature.h" +#include "bvec.h" +#include "Center.h" +#include "LogTable.h" +#include + +template +class Trainer { +public: + Trainer(std::vector*> v, size_t num_points, size_t largest_count, double cutoff_, size_t max_pts_from_one_, double (&matrix)[4][4], double sig, double eps, int ksize) : points(v), n_points(num_points), cutoff(cutoff_), max_pts_from_one(max_pts_from_one_), k(ksize) { + init(matrix, sig, eps); + uintmax_t size = 1000 * 1000 * 10; + feat = new Feature(k); + }; + ~Trainer() { delete feat_mat; delete feat; } + std::pair*, Point*>, double>, + std::map*, Point*>, double> > split_old(); + vector*,Point*> > split(); + double train_n(pair*, + Point* + > >, + vector*, + Point*> > > &data, int ncols); + void train(int min_n_feat, int max_n_feat, uint64_t feat_type, int mut_type, double min_id, double acc_cutoff=97.5); + void mutate_seqs(Point* p, size_t num_seq, vector > &pos_buf, vector > &neg_buf, double id_begin, double id_end, uintmax_t& _id); + std::tuple*,double,size_t,size_t> get_close(Point*, bvec_iterator istart, bvec_iterator iend, bool& is_min) const; +// vector > get_close(Point*, const vector*,int> > &, bool& is_min) const; + std::pair*,Point*> >, + vector*,Point*> > >, + std::pair*,Point*> >, + vector*,Point*> > > > + new_get_labels(std::vector*> &points, size_t num_sample, double id, uintmax_t &_id); + void filter(Point*, vector*,bool> >&) const; + Point* closest(Point*, vector*,bool> >&) const; + long merge(vector > ¢ers, long current, long begin, long end) const; +// Point* merge(Point*, vector*,double> >&) const; +private: + matrix::GLM glm; + matrix::Matrix weights; + double align(Point* a, Point* b) const; + std::pair generate_feat_mat(pair*, + Point* + > >, + vector*, + Point*> > > &data, int ncols); + void init(double (&matrix)[4][4], double sig, double eps); + pair*, + Point* + > >, + vector*, + Point*> > > get_labels(vector*,Point*> >&, double 
cutoff) const; + Feature *feat; + int mat[4][4]; + int sigma, epsilon; + std::vector*> points; + matrix::Matrix *feat_mat = NULL; + size_t n_points, max_pts_from_one; + double cutoff; + int k; +}; +#endif diff --git a/src/cluster/src/bvec.cpp b/src/cluster/src/bvec.cpp new file mode 100644 index 0000000..2efed1e --- /dev/null +++ b/src/cluster/src/bvec.cpp @@ -0,0 +1,332 @@ +/* -*- C++ -*- + * + * bvec.cpp + * + * Author: Benjamin T James + */ +#include "bvec.h" +#include +template +bvec::bvec(vector& lengths, uint64_t bin_size) +{ + uint64_t num_points = lengths.size(); + std::sort(std::begin(lengths), std::end(lengths)); + for (uint64_t i = 0; i < lengths.size(); i += bin_size) { + begin_bounds.push_back(lengths[i]); + // uint64_t last_index = std::min((uint64_t)lengths.size() - 1, + // i + bin_size - 1); + //std::cout << "[" << i << " " << last_index << "]" << std::endl; + } + data.reserve(begin_bounds.size()); + for (uint64_t i = 0; i < begin_bounds.size(); i++) { + data.push_back({}); + } +} + +template +Point* bvec::pop() +{ + for (auto& bin : data) { + if (!bin.empty()) { + Point* p = bin[0].first; + bin.erase(std::begin(bin)); + return p; + } + } + return NULL; +} + +template +Point* bvec::peek() const +{ + for (auto& bin : data) { + if (!bin.empty()) { + Point* p = bin[0].first; + return p; + } + } + return NULL; +} + +template +bool bvec::inner_index_of(uint64_t length, size_t &idx, size_t *pfront, size_t *pback) const +{ + + if (data.at(idx).empty() || idx == data.size()) { + if (pfront) { + for (size_t i = 0; i < data.size(); i++) { + if (!data.at(i).empty()) { + idx = i; + *pfront = 0; + break; + } + } + } + if (pback) { + for (int i = data.size()-1; i >= 0; i--) { + if (!data.at(i).empty()) { + idx = i; + *pback = 0; + break; + } + } + } + return true; + } + size_t front = 0, back = 0; + size_t low = 0, high = data.at(idx).size() - 1; + bool found = false; + if (length < data[idx][low].first->get_length() && pfront != NULL) { + *pfront = low; + } + if 
(length > data[idx][high].first->get_length() && pback != NULL) { + *pback = high; + } + for (;low <= high;) { + size_t mid = (low + high) / 2; + uint64_t d = data[idx][mid].first->get_length(); + if (d == length) { + front = mid; + back = mid; + found = true; + break; + } else if (length < d) { + high = mid; + } else if (length > d) { + low = mid + 1; + } + if (low == high) { + found = true; + front = low; + back = high; + break; + } + } + if (pfront) { + for (long i = front; i >= 0 + && data[idx][i].first->get_length() == length; i--) { + front = i; + } + *pfront = front; + } + if (pback) { + for (long i = back; i < data[idx].size() + && data[idx][i].first->get_length() == length; i++) { + back = i; + } + *pback = back; + } + return true; +} + +template +bool bvec::index_of(uint64_t point, size_t* pfront, size_t* pback) const +{ + size_t low = begin_bounds.size()-1, high = 0; + + for (size_t i = 0; i < begin_bounds.size(); i++) { + size_t prev = 0; + size_t prev_index = 0; + if (i > 0) { + prev_index = i - 1; + prev = begin_bounds[i-1]; + } + if (point >= prev && point <= begin_bounds[i]) { + low = std::min(low, prev_index); + high = std::max(high, prev_index); + } + } + if (point >= begin_bounds[begin_bounds.size()-1]) { + high = std::max(high, begin_bounds.size()-1); + } + if (pfront) { + *pfront = low; + } + if (pback) { + *pback = high; + } + return true; +} + +template +void bvec::insert(Point *p) +{ + uint64_t len = p->get_length(); + size_t front = 0, back = 0; + bool good = index_of(len, &front, &back); + if (!good || front > back) { + std::cerr << "error: list is not sorted" << std::endl; + } + std::vector min_sizes; + size_t minimum = std::numeric_limits::max(); + for (size_t i = front; i <= back; i++) { + size_t sz = data[i].size(); + if (sz < minimum) { + minimum = sz; + min_sizes.clear(); + min_sizes.push_back(i); + } else if (sz == minimum) { + min_sizes.push_back(i); + } + } + if (min_sizes.empty()) { + std::cerr << "error: no bins to insert into, 
item not inserted" << std::endl; + } + auto mid_min = min_sizes[min_sizes.size() / 2]; + data.at(mid_min).push_back(std::make_pair(p, false)); +} + +template +size_t bvec::size() const +{ + size_t num_bins = data.size(); + size_t total_size = 0; + for (size_t i = 0; i < num_bins; i++) { + total_size += data[i].size(); + } + return total_size; +} + +template +size_t bvec::report() const +{ + cout << "BVec: "; + size_t num_bins = data.size(); + cout << "num_bins=" << num_bins << endl; + size_t total_size = 0; + for (size_t i = 0; i < num_bins; i++) { + uint64_t next_bound = std::numeric_limits::max(); + if (i + 1 < num_bins) { + next_bound = begin_bounds[i+1]; + } + cout << "Bin " << i << ": [" << begin_bounds[i] << " " << next_bound << "] size=" << data[i].size() << endl; + total_size += data[i].size(); + } + cout << "total_size=" << total_size << endl; + return total_size; +} +template +void bvec::insert_finalize() +{ + auto sorter = [](const std::pair*,bool> a, const std::pair*,bool> b) { + return a.first->get_length() < b.first->get_length(); + }; + for (size_t i = 0; i < data.size(); i++) { + std::sort(std::begin(data[i]), std::end(data[i]), sorter); + data[i].shrink_to_fit(); + } +} + +template +bool bvec::empty() const +{ + bool is_empty = true; + for (auto bin : data) { + if (!bin.empty()) { + is_empty = false; + break; + } + } + return is_empty; +} + + +template +uint64_t bvec::absolute_idx(bvec_idx_t idx) const +{ + uint64_t ptr = 0; + for (int i = 0; i < idx.first; i++) { + ptr += data[i].size(); + } + ptr += idx.second; + return ptr; +} + +template +std::pair +bvec::get_range(uint64_t begin_len, uint64_t end_len) const +{ + /* perform binary search to find bin */ + bvec_idx_t front, back; + front.first = 0; + front.second = 0; + back.first = data.size()-1; + back.second = data[back.first].size() - 1; + if (!index_of(begin_len, &front.first, NULL)) { + throw 100; + } + if (!index_of(end_len, NULL, &back.first)) { + throw 100; + } + if 
(!inner_index_of(begin_len, front.first, &front.second, NULL)) { + throw 100; + } + if (!inner_index_of(end_len, back.first, NULL, &back.second)) { + throw 100; + } + // if (back.first != data.size()) { // ++ to make it an end iterator + // if (back.second != data[back.first].size()) { + // back.second++; + // } else { + // back.first++; + // back.second = 0; + // } + // } else { + // throw 101; + // } + return std::make_pair(front, back); +} + +template +void bvec::erase(size_t r, size_t c) +{ + data.at(r).erase(data.at(r).begin() + c); +} + +/* + * TODO: change available to Center class so no intermediate copying is done + */ +template +void bvec::remove_available(bvec_idx_t begin, bvec_idx_t end, std::vector*> &available) +{ + size_t a = begin.first; + size_t b = end.first; + int num = 0, new_num = 0; + auto func = [](const bv_data_type d) { return d.second; }; + auto inserter = [&](const std::pair*,bool> p) { + if (p.second) { +#pragma omp critical + available.push_back(p.first); + } + }; + #pragma omp parallel for + for (size_t i = a; i <= b; i++) { + /* move marked points to end of vector, then copy, then erase */ + //const auto last = std::remove_if(std::begin(data[i]), std::end(data[i]), func); + for (int j = 0; j < data[i].size(); j++) { + auto kv = data[i][j]; + if (kv.second) { +#pragma omp critical + { + available.push_back(kv.first); + } + } + } + data[i].erase(std::remove_if(std::begin(data[i]), std::end(data[i]), func), std::end(data[i])); + } +} + + +template +bvec_iterator bvec::iter(bvec_idx_t idx) +{ + return bvec_iterator(idx.first, idx.second, &data); +} + + +template class bvec; +template class bvec; +template class bvec; +template class bvec; +template class bvec; +template class bvec; diff --git a/src/cluster/src/bvec.h b/src/cluster/src/bvec.h new file mode 100644 index 0000000..43384e9 --- /dev/null +++ b/src/cluster/src/bvec.h @@ -0,0 +1,69 @@ +/* -*- C++ -*- + * + * bvec.h + * + * Author: Benjamin T James + */ +#ifndef BVEC_H +#define 
BVEC_H + +#include "Point.h" +#include "bvec_iterator.h" + +typedef struct bvec_idx { + size_t first, second; +} bvec_idx_t; + +/* + * operations needed: + * + * find bounds (range) + * get available or min and remove + * + */ +template +using bv_data_type = std::pair*, bool>; + +template +using bv_row_type = vector >; + +template +using bv_col_type = vector >; + +template +class bvec { +public: + bvec(vector& lengths, uint64_t bin_size=1000); + + Point* pop(); + Point* peek() const; + void insert(Point* data); + void insert_finalize(); /* sorts bins */ + + + bool index_of(uint64_t length, size_t* front, size_t* back) const; + bool inner_index_of(uint64_t length, size_t& idx, size_t *front, size_t *back) const; + bool empty() const; + + std::pair + get_range(uint64_t begin_len, uint64_t end_len) const; + + void remove_available(bvec_idx_t begin, bvec_idx_t end, std::vector*> &); + + uint64_t absolute_idx(bvec_idx_t idx) const; + + bvec_iterator iter(bvec_idx_t idx); + typedef bvec_iterator iterator; + typedef bvec_iterator const_iterator; + + size_t report() const; + size_t size() const; + + void erase(size_t r, size_t c); +private: + bv_col_type data; + vector begin_bounds; +}; + + +#endif diff --git a/src/cluster/src/bvec_iterator.cpp b/src/cluster/src/bvec_iterator.cpp new file mode 100644 index 0000000..f8d1c76 --- /dev/null +++ b/src/cluster/src/bvec_iterator.cpp @@ -0,0 +1,28 @@ +#include "bvec_iterator.h" + +template +bvec_iterator bvec_iterator::operator++() +{ + if (r != col->size()) { + if (c + 1 < col->at(r).size()) { + c++; + } else { + r++; + c = 0; + while (r < col->size() && col->at(r).empty()) { + r++; + } + } + } else { + cerr << "tried incrementing null iterator" << endl; + throw 10; + } + return *this; +} + +template class bvec_iterator; +template class bvec_iterator; +template class bvec_iterator; +template class bvec_iterator; +template class bvec_iterator; +template class bvec_iterator; diff --git a/src/cluster/src/bvec_iterator.h 
b/src/cluster/src/bvec_iterator.h new file mode 100644 index 0000000..6be6ba8 --- /dev/null +++ b/src/cluster/src/bvec_iterator.h @@ -0,0 +1,84 @@ +/* -*- C++ -*- + * + * bvec_iterator.h + * + * Author: Benjamin T James + */ +#include "bvec.h" +#ifndef BVEC_ITERATOR_H +#define BVEC_ITERATOR_H + + +template +class bvec_iterator { +public: + // iterator: split ALL possible points into chunks by indices + using dtype = std::pair*,bool>; + using vtype = vector >; + bvec_iterator(size_t _r, + size_t _c, + vtype* col_) : r(_r), c(_c), col(col_) {} + + bvec_iterator operator++(); + bvec_iterator operator++(int x) { + return ++(*this); + } + dtype& operator*() { + return col->at(r).at(c); + } + void operator+=(int64_t n) { + if (n < 0) { + throw "oops"; + } + for (int i = 0; i < n; i++) { + operator++(); + } + } + bool operator==(const bvec_iterator& rhs) const { + return rhs.c == c && rhs.r == r; + } + bool operator<(const bvec_iterator& rhs) const { + if (r < rhs.r) { + return true; + } else if (r == rhs.r) { + return c < rhs.c; + } else { + return false; + } + } + bool operator<=(const bvec_iterator& rhs) const { + if (r < rhs.r) { + return true; + } else if (r == rhs.r) { + return c <= rhs.c; + } else { + return false; + } + } + bool operator!=(const bvec_iterator& rhs) const { + return r != rhs.r || c != rhs.c; + } + int64_t operator-(const bvec_iterator& rhs) const { + int64_t sum = 0; + if (*this < rhs) { + return -1 * (rhs - *this); + } + // subtract cols until last row is reached + if (r == rhs.r) { + return c - rhs.c; + } + sum += c; + sum += col->at(rhs.r).size() - rhs.c; + for (size_t i = rhs.r + 1; i < r; i++) { + sum += col->at(i).size(); + } + return sum; + } + // bvec_iterator operator[](uint64_t idx) { + + // } +//private: + size_t r,c; + vtype* col; +}; +#endif diff --git a/src/cluster/src/main.cpp b/src/cluster/src/main.cpp new file mode 100644 index 0000000..562fd96 --- /dev/null +++ b/src/cluster/src/main.cpp @@ -0,0 +1,12 @@ +/* -*- C++ -*- + * + * 
main.cpp + * + * Author: Benjamin T James + */ +#include "Runner.h" +int main(int argc, char **argv) +{ + Runner runner(argc, argv); + return runner.run(); +} diff --git a/src/cluster/src/needleman_wunsch.cpp b/src/cluster/src/needleman_wunsch.cpp new file mode 100644 index 0000000..46d0b5b --- /dev/null +++ b/src/cluster/src/needleman_wunsch.cpp @@ -0,0 +1,153 @@ +/* -*- C++ -*- + * + * needleman_wunsch.cpp + * + * Author: Benjamin T James + */ +#include "needleman_wunsch.h" + + +//flags that can be combined +#define HORIZ 1 +#define VERT 2 +#define DIAG 4 +void needleman_wunsch::fill(int i, int j) +{ + if (i == 0 || j == 0) { + if (i == j) { + int offset = at(i, j); + score[offset] = 0; + direction[offset] = DIAG; // for backtracking + horiz_gap_len[offset] = 0; + vert_gap_len[offset] = 0; + } else if (i == 0) { + int offset = at(0, j); + int last_offset = at(0, j-1); + score[offset] = score[last_offset] + gap(j); + horiz_gap_len[offset] = 0; + vert_gap_len[offset] = j; + direction[offset] = VERT; + } else { // j == 0 + int offset = at(i, 0); + int last_offset = at(i-1, 0); + score[offset] = score[last_offset] + gap(i); + horiz_gap_len[offset] = i; + vert_gap_len[offset] = 0; + direction[offset] = HORIZ; + } + return; + } + int i_diag = at(i-1, j-1); + int i_horiz = at(i-1, j); + int i_vert = at(i, j-1); + int i_cur = at(i, j); + + int hlen = horiz_gap_len[i_horiz] + 1; + int vlen = vert_gap_len[i_vert] + 1; + + int diag_score = score[i_diag] + match_score(s1[i], s2[j]); + int horiz_score = score[i_horiz] + gap(hlen); + int vert_score = score[i_vert] + gap(vlen); + score[i_cur] = std::max(std::max(diag_score, horiz_score), vert_score); + direction[i_cur] = 0; + + // we could match multiple high scores + if (score[i_cur] == diag_score) { + direction[i_cur] |= DIAG; + } + if (score[i_cur] == vert_score) { + direction[i_cur] |= VERT; + vert_gap_len[i_cur] = vlen; + } else { + vert_gap_len[i_cur] = 0; + } + if (score[i_cur] == horiz_score) { + direction[i_cur] |= 
HORIZ; + horiz_gap_len[i_cur] = hlen; + } else { + horiz_gap_len[i_cur] = 0; + } +} + +std::pair +needleman_wunsch::backtrack() +{ + std::string a1 = "", a2 = ""; + int cur_i = l1 - 1; + int cur_j = l2 - 1; + while (cur_i >= 0 && cur_j >= 0) { + uint8_t dir = direction[at(cur_i, cur_j)]; + if (dir & DIAG) { + a1 += s1[cur_i--]; + a2 += s2[cur_j--]; + } else if (dir & HORIZ) { + a1 += s1[cur_i--]; + a2 += '-'; + } else if (dir & VERT) { + a1 += '-'; + a2 += s2[cur_j--]; + } + } + std::string r1(a1.rbegin(), a1.rend()); + std::string r2(a2.rbegin(), a2.rend()); + return std::make_pair(r1, r2); +} + + +std::pair +needleman_wunsch::align() +{ + for (int i = 0; i < l1; i++) { + for (int j = 0; j < l2; j++) { + fill(i, j); + } + } + return backtrack(); +} +double needleman_wunsch::identity(std::pair alignment) const +{ + int len = alignment.first.length(); + double count = 0; + for (int i = 0; i < len; i++) { + if (alignment.first[i] == alignment.second[i]) { + count++; + } + } + return 1.0 * count / len; +} + +int needleman_wunsch::gap(int gaplen) const +{ + return sigma + (gaplen - 1) * epsilon; +} + +int needleman_wunsch::match_score(char a, char b) const +{ + return a == b ? 
match : mismatch; +} + +needleman_wunsch::needleman_wunsch(const std::string &s1_, const std::string& s2_, int match_, int mismatch_, int sigma_, int epsilon_) +{ + int l1_ = s1_.length(); + int l2_ = s2_.length(); + if (l1_ >= l2_) { + l1 = l1_; + l2 = l2_; + s1 = s1_; + s2 = s2_; + } else { + l1 = l2_; + l2 = l1_; + s1 = s2_; + s2 = s1_; + } + sigma = -sigma_; + epsilon = -epsilon_; + match = match_; + mismatch = mismatch_; + int matlen = l1 * l2; + score = new int[matlen]; + direction = new uint8_t[matlen]; + horiz_gap_len = new int[matlen]; + vert_gap_len = new int[matlen]; +} diff --git a/src/cluster/src/needleman_wunsch.h b/src/cluster/src/needleman_wunsch.h new file mode 100644 index 0000000..031ea10 --- /dev/null +++ b/src/cluster/src/needleman_wunsch.h @@ -0,0 +1,43 @@ +/* -*- C++ -*- + * + * needleman_wunsch.h + * + * Author: Benjamin T James + */ + +#ifndef NEEDLEMAN_WUNSCH_H +#define NEEDLEMAN_WUNSCH_H + +#include + +class needleman_wunsch { +public: + needleman_wunsch(const std::string& s1, const std::string& s2, int match_, int mismatch_, int sigma_, int epsilon_); + ~needleman_wunsch() { + delete[] score; + delete[] direction; + delete[] horiz_gap_len; + delete[] vert_gap_len; + } + double identity(std::pair p) const; + std::pair + align(); +private: + int gap(int gap_len) const; + int match_score(char a, char b) const; + inline int at(int a, int b) const { return a * l2 + b; }; + void fill(int,int); + std::pair backtrack(); + int match, mismatch; + int sigma, epsilon; + std::string s1, s2; + int l1, l2; + + int *score; + uint8_t *direction; + int *horiz_gap_len; + int *vert_gap_len; +}; + + +#endif diff --git a/src/exception/FileDoesNotExistException.cpp b/src/exception/FileDoesNotExistException.cpp new file mode 100644 index 0000000..9093f5a --- /dev/null +++ b/src/exception/FileDoesNotExistException.cpp @@ -0,0 +1,25 @@ +/* + * FileDoesNotExistException.cpp + * + * Created on: Apr 30, 2012 + * Author: Hani Zakaria Girgis, PhD + */ + +#include 
"FileDoesNotExistException.h" + +#include +#include + +using namespace std; + +namespace exception{ + +FileDoesNotExistException::FileDoesNotExistException(string massage) { + cerr << "File Does Not Exist Exception" << endl; + cerr << massage << endl; +} + +FileDoesNotExistException::~FileDoesNotExistException() { + // TODO Auto-generated destructor stub +} +} diff --git a/src/exception/FileDoesNotExistException.h b/src/exception/FileDoesNotExistException.h new file mode 100644 index 0000000..c8ec3ae --- /dev/null +++ b/src/exception/FileDoesNotExistException.h @@ -0,0 +1,23 @@ +/* + * FileDoesNotExistException.h + * + * Created on: Apr 30, 2012 + * Author: Hani Zakaria Girgis, PhD + */ + +#ifndef FILEDOESNOTEXISTEXCEPTION_H_ +#define FILEDOESNOTEXISTEXCEPTION_H_ + +#include + +using namespace std; + +namespace exception { + class FileDoesNotExistException { + public: + FileDoesNotExistException(string); + ~FileDoesNotExistException(); + }; +} + +#endif /* FILEDOESNOTEXISTEXCEPTION_H_ */ diff --git a/src/exception/InvalidInputException.cpp b/src/exception/InvalidInputException.cpp new file mode 100644 index 0000000..d69f67c --- /dev/null +++ b/src/exception/InvalidInputException.cpp @@ -0,0 +1,24 @@ +/* + * InvalidInputException.cpp + * + * Created on: May 1, 2012 + * Author: Hani Zakaria Girgis, PhD + */ + +#include "InvalidInputException.h" + +#include +#include + +using namespace std; +namespace exception{ + +InvalidInputException::InvalidInputException(string msg) { + cerr << "Invalid Input Exception" << endl; + cerr << msg << endl; +} + +InvalidInputException::~InvalidInputException() { + // TODO Auto-generated destructor stub +} +} diff --git a/src/exception/InvalidInputException.h b/src/exception/InvalidInputException.h new file mode 100644 index 0000000..9db2534 --- /dev/null +++ b/src/exception/InvalidInputException.h @@ -0,0 +1,23 @@ +/* + * InvalidInputException.h + * + * Created on: May 1, 2012 + * Author: Hani Zakaria Girgis, PhD + */ + +#ifndef 
INVALIDINPUTEXCEPTION_H_ +#define INVALIDINPUTEXCEPTION_H_ + +#include + +using namespace std; + +namespace exception { + class InvalidInputException { + public: + InvalidInputException(string); + ~InvalidInputException(); + }; +} + +#endif /* INVALIDINPUTEXCEPTION_H_ */ diff --git a/src/exception/InvalidOperationException.cpp b/src/exception/InvalidOperationException.cpp new file mode 100644 index 0000000..8d1a6f6 --- /dev/null +++ b/src/exception/InvalidOperationException.cpp @@ -0,0 +1,19 @@ +/* + * InvalidOperationException.cpp + * + * Created on: Dec 20, 2012 + * Author: Hani Zakaria Girgis, PhD + */ + +#include +#include "InvalidOperationException.h" + + +namespace exception { + +InvalidOperationException::InvalidOperationException(string msg) : std::runtime_error(msg) { + cerr << "Invalid Operation Exception." << endl; + cerr << what() << endl; +} + +} diff --git a/src/exception/InvalidOperationException.h b/src/exception/InvalidOperationException.h new file mode 100644 index 0000000..74eb1e7 --- /dev/null +++ b/src/exception/InvalidOperationException.h @@ -0,0 +1,26 @@ +/* + * InvalidOperationException.h + * + * Created on: Dec 20, 2012 + * Author: Hani Zakaria Girgis, PhD + */ + +#ifndef INVALIDOPERATIONEXCEPTION_H_ +#define INVALIDOPERATIONEXCEPTION_H_ + +#include +#include + +using namespace std; + +namespace exception { + +class InvalidOperationException : public std::runtime_error{ +public: + InvalidOperationException(string msg); + //virtual ~InvalidOperationException(); +}; + +} + +#endif /* INVALIDOPERATIONEXCEPTION_H_ */ diff --git a/src/exception/InvalidOrderOfOperationsException.cpp b/src/exception/InvalidOrderOfOperationsException.cpp new file mode 100644 index 0000000..cb51650 --- /dev/null +++ b/src/exception/InvalidOrderOfOperationsException.cpp @@ -0,0 +1,24 @@ +/* + * InvalidOrderOfOperationsException.cpp + * + * Created on: Apr 26, 2012 + * Author: Hani Zakaria Girgis, PhD + */ + +#include "InvalidOrderOfOperationsException.h" + +#include 
+#include + +using namespace std; +namespace exception{ + +InvalidOrderOfOperationsException::InvalidOrderOfOperationsException(string massage) { + cerr << "Invalid Order Of Operations Exception" << endl; + cerr << massage << endl; +} + +InvalidOrderOfOperationsException::~InvalidOrderOfOperationsException() { + // TODO Auto-generated destructor stub +} +} diff --git a/src/exception/InvalidOrderOfOperationsException.h b/src/exception/InvalidOrderOfOperationsException.h new file mode 100644 index 0000000..b813d1a --- /dev/null +++ b/src/exception/InvalidOrderOfOperationsException.h @@ -0,0 +1,23 @@ +/* + * InvalidOrderOfOperationsException.h + * + * Created on: Apr 26, 2012 + * Author: Hani Zakaria Girgis, PhD + */ + +#ifndef INVALIDORDEROFOPERATIONSEXCEPTION_H_ +#define INVALIDORDEROFOPERATIONSEXCEPTION_H_ + +#include + +using namespace std; + +namespace exception{ + class InvalidOrderOfOperationsException { + public: + InvalidOrderOfOperationsException(string); + ~InvalidOrderOfOperationsException(); + }; +} + +#endif /* INVALIDORDEROFOPERATIONSEXCEPTION_H_ */ diff --git a/src/exception/InvalidScoreException.cpp b/src/exception/InvalidScoreException.cpp new file mode 100644 index 0000000..2e2829f --- /dev/null +++ b/src/exception/InvalidScoreException.cpp @@ -0,0 +1,24 @@ +/* + * InvalidScoreException.cpp + * + * Created on: Apr 27, 2012 + * Author: Hani Zakaria Girgis, PhD + */ + +#include "InvalidScoreException.h" + +#include +#include + +using namespace std; +namespace exception{ + +InvalidScoreException::InvalidScoreException(string massage) { + cerr << "Invalid Score Exception." 
<< endl; + cerr << massage << endl; +} + +InvalidScoreException::~InvalidScoreException() { + // TODO Auto-generated destructor stub +} +} diff --git a/src/exception/InvalidScoreException.h b/src/exception/InvalidScoreException.h new file mode 100644 index 0000000..89bdd34 --- /dev/null +++ b/src/exception/InvalidScoreException.h @@ -0,0 +1,23 @@ +/* + * InvalidScoreException.h + * + * Created on: Apr 27, 2012 + * Author: Hani Zakaria Girgis, PhD + */ + +#ifndef INVALIDSCOREEXCEPTION_H_ +#define INVALIDSCOREEXCEPTION_H_ + +#include + +using namespace std; + +namespace exception{ + class InvalidScoreException { + public: + InvalidScoreException(string); + virtual ~InvalidScoreException(); + }; +} + +#endif /* INVALIDSCOREEXCEPTION_H_ */ diff --git a/src/exception/InvalidStateException.cpp b/src/exception/InvalidStateException.cpp new file mode 100644 index 0000000..d39f985 --- /dev/null +++ b/src/exception/InvalidStateException.cpp @@ -0,0 +1,25 @@ +/* + * InvalidStateException.cpp + * + * Created on: Aug 9, 2012 + * Author: Hani Zakaria Girgis, PhD + */ + +#include +#include +#include "InvalidStateException.h" + +using namespace std; + + +namespace exception { +InvalidStateException::InvalidStateException(string msg) : + std::runtime_error(msg) { + cerr << "Invalid State Exception." 
<< endl; + cerr << what() << endl; +} +} + +//InvalidStateException::~InvalidStateException() { +// TODO Auto-generated destructor stub +//} diff --git a/src/exception/InvalidStateException.h b/src/exception/InvalidStateException.h new file mode 100644 index 0000000..826e59f --- /dev/null +++ b/src/exception/InvalidStateException.h @@ -0,0 +1,23 @@ +/* + * InvalidStateException.h + * + * Created on: Aug 9, 2012 + * Author: Hani Zakaria Girgis, PhD + */ + +#ifndef INVALIDSTATEEXCEPTION_H_ +#define INVALIDSTATEEXCEPTION_H_ + +#include +#include + +using namespace std; + +namespace exception { + class InvalidStateException : public std::runtime_error{ + public: + InvalidStateException(string); + }; +} + +#endif /* INVALIDSTATEEXCEPTION_H_ */ diff --git a/src/nonltr/ChromDetector.cpp b/src/nonltr/ChromDetector.cpp new file mode 100644 index 0000000..58d3a7a --- /dev/null +++ b/src/nonltr/ChromDetector.cpp @@ -0,0 +1,41 @@ +/* + * ChromDetector.cpp + * + * Created on: Nov 8, 2012 + * Author: Hani Zakaria Girgis, PhD + */ + +#include + +#include "ChromDetector.h" +#include "Detector.h" +#include "../utility/Util.h" + +using namespace std; +using namespace nonltr; +using namespace utility; + +ChromDetector::ChromDetector(double s, double w, double pDelta, double b, + double mDelta, vector * scores, + const vector *> * segmentList) { + + regions = new vector *>(); + + for (int i = 0; i < segmentList->size(); i++) { + Detector * detector = new Detector(segmentList->at(i)->at(0), + segmentList->at(i)->at(1), s, w, pDelta, b, mDelta, scores); + vector *> * segRegions = detector->getRegions(); + regions->insert(regions->end(), segRegions->begin(), segRegions->end()); + delete detector; + } +} + +ChromDetector::~ChromDetector() { + Util::deleteInVector(regions); + regions->clear(); + delete regions; +} + +vector *> * ChromDetector::getRegions() { + return regions; +} diff --git a/src/nonltr/ChromDetector.h b/src/nonltr/ChromDetector.h new file mode 100644 index 0000000..e745295 
--- /dev/null +++ b/src/nonltr/ChromDetector.h @@ -0,0 +1,29 @@ +/* + * ChromDetector.h + * + * Created on: Nov 8, 2012 + * Author: Hani Zakaria Girgis, PhD + */ + +#ifndef CHROMDETECTOR_H_ +#define CHROMDETECTOR_H_ + +#include + +using namespace std; + +namespace nonltr{ +class ChromDetector { + +private: + vector *> * regions; + +public: + ChromDetector(double, double, double, double, double, vector *, + const vector *> *); + virtual ~ChromDetector(); + vector *> * getRegions(); +}; +} + +#endif /* CHROMDETECTOR_H_ */ diff --git a/src/nonltr/ChromDetectorMaxima.cpp b/src/nonltr/ChromDetectorMaxima.cpp new file mode 100644 index 0000000..51f7900 --- /dev/null +++ b/src/nonltr/ChromDetectorMaxima.cpp @@ -0,0 +1,94 @@ +/* + * ChromDetectorMaxima.cpp + * + * Created on: Jun 6, 2013 + * Author: Hani Zakaria Girgis, PhD + */ + +#include "ChromDetectorMaxima.h" + +namespace nonltr { + +ChromDetectorMaxima::ChromDetectorMaxima(double s, double w, double m, + double t, double p, int e, vector * oScores, + ChromosomeOneDigit * chrom) { + header = chrom->getHeader(); + start(s, w, m, t, p, e, oScores, chrom->getSegment()); + +} + +ChromDetectorMaxima::ChromDetectorMaxima(double s, double w, double m, + double t, double p, int e, vector * oScores, const vector *> * segmentList) { + header = string("chrUnknown"); + start(s, w, m, t, p, e, oScores, segmentList); +} + +void ChromDetectorMaxima::start(double s, double w, double m, double t, + double p, int e, vector * oScores, + const vector *> * segmentList) { + + regionList = new vector (); + + int segmentCount = segmentList->size(); + for (int i = 0; i < segmentCount; i++) { + int segStart = segmentList->at(i)->at(0); + int segEnd = segmentList->at(i)->at(1); + + // The effective length is shorter than the actual length by 2w + int effLen = 2 * w + 10; + int segLen = segEnd - segStart + 1; + + if (segLen > effLen) { + DetectorMaxima * detector = new DetectorMaxima(segStart, segEnd, s, + w, m, t, p, e, oScores); + + const 
vector * segRegions = detector->getRegionList(); + int segRegionCount = segRegions->size(); + for (int h = 0; h < segRegionCount; h++) { + regionList->push_back(new Location(*(segRegions->at(h)))); + } + + delete detector; + } else { + cout << "\tSkipping a short segment: "; + cout << segStart << "-" << segEnd << endl; + } + } +} + +ChromDetectorMaxima::~ChromDetectorMaxima() { + Util::deleteInVector(regionList); + regionList->clear(); + delete regionList; +} + +void ChromDetectorMaxima::printIndex(string outputFile) { + printIndex(outputFile, false); +} + +void ChromDetectorMaxima::printIndex(string outputFile, bool canAppend) { + ofstream outIndex; + + if (canAppend) { + outIndex.open(outputFile.c_str(), ios::out | ios::app); + } else { + outIndex.open(outputFile.c_str(), ios::out); + } + + // Write the index of the repeat segment [x,y[ + for (int j = 0; j < regionList->size(); j++) { + outIndex << header << ":"; + outIndex << ((int) (regionList->at(j)->getStart())) << "-"; + outIndex << ((int) (regionList->at(j)->getEnd() + 1)) << " "; + outIndex << endl; + } + + outIndex.close(); +} + +const vector* ChromDetectorMaxima::getRegionList() const { + return regionList; +} + +} /* namespace nonltr */ diff --git a/src/nonltr/ChromDetectorMaxima.h b/src/nonltr/ChromDetectorMaxima.h new file mode 100644 index 0000000..c3c58df --- /dev/null +++ b/src/nonltr/ChromDetectorMaxima.h @@ -0,0 +1,47 @@ +/* + * ChromDetectorMaxima.h + * + * Created on: Jun 6, 2013 + * Author: Hani Zakaria Girgis, PhD + */ + +#ifndef CHROMDETECTORMAXIMA_H_ +#define CHROMDETECTORMAXIMA_H_ + +#include +#include + +#include "ChromosomeOneDigit.h" +#include "DetectorMaxima.h" + +#include "../utility/Util.h" +#include "../utility/ILocation.h" +#include "../utility/Location.h" + +using namespace std; +using namespace utility; + +namespace nonltr { + +class ChromDetectorMaxima { +private: + vector * regionList; + string header; + + void start(double, double, double, double, double, int, vector *, + 
const vector *> *); + +public: + ChromDetectorMaxima(double, double, double, double, double, int, + vector *, ChromosomeOneDigit *); + ChromDetectorMaxima(double, double, double, double, double, int, + vector *, const vector *> *); + virtual ~ChromDetectorMaxima(); + const vector* getRegionList() const; + void printIndex(string); + void printIndex(string, bool); + +}; + +} /* namespace nonltr */ +#endif /* CHROMDETECTORMAXIMA_H_ */ diff --git a/src/nonltr/ChromListMaker.cpp b/src/nonltr/ChromListMaker.cpp new file mode 100644 index 0000000..e684c3a --- /dev/null +++ b/src/nonltr/ChromListMaker.cpp @@ -0,0 +1,123 @@ +/* + * ChromListMaker.cpp + * + * Created on: Mar 13, 2014 + * Author: Hani Zakaira Girgis + */ + +#include "ChromListMaker.h" + +namespace nonltr { + +ChromListMaker::ChromListMaker(string seqFileIn) { + seqFile = seqFileIn; + chromList = new vector(); +} + +ChromListMaker::~ChromListMaker() { + Util::deleteInVector(chromList); + delete chromList; +} + + +std::istream& safe_getline(std::istream& is, std::string& t) +{ + t.clear(); + std::istream::sentry se(is, true); + std::streambuf* sb = is.rdbuf(); + for(;;) { + int c = sb->sbumpc(); + switch (c) { + case '\n': + return is; + case '\r': + if (sb->sgetc() == '\n') { + sb->sbumpc(); + } + return is; + case std::streambuf::traits_type::eof(): + if (t.empty()) { + is.setstate(std::ios::eofbit); + } + return is; + default: + t += (char)c; + } + } +} + +const vector * ChromListMaker::makeChromList() { + ifstream in(seqFile.c_str()); + bool isFirst = true; + Chromosome * chrom; + + while (in.good()) { + string line; + safe_getline(in, line); + if (line[0] == '>') { + if (!isFirst) { + chrom->finalize(); + chromList->push_back(chrom); + } else { + isFirst = false; + } + + chrom = new Chromosome(); + chrom->setHeader(line); + } else if (line[0] == ' ' || line[0] == '\t') { + bool all_spaces = true; + for (auto c : line) { + if (c != ' ' && c != '\t') { + all_spaces = false; + } + } + if (all_spaces) { + 
continue; + } + std::ostringstream oss; + oss << chrom->getHeader() << line; + std::string new_header = oss.str(); + chrom->setHeader(new_header); + } else { + chrom->appendToSequence(line); + } + } + chrom->finalize(); + chromList->push_back(chrom); + in.close(); + + return chromList; +} + +const vector * ChromListMaker::makeChromOneDigitList() { + ifstream in(seqFile.c_str()); + bool isFirst = true; + ChromosomeOneDigit * chrom; + + while (in.good()) { + string line; + safe_getline(in, line); + if (line[0] == '>') { + if (!isFirst) { + chrom->finalize(); + chromList->push_back(chrom); + } else { + isFirst = false; + } + + chrom = new ChromosomeOneDigit(); + chrom->setHeader(line); + } else { + chrom->appendToSequence(line); + } + } + + chrom->finalize(); + chromList->push_back(chrom); + in.close(); + + return chromList; +} + +} +/* namespace nonltr */ diff --git a/src/nonltr/ChromListMaker.h b/src/nonltr/ChromListMaker.h new file mode 100644 index 0000000..a60fe2f --- /dev/null +++ b/src/nonltr/ChromListMaker.h @@ -0,0 +1,38 @@ +/* + * ChromListMaker.h + * + * Created on: Mar 13, 2014 + * Author: Hani Zakaria Girgis, PhD + */ + +#ifndef CHROMLISTMAKER_H_ +#define CHROMLISTMAKER_H_ + +#include +#include + +#include "Chromosome.h" +#include "ChromosomeOneDigit.h" + +#include "../utility/Util.h" + +using namespace std; +using namespace utility; + +namespace nonltr { + +class ChromListMaker { +private: + vector * chromList; + string seqFile; + +public: + ChromListMaker(string); + virtual ~ChromListMaker(); + const vector * makeChromList(); + const vector * makeChromOneDigitList(); + +}; + +} /* namespace nonltr */ +#endif /* CHROMLISTMAKER_H_ */ diff --git a/src/nonltr/Chromosome.cpp b/src/nonltr/Chromosome.cpp new file mode 100644 index 0000000..2bea802 --- /dev/null +++ b/src/nonltr/Chromosome.cpp @@ -0,0 +1,308 @@ +/* + * Chromosome.cpp + * + * Created on: Mar 26, 2012 + * Author: Hani Zakaria Girgis, PhD - NCBI/NLM/NIH + */ +#include "Chromosome.h" + 
+Chromosome::Chromosome() { + header = string(""); + base = string(""); + isHeaderReady = false; + isBaseReady = false; + isFinalized = false; +} + +Chromosome::Chromosome(string fileName) { + chromFile = fileName; + readFasta(); + help(1000000, true); +} + +Chromosome::Chromosome(string fileName, bool canMerge) { + chromFile = fileName; + readFasta(); + help(1000000, canMerge); +} + +Chromosome::Chromosome(string fileName, int len) { + chromFile = fileName; + readFasta(); + help(len, true); +} + +Chromosome::Chromosome(string &seq, string &info) { + header = info; + base = seq; + help(1000000, true); +} + +Chromosome::Chromosome(string &seq, string &info, int len) { + header = info; + base = seq; + help(len, true); +} + +void Chromosome::setHeader(string& info) { + if (isFinalized) { + string msg("This chromosome has been finalized. "); + msg.append("The header cannot be modified."); + throw InvalidOperationException(msg); + } else { + header = info; + isHeaderReady = true; + } +} + +/** + * This method can waste memory if the sequence is large. + * Consider using the method appendToSequence instead + */ +void Chromosome::setSequence(string& seq) { + if (isFinalized) { + string msg("This chromosome has been finalized. "); + msg.append("The sequence cannot be modified."); + throw InvalidOperationException(msg); + } else { + base = seq; + isBaseReady = true; + } +} + +void Chromosome::appendToSequence(const string& line) { + if (isFinalized) { + string msg("This chromosome has been finalized. "); + msg.append("The sequence cannot be modified."); + throw InvalidOperationException(msg); + } else { + base.append(line); + isBaseReady = true; + } +} + +void Chromosome::finalize() { + if (isFinalized) { + string msg("This chromosome has been already finalized. 
"); + msg.append("Finalize can be only called once."); + throw InvalidOperationException(msg); + } else if (!(isHeaderReady && isBaseReady)) { + string msg( + "The header and the sequence must be set before calling finalize"); + throw InvalidOperationException(msg); + } else { + help(1000000, true); + isFinalized = true; + } +} + +void Chromosome::help(int len, bool canMerge) { + effectiveSize = 0; + segLength = len; + segment = new vector *>(); +// segment->reserve(100); + + toUpperCase(); + removeN(); + if (canMerge) { + mergeSegments(); + } + makeSegmentList(); + calculateEffectiveSize(); +} + +Chromosome::~Chromosome() { + base.clear(); + + Util::deleteInVector(segment); + segment->clear(); + delete segment; +} + +void Chromosome::readFasta() { + bool isFirst = true; + header = string(""); + base = string(""); + + ifstream in(chromFile.c_str()); + while (in.good()) { + string line; + getline(in, line); + if (line[0] == '>') { + if (!isFirst) { + string msg = "Chromosome file: "; + msg = msg + chromFile; + msg = + msg + + " must have one sequence only. 
But it has more than one."; + throw InvalidInputException(msg); + } else { + header = line; + isFirst = false; + } + } else { + base.append(line); + } + } + in.close(); +} + +/** + * Convert alphabet to upper case if it has not been done before + **/ +void Chromosome::toUpperCase() { + for (int i = 0; i < base.length(); i++) { + base[i] = toupper(base[i]); + } +} + +/** + * Segment coordinates are inclusive [s,e] + **/ +void Chromosome::removeN() { + // Store non-N index + int start = -1; + for (int i = 0; i < base.size(); i++) { + if (base[i] != 'N' && start == -1) { + start = i; + } else if (base[i] == 'N' && start != -1) { + vector * v = new vector(); + v->push_back(start); + v->push_back(i - 1); + segment->push_back(v); + + start = -1; + } else if (i == base.size() - 1 && base[i] != 'N' && start != -1) { + vector * v = new vector(); + v->push_back(start); + v->push_back(i); + + segment->push_back(v); + start = -1; + } + } +} + +/** + * If the gap between two consecutive segments is less than 10 bp. + * Segments that are shorter than 20 bp are not added. 
+ */ +void Chromosome::mergeSegments() { + vector *> * mSegment = new vector *>(); + + int s = segment->at(0)->at(0); + int e = segment->at(0)->at(1); + + for (int i = 1; i < segment->size(); i++) { + int s1 = segment->at(i)->at(0); + int e1 = segment->at(i)->at(1); + + if (s1 - e < 10) { + e = e1; + } else { + if (e - s + 1 >= 20) { + vector * seg = new vector(); + seg->push_back(s); + seg->push_back(e); + mSegment->push_back(seg); + } + + s = s1; + e = e1; + } + } + + // Handle the last index + if (e - s + 1 >= 20) { + vector * seg = new vector(); + seg->push_back(s); + seg->push_back(e); + mSegment->push_back(seg); + } + + Util::deleteInVector(segment); + segment->clear(); + segment = mSegment; +} + +void Chromosome::makeSegmentList() { + vector *> * segmentList = new vector *>(); + int segmentCount = segment->size(); + for (int oo = 0; oo < segmentCount; oo++) { + int s = segment->at(oo)->at(0); + int e = segment->at(oo)->at(1); + + if (e - s + 1 > segLength) { + int fragNum = (int) (e - s + 1) / segLength; + + for (int h = 0; h < fragNum; h++) { + int fragStart = s + (h * segLength); + int fragEnd = + (h == fragNum - 1) ? 
e : fragStart + segLength - 1; + vector * v = new vector(); + v->push_back(fragStart); + v->push_back(fragEnd); + segmentList->push_back(v); + } + } else { + vector * v = new vector(); + v->push_back(segment->at(oo)->at(0)); + v->push_back(segment->at(oo)->at(1)); + segmentList->push_back(v); + } + } + + Util::deleteInVector(segment); + delete segment; + segment = segmentList; +} + +const string* Chromosome::getBase() { + return &base; +} + +const vector *> * Chromosome::getSegment() { + return segment; +} + +void Chromosome::printSegmentList(){ + int l = segment->size(); + cout << "Segment list size = " << l << endl; + for(int i = 0; i < l; i++){ + cout << segment->at(i)->at(0) << "\t"; + cout << segment->at(i)->at(1) << endl; + } +} + +string Chromosome::getHeader() { + return header; +} + +int Chromosome::size() { + return base.size(); +} + +void Chromosome::calculateEffectiveSize() { + int segmentCount = segment->size(); + for (int oo = 0; oo < segmentCount; oo++) { + int s = segment->at(oo)->at(0); + int e = segment->at(oo)->at(1); + effectiveSize += (e - s + 1); + } +} + +int Chromosome::getEffectiveSize() { + return effectiveSize; +} + +int Chromosome::getGcContent() { + int gc = 0; + int size = base.size(); + for (int i = 0; i < size; i++) { + char n = base.at(i); + if (n == 'C' || n == 'G') { + gc++; + } + } + return gc; +} diff --git a/src/nonltr/Chromosome.h b/src/nonltr/Chromosome.h new file mode 100644 index 0000000..0632458 --- /dev/null +++ b/src/nonltr/Chromosome.h @@ -0,0 +1,78 @@ +/* + * Chromosome.h + * + * Created on: Mar 26, 2012 + * Author: Hani Zakaria Girgis, PhD - NCBI/NLM/NIH + */ +#ifndef CHROMOSOME_H_ +#define CHROMOSOME_H_ + +#include +#include +#include +#include +#include + +#include "IChromosome.h" +#include "../exception/InvalidOperationException.h" +#include "../exception/InvalidInputException.h" +#include "../utility/Util.h" + +using namespace std; +using namespace nonltr; +using namespace utility; +using namespace exception; + 
+namespace nonltr { +class Chromosome: public IChromosome { +public: + Chromosome(); + Chromosome(string); + Chromosome(string, bool); + Chromosome(string, int); + Chromosome(string &, string&); + Chromosome(string &, string&, int); + + int getGcContent(); + + virtual ~Chromosome(); + + virtual const string* getBase(); + virtual const vector *> * getSegment(); + virtual void printSegmentList(); + virtual string getHeader(); + virtual int size(); + virtual int getEffectiveSize(); + virtual void setHeader(string&); + virtual void setSequence(string&); + virtual void appendToSequence(const string&); + virtual void finalize(); + + +protected: + string chromFile; + string header; + string base; + int effectiveSize; + int segLength; + + vector *> * segment; + void readFasta(); + void toUpperCase(); + void removeN(); + void mergeSegments(); + virtual void help(int, bool); + void makeSegmentList(); + void calculateEffectiveSize(); + +private: + bool isHeaderReady; + bool isBaseReady; + bool isFinalized; + + void reverseSegments(); + +}; +} + +#endif /* CHROMOSOME_H_ */ diff --git a/src/nonltr/ChromosomeOneDigit.cpp b/src/nonltr/ChromosomeOneDigit.cpp new file mode 100644 index 0000000..9af2c51 --- /dev/null +++ b/src/nonltr/ChromosomeOneDigit.cpp @@ -0,0 +1,246 @@ +/* + * ChromosomeOneDigit.cpp + * + * Created on: Jul 31, 2012 + * Author: Hani Zakaria Girgis, PhD at the NCB1/NLM/NIH + * A A + * T T + * G G + * C C + * R G or A + * Y T or C + * M A or C + * K G or T + * S G or C + * W A or T + * H A or C or T + * B G or T or C + * V G or C or A + * D G or T or A + * N G or T or A or C + */ +#include +#include + +#include "Chromosome.h" +#include "ChromosomeOneDigit.h" +#include "../exception/InvalidInputException.h" + +using namespace exception; + +namespace nonltr { + +ChromosomeOneDigit::ChromosomeOneDigit() : + Chromosome() { +} + +ChromosomeOneDigit::ChromosomeOneDigit(string fileName) : + Chromosome(fileName) { + help(); +} + 
+ChromosomeOneDigit::ChromosomeOneDigit(string seq, string info) : + Chromosome(seq, info) { + help(); +} + +void ChromosomeOneDigit::help() { + // Build codes + buildCodes(); + // Modify the sequence in the super class + encodeNucleotides(); +} + +void ChromosomeOneDigit::finalize() { + Chromosome::finalize(); + help(); +} + +void ChromosomeOneDigit::buildCodes() { + // Make map + codes = new map(); + + // Certain nucleotides + codes->insert(map::value_type('A', (char) 0)); + codes->insert(map::value_type('C', (char) 1)); + codes->insert(map::value_type('G', (char) 2)); + codes->insert(map::value_type('T', (char) 3)); + + // Common uncertain nucleotide + // codes->insert(map::value_type('N', (char) 4)); + + // Uncertain nucleotides + codes->insert(map::value_type('R', codes->at('G'))); + codes->insert(map::value_type('Y', codes->at('C'))); + codes->insert(map::value_type('M', codes->at('A'))); + codes->insert(map::value_type('K', codes->at('T'))); + codes->insert(map::value_type('S', codes->at('G'))); + codes->insert(map::value_type('W', codes->at('T'))); + codes->insert(map::value_type('H', codes->at('C'))); + codes->insert(map::value_type('B', codes->at('T'))); + codes->insert(map::value_type('V', codes->at('A'))); + codes->insert(map::value_type('D', codes->at('T'))); + codes->insert(map::value_type('N', codes->at('C'))); + codes->insert(map::value_type('X', codes->at('G'))); +} + +ChromosomeOneDigit::~ChromosomeOneDigit() { + codes->clear(); + delete codes; +} + +/** + * This method converts nucleotides in the segments to single digit codes + */ +void ChromosomeOneDigit::encodeNucleotides() { + + for (int s = 0; s < segment->size(); s++) { + int segStart = segment->at(s)->at(0); + int segEnd = segment->at(s)->at(1); + for (int i = segStart; i <= segEnd; i++) { + if (codes->count(base[i]) > 0) { + base[i] = codes->at(base[i]); + } else { + string msg = "Invalid nucleotide: "; + msg.append(1, base[i]); + throw InvalidInputException(msg); + } + } + } + + // 
Digitize skipped segments + int segNum = segment->size(); + if(segNum > 0){ + // The first interval - before the first segment + int segStart = 0; + int segEnd = segment->at(0)->at(0)-1; + + for (int s = 0; s <= segNum; s++) { + for (int i = segStart; i <= segEnd; i++) { + char c = base[i]; + if(c != 'N'){ + if (codes->count(c) > 0) { + base[i] = codes->at(c); + } else { + string msg = "Invalid nucleotide: "; + msg.append(1, c); + throw InvalidInputException(msg); + } + } + } + + // The regular intervals between two segments + if(s < segNum-1){ + segStart = segment->at(s)->at(1)+1; + segEnd = segment->at(s+1)->at(0)-1; + } + // The last interval - after the last segment + else if(s == segNum - 1){ + segStart = segment->at(s)->at(1)+1; + segEnd = base.size()-1; + } + } + } +} + +/* +void ChromosomeOneDigit::encodeNucleotides() { + int seqLen = base.size(); + + for (int i = 0; i < seqLen; i++) { + if (codes->count(base[i]) > 0) { + base[i] = codes->at(base[i]); + } else { + string msg = "Invalid nucleotide: "; + msg.append(1, base[i]); + throw InvalidInputException(msg); + } + } + +} +*/ + +/** + * Cannot be called on already finalized object. + */ +void ChromosomeOneDigit::makeR() { + //cout << "Making reverse ..." << endl; + makeReverse(); + reverseSegments(); +} + +/** + * Cannot be called on already finalized object. + */ +void ChromosomeOneDigit::makeRC() { + //cout << "Making reverse complement ..." 
<< endl; + makeComplement(); + makeReverse(); + reverseSegments(); +} + +void ChromosomeOneDigit::makeComplement() { + map complement; + + // Certain nucleotides + complement.insert(map::value_type((char) 0, (char) 3)); + complement.insert(map::value_type((char) 1, (char) 2)); + complement.insert(map::value_type((char) 2, (char) 1)); + complement.insert(map::value_type((char) 3, (char) 0)); + + // Unknown nucleotide + complement.insert(map::value_type('N', 'N')); + // complement.insert(map::value_type((char) 4, (char) 4)); + + // Convert a sequence to its complement + int seqLen = base.size(); + for (int i = 0; i < seqLen; i++) { + if (complement.count(base[i]) > 0) { + base[i] = complement.at(base[i]); + } else { + cerr << "Error: The digit " << (char) base[i]; + cerr << " does not represent a base." << endl; + exit(2); + } + } +} + +void ChromosomeOneDigit::makeReverse() { + int last = base.size() - 1; + + // Last index to be switched + int middle = base.size() / 2; + + for (int i = 0; i < middle; i++) { + char temp = base[last - i]; + base[last - i] = base[i]; + base[i] = temp; + } +} + +void ChromosomeOneDigit::reverseSegments() { + int segNum = segment->size(); + int lastBase = size() - 1; + + // Calculate the coordinate on the main strand + for (int i = 0; i < segNum; i++) { + vector * seg = segment->at(i); + + int s = lastBase - seg->at(1); + int e = lastBase - seg->at(0); + seg->clear(); + seg->push_back(s); + seg->push_back(e); + } + + // Reverse the regions within the list + int lastRegion = segNum - 1; + int middle = segNum / 2; + for (int i = 0; i < middle; i++) { + vector * temp = segment->at(lastRegion - i); + (*segment)[lastRegion - i] = segment->at(i); + (*segment)[i] = temp; + } +} + +} diff --git a/src/nonltr/ChromosomeOneDigit.h b/src/nonltr/ChromosomeOneDigit.h new file mode 100644 index 0000000..384698f --- /dev/null +++ b/src/nonltr/ChromosomeOneDigit.h @@ -0,0 +1,43 @@ +/* + * ChromosomeOneDigit.h + * + * Created on: Jul 31, 2012 + * Author: 
Hani Zakaria Girgis, PhD - NCBI/NLM/NIH + */ + +#ifndef CHROMOSOMEONEDIGIT_H_ +#define CHROMOSOMEONEDIGIT_H_ + +#include +#include "Chromosome.h" + +namespace nonltr { +class ChromosomeOneDigit: public Chromosome { + +private: + /* Fields */ + map * codes; + + /* Methods */ + void help(); + void buildCodes(); + void encodeNucleotides(); + + void makeReverse(); + void makeComplement(); + void reverseSegments(); + +public: + /* Methods */ + ChromosomeOneDigit(); + ChromosomeOneDigit(string); + ChromosomeOneDigit(string, string); + virtual ~ChromosomeOneDigit(); + virtual void finalize(); + + void makeR(); + void makeRC(); +}; +} + +#endif /* CHROMOSOMEONEDIGIT_H_ */ diff --git a/src/nonltr/ChromosomeRandom.cpp b/src/nonltr/ChromosomeRandom.cpp new file mode 100644 index 0000000..68ae15b --- /dev/null +++ b/src/nonltr/ChromosomeRandom.cpp @@ -0,0 +1,363 @@ +/* + * ChromosomeRandom.cpp + * + * Created on: Feb 4, 2013 + * Author: Hani Zakaria Girgis, PhD + * + */ + +#include +#include +#include +#include +#include +#include + +#include "ChromosomeRandom.h" +#include "../exception/InvalidInputException.h" +#include "../exception/InvalidStateException.h" +#include "../utility/Util.h" + +using namespace std; +using namespace exception; +using namespace utility; + +namespace nonltr { + +ChromosomeRandom::ChromosomeRandom(int nIn, IChromosome* oChromIn, + char unreadIn, vector* alphaIn) { + // Check the order + if (nIn < 0) { + string msg("The Markov order must be non-negative. "); + msg.append("The order received is: "); + msg.append(Util::int2string(nIn)); + msg.append("."); + throw InvalidInputException(msg); + } + + // n here is the length of the word, i.e. 
the order + 1 + n = nIn + 1; + oChrom = oChromIn; + unread = unreadIn; + alpha = alphaIn; + + // Initialize the random sequence + int size = oChrom->getBase()->size(); + rBase = new string(size, unread); + + // Initialize key list + keyList = new vector(); + + // Initialize the table + table = new map(); + + // Handle unusual characters in the first word of a segment + // Make map + codes = new map(); + codes->insert(map::value_type('A', 'A')); + codes->insert(map::value_type('C', 'C')); + codes->insert(map::value_type('G', 'G')); + codes->insert(map::value_type('T', 'T')); + codes->insert(map::value_type('R', 'G')); + codes->insert(map::value_type('Y', 'C')); + codes->insert(map::value_type('M', 'A')); + codes->insert(map::value_type('K', 'T')); + codes->insert(map::value_type('S', 'G')); + codes->insert(map::value_type('W', 'T')); + codes->insert(map::value_type('H', 'C')); + codes->insert(map::value_type('B', 'T')); + codes->insert(map::value_type('V', 'A')); + codes->insert(map::value_type('D', 'T')); + codes->insert(map::value_type('N', 'C')); + codes->insert(map::value_type('X', 'G')); + + // Start operations + cout << "\tFilling key list ..." << endl; + fillKeyList(); + + cout << "\tInitializing table ..." << endl; + initializeTable(); + + cout << "\tCounting words ..." << endl; + countWords(); + + cout << "\tCalculating probabilities ..." << endl; + convertToProbabilities(); + + //cout << "\tPrinting the table ..." << endl; + //printTable(); + + cout << "\tGenerating the random sequence ..." 
<< endl; + generateRandomSequence(); +} + +ChromosomeRandom::~ChromosomeRandom() { + codes->clear(); + delete codes; + + keyList->clear(); + delete keyList; + + table->clear(); + delete table; + + delete rBase; +} + +void ChromosomeRandom::fillKeyList() { + // Collect keys + int alphaCount = alpha->size(); + + // Order 0 + + for (int h = 0; h < alphaCount; h++) { + string s(""); + s.append(1, alpha->at(h)); + keyList->push_back(s); + } + + // Order 1 and higher + for (int g = 1; g < n; g++) { + vector o; + int keyListSize = keyList->size(); + for (int i = 0; i < keyListSize; i++) { + for (int j = 0; j < alphaCount; j++) { + string s(keyList->at(i)); + s.append(1, alpha->at(j)); + o.push_back(s); + } + } + keyList->clear(); + (*keyList) = o; + } +} + +void ChromosomeRandom::initializeTable() { + int keyListSize = keyList->size(); + for (int i = 0; i < keyListSize; i++) { + table->insert(valType(keyList->at(i), 1)); + } +} + +void ChromosomeRandom::countWords() { + // Get the original sequence + const string* oBase = oChrom->getBase(); + + // Count words + const vector *> * segmentList = oChrom->getSegment(); + int segmentCount = segmentList->size(); + for (int i = 0; i < segmentCount; i++) { + int s = segmentList->at(i)->at(0); + int e = segmentList->at(i)->at(1); + if (e - s + 1 >= n) { + + int limit = e - n + 1; + + for (int h = s; h <= limit; h++) { + // Check if the current base is a standard one. + // Words including non-standard bases are not counted. 
+ + char c = oBase->at(h); + + int alphaCount = alpha->size(); + bool isStandard = false; + for (int a = 0; a < alphaCount; a++) { + if (alpha->at(a) == c) { + isStandard = true; + break; + } + } + + // Increment the count + if (isStandard) { + string word = oBase->substr(h, n); + if (table->count(word) > 0) { + (*table)[word] = table->at(word) + 1; + } else { + cout << "\t\tIgnoring " << word << endl; + } + } + } + } + } +} + +void ChromosomeRandom::convertToProbabilities() { + int alphaCount = alpha->size(); + int keyListSize = keyList->size(); + for (int i = 0; i < keyListSize; i += alphaCount) { + double sum = 0; + for (int j = 0; j < alphaCount; j++) { + string key = keyList->at(i + j); + sum += table->at(key); + } + for (int j = 0; j < alphaCount; j++) { + string key = keyList->at(i + j); + (*table)[key] = ((double) table->at(key)) / sum; + } + } +} + +void ChromosomeRandom::generateRandomSequence() { + // Get the original sequence + const string* oBase = oChrom->getBase(); + + // Alphabet count + int alphaCount = alpha->size(); + + // Get the original segments + const vector *> * segmentList = oChrom->getSegment(); + int segmentCount = segmentList->size(); + + // Generate random segments + for (int i = 0; i < segmentCount; i++) { + int s = segmentList->at(i)->at(0); + int e = segmentList->at(i)->at(1); + + if (e - s + 1 > n) { + //string order = oBase->substr(s, n - 1); + string order(""); + // The first order is based on the original sequence. + for (int w = s; w < s + n - 1; w++) { + (*rBase)[w] = codes->at(oBase->at(w)); + order.append(1, codes->at(oBase->at(w))); + } + + for (int h = s + n - 1; h <= e; h++) { + // Subsequent orders are based on the random sequence. 
+ order = rBase->substr(h - n + 1, n - 1); + vector > lottery; + int chanceSoFar = 0; + for (int k = 0; k < alphaCount; k++) { + string temp = order; + temp.append(1, alpha->at(k)); + if (table->count(temp) > 0) { + int periodStart = chanceSoFar; + int periodEnd = periodStart + (100 * table->at(temp)); + chanceSoFar = periodEnd + 1; + vector entry; + entry.push_back(alpha->at(k)); + entry.push_back(periodStart); + entry.push_back(periodEnd); + lottery.push_back(entry); + } else { + string msg("This word must exist in the table: "); + msg.append(temp); + msg.append("."); + throw InvalidStateException(msg); + } + } + + if (lottery.size() > 0) { + int randInt = rand() % chanceSoFar; + + for (int tt = 0; tt < alphaCount; tt++) { + vector entry = lottery.at(tt); + if (randInt >= entry.at(1) && randInt <= entry.at(2)) { + (*rBase)[h] = entry.at(0); + break; + } + } + lottery.clear(); + } else { + string msg("The lottery vector cannot be empty."); + throw InvalidStateException(msg); + } + } + } + } + + // Make sure that the generated sequence has the same length as the original sequence + if (oBase->size() != rBase->size()) { + cerr << "The original sequence and the random sequence "; + cerr << "do not have the same size." 
<< endl; + cerr << "Original sequence size is: " << oBase->size() << endl; + cerr << "Generated sequence size is: " << rBase->size() << endl; + } +} + +void ChromosomeRandom::printTable() { + map::iterator iterStart = table->begin(); + map::iterator iterEnd = table->end(); + while (iterStart != iterEnd) { + cout << (*iterStart).first << " -> " << (*iterStart).second << endl; + iterStart++; + } +} + +/** + * Returns the segments of the original chromosome + */ +const vector *> * ChromosomeRandom::getSegment() { + return oChrom->getSegment(); +} + +/** + * Returns the random sequence + */ +const string* ChromosomeRandom::getBase() { + return rBase; +} + +/** + * Returns the header indicating the order of the Markov chain + */ +string ChromosomeRandom::getHeader() { + string header = oChrom->getHeader(); +//header.append(" - Random based on "); +//header.append(Util::int2string(n - 1)); +//header.append("-order Markov chain."); + return header; +} + +void ChromosomeRandom::printEffectiveSequence(string outputFile) { + int totalSize = rBase->size(); + string * effectiveRBase = new string(""); + for (int i = 0; i < totalSize; i++) { + char b = rBase->at(i); + if (b != unread) { + effectiveRBase->append(1, b); + } + } + + // Make sure that the effective sequence is shorter than the original + // length + if (effectiveRBase->size() > totalSize) { + cerr << "The effective length must be <= the original length." << endl; + cerr << "Generated sequence size is: " << totalSize << endl; + cerr << "The effective size is: " << effectiveRBase->size() << endl; + + } + + printSequence(outputFile, effectiveRBase); + + delete effectiveRBase; +} + +void ChromosomeRandom::printSequence(string outputFile) { + printSequence(outputFile, rBase); +} + +void ChromosomeRandom::printSequence(string outputFile, string * baseToPrint) { + cout << "Printing chromosome to file ..." 
<< endl; + ofstream outSequence; + outSequence.open(outputFile.c_str(), ios::out); + + int step = 50; + + outSequence << getHeader() << endl; + int len = baseToPrint->size(); + + for (int i = 0; i < len; i = i + step) { + int e = (i + step - 1 > len - 1) ? len - 1 : i + step - 1; + for (int k = i; k <= e; k++) { + outSequence << baseToPrint->at(k); + } + outSequence << endl; + } + outSequence << endl; + + outSequence.close(); +} + +} /* namespace nonltr */ diff --git a/src/nonltr/ChromosomeRandom.h b/src/nonltr/ChromosomeRandom.h new file mode 100644 index 0000000..a837575 --- /dev/null +++ b/src/nonltr/ChromosomeRandom.h @@ -0,0 +1,51 @@ +/* + * ChromosomeRandom.h + * + * Created on: Feb 4, 2013 + * Author: Hani Zakaria Girgis, PhD + */ + +#ifndef CHROMOSOMERANDOM_H_ +#define CHROMOSOMERANDOM_H_ + +#include + +#include "IChromosome.h" + +namespace nonltr { + +class ChromosomeRandom: public nonltr::IChromosome { + // Key-value pair type. + typedef map::value_type valType; + +private: + int n; + char unread; + IChromosome * oChrom; + vector * alpha; + map * table; + string * rBase; + vector * keyList; + map * codes; + + void fillKeyList(); + void initializeTable(); + void countWords(); + void convertToProbabilities(); + void printTable(); + void generateRandomSequence(); + +public: + ChromosomeRandom(int, IChromosome*, char, vector*); + virtual ~ChromosomeRandom(); + + virtual const string* getBase(); + virtual const vector *> * getSegment(); + virtual string getHeader(); + virtual void printSequence(string); + void printSequence(string, string *); + void printEffectiveSequence(string); +}; + +} /* namespace nonltr */ +#endif /* CHROMOSOMERANDOM_H_ */ diff --git a/src/nonltr/DetectorMaxima.cpp b/src/nonltr/DetectorMaxima.cpp new file mode 100644 index 0000000..90043e3 --- /dev/null +++ b/src/nonltr/DetectorMaxima.cpp @@ -0,0 +1,518 @@ +/* + * DetectorMaxima.cpp + * + * Created on: May 31, 2013 + * Author: Hani Zakaria Girgis, PhD + */ + +#include "DetectorMaxima.h" 
+#include "../utility/Util.h" +#include "../utility/Location.h" +#include "../exception/InvalidStateException.h" + +#include +// Delete start +#include +using namespace std; +// Delete end + +using namespace exception; + +namespace nonltr { + +DetectorMaxima::DetectorMaxima(int segStartIn, int segEndIn, double sIn, + double wIn, double mIn, double tIn, double pIn, int eIn, + vector * oScoresIn) { + + // ToDo: make sure that segStart and segEnd are within the input scores. + segStart = segStartIn; + segEnd = segEndIn; + s = sIn; + w = wIn; + m = mIn; + t = tIn; + p = pIn; + e = eIn; + oScores = oScoresIn; + + halfS = s; + //s / 2; + + mask = new vector(); + // Complete + scores = new vector(); + + // Trimmed on both sides + first = new vector(); + + // Trimmed on both sides + second = new vector(); + + // Coordinates according to the complete sequence + maxima = new vector(); + + // Coordinates according to the complete sequence + // allMaxima = new vector *>(); + + // Coordinates according to the complete sequence + separatorList = new vector(); + + // Coordinates according to the complete sequence + regionList = new vector(); + + makeMask(); + + smooth(); + + deriveFirst(); + + deriveSecond(); + + // Free memory start + mask->clear(); + delete mask; + scores->clear(); + delete scores; + // Free memory end + + findMaxima(); + + // Free memory start + first->clear(); + delete first; + second->clear(); + delete second; + // Free memory end + + findSeparators(); + + findRegions(); + + // Free memory start + maxima->clear(); + delete maxima; + Util::deleteInVector(separatorList); + separatorList->clear(); + delete separatorList; + // Free memory end + + extendRegions(); +} + +/* + const vector *>* DetectorMaxima::getAllMaxima() const { + return allMaxima; + } + */ + +const vector* DetectorMaxima::getFirst() const { + return first; +} + +const vector* DetectorMaxima::getSecond() const { + return second; +} + +const vector * DetectorMaxima::getRegionList() const { + 
return regionList; +} + +DetectorMaxima::~DetectorMaxima() { + /* + Util::deleteInVector (allMaxima); + allMaxima->clear(); + delete allMaxima; + */ + + Util::deleteInVector(regionList); + regionList->clear(); + delete regionList; +} + +void DetectorMaxima::makeMask() { + const double PI = 3.14159265358979323846; + double sigma = (double) s / 3.5; + const double PART_1 = 1 / sqrt(2 * PI * pow(sigma, 2)); + + int l = 2 * s + 1; + for (int i = 0; i < l; i++) { + double g = PART_1 * exp(-1 * pow(i - s, 2) / (2 * pow(sigma, 2))); + mask->push_back(g); + } + + // For testing only + /* + for (int i = 0; i < l; i++) { + cout << i << "\t" << mask->at(i) << endl; + } + cout << endl; + cout << endl; + */ + // End testing +} + +void DetectorMaxima::smooth() { + for (int i = segStart; i <= segEnd; i++) { + int winS = i - s; + int maskS = 0; + if (winS < segStart) { + maskS = -1 * (winS - segStart); + winS = segStart; + } + + int winE = (i + s > segEnd) ? segEnd : i + s; + // int winL = winE - winS + 1; + + double sum = 0.0; + double maskSum = 0.0; + + int j = winS; + int h = maskS; + + while (j <= winE) { + double weight = mask->at(h); + sum += oScores->at(j) * weight; + maskSum += weight; + + j++; + h++; + } + + if (maskSum <= 0.0) { + string msg("The sum of the weights in the mask must be > 0"); + throw InvalidStateException(msg); + } + + scores->push_back(sum / maskSum); + // scores->push_back(sum / winL); + } + + // Testing - start + /* + cout << "The smoothed scores ... 
" << endl; + for (int k = 0; k < scores->size(); k++) { + if (k % 25 == 0) { + cout << endl; + } + cout << scores->at(k) << " "; + } + cout << endl; + cout << endl; + */ + // Testing - end +} + +void DetectorMaxima::deriveFirst() { + double l = 0.0; + double r = 0.0; + + for (int i = 0; i < w; i++) { + l += scores->at(i); + } + + for (int i = w + 1; i <= 2 * w; i++) { + r += scores->at(i); + } + + first->push_back(round(-1 * l + r)); + + for (int i = w + 1; i < scores->size() - w; i++) { + l -= scores->at(i - w - 1); + l += scores->at(i - 1); + r -= scores->at(i); + r += scores->at(i + w); + first->push_back(round(-1 * l + r)); + } + + // For testing only + /* + for (int i = 0; i < first->size(); i++) { + cout << first->at(i) << " "; + } + cout << endl; + */ +} + +void DetectorMaxima::deriveSecond() { + double l = 0.0; + double r = 0.0; + double d = 2 * w; + + for (int i = 0; i < w; i++) { + l += scores->at(i); + } + + for (int i = w + 1; i <= 2 * w; i++) { + r += scores->at(i); + } + + second->push_back(round(l + r - d * scores->at(w))); + + for (int i = w + 1; i < scores->size() - w; i++) { + l -= scores->at(i - w - 1); + l += scores->at(i - 1); + r -= scores->at(i); + r += scores->at(i + w); + second->push_back(round(l + r - d * scores->at(i))); + } + + // For testing only + /* + for (int i = 0; i < second->size(); i++) { + cout << second->at(i) << " "; + } + cout << endl; + */ +} + +void DetectorMaxima::findMaxima() { + int firstSize = first->size(); + + for (int i = 1; i < firstSize; i++) { + double magnitude = abs(first->at(i - 1) - first->at(i)); + + if (first->at(i) == 0 || (first->at(i - 1) < 0 & first->at(i) > 0) + || (first->at(i - 1) > 0 && first->at(i) < 0)) { + if (second->at(i) < 0) { + // Adjust index + int peakIndex = i + w + segStart; + + // Record the index of the peak and its magnitude + /* + vector * pair = new vector(); + pair->push_back(peakIndex); + pair->push_back(magnitude); + allMaxima->push_back(pair); + */ + + // Make sure that the peak 
is in a high-scoring region of width s centered on the peak + if (magnitude > m) { + // Make sure that the peak is in a high-scoring region of width s centered on the peak + int peakStart = peakIndex - halfS; + if (peakStart < segStart) { + peakStart = segStart; + } + int peakEnd = peakIndex + halfS; + if (peakEnd > segEnd) { + peakEnd = segEnd; + } + + double count = countLessThan(oScores, peakStart, peakEnd, + t); + double v = (100.00 * count) + / ((double) peakEnd - peakStart + 1); + if (v < p) { + maxima->push_back(peakIndex); + } + } + } + } + } + + // Testing - start + /* + cout << "Maxima: " << endl; + for (int i = 0; i < maxima->size(); i++) { + cout << maxima->at(i) << " "; + } + cout << endl << endl; + */ + // Testing - end +} + +int DetectorMaxima::countLessThan(vector * list, int s, int e, double t) { + int count = 0; + for (int u = s; u <= e; u++) { + if (list->at(u) < t) { + count++; + } + } + return count; +} + +void DetectorMaxima::findSeparators() { + int n = maxima->size(); + + if (n > 0) { + for (int i = 0; i < n - 1; i++) { + int j = i + 1; + int s = maxima->at(i); + int e = maxima->at(j); + + double count = countLessThan(oScores, s, e, t); + double v = (100.00 * count) / ((double) e - s + 1); + if (v >= p) { + separatorList->push_back(new Location(s, e)); + } + } + } + + // For testing only + /* + cout << "Separators: " << endl; + for (int h = 0; h < separatorList->size(); h++) { + cout << separatorList->at(h)->toString() << endl; + } + cout << endl; + */ +} + +void DetectorMaxima::findRegions() { + // Determine regions + int maximaCount = maxima->size(); + if (maximaCount > 0) { + int segStart = maxima->at(0); + int separatorCount = separatorList->size(); + for (int k = 0; k < separatorCount; k++) { + int segEnd = separatorList->at(k)->getStart(); + regionList->push_back(new Location(segStart, segEnd)); + segStart = separatorList->at(k)->getEnd(); + } + regionList->push_back( + new Location(segStart, maxima->at(maximaCount - 1))); + } + + // 
For testing only + /* + cout << "Regions: " << endl; + for (int r = 0; r < regionList->size(); r++) { + cout << regionList->at(r)->toString() << endl; + } + cout << endl; + */ + // End testing +} + +/* + * + */ +void DetectorMaxima::extendRegions() { + int regionCount = regionList->size(); + int gg = 0; + while (gg < regionCount) { + ILocation * region = regionList->at(gg); + + int regionStart = region->getStart(); + int regionEnd = region->getEnd(); + + // Handle the case where the region is made of one nucleotide + if (regionStart == regionEnd) { + regionStart = regionStart - halfS; + if (regionStart < segStart) { + regionStart = segStart; + } + region->setStart(regionStart); + + regionEnd = regionEnd + halfS; + if (regionEnd > segEnd) { + regionEnd = segEnd; + } + region->setEnd(regionEnd); + } + + // Left end: Extend step by step + int lEnd = (gg == 0) ? segStart : regionList->at(gg - 1)->getEnd(); + for (int u = regionStart; u >= lEnd; u = u - e) { + int d = u - e + 1; + if (d < lEnd) { + d = lEnd; + } + double v = (100.0 * countLessThan(oScores, d, u, t)) / ((double) e); + if (v >= p) { + break; + } else { + regionStart = d; + } + } + + // Left end: Extend or erode base by base + if (oScores->at(regionStart) < t) { + for (int a = regionStart; a < regionEnd; a++) { + if (oScores->at(a) >= t) { + regionStart = a; + break; + } + } + } else { + for (int a = regionStart; a >= lEnd; a--) { + if (oScores->at(a) >= t) { + regionStart = a; + } else { + break; + } + } + } + + // Set new start to check for validity + region->setStart(regionStart); + + // Right end: extend to the right step by step + int rEnd = + (gg == regionCount - 1) ? 
+ segEnd : regionList->at(gg + 1)->getStart(); + for (int u = regionEnd; u <= rEnd; u = u + e) { + int d = u + e - 1; + if (d > rEnd) { + d = rEnd; + } + double v = (100.0 * countLessThan(oScores, u, d, t)) / ((double) e); + if (v >= p) { + break; + } else { + regionEnd = d; + } + } + + // Right end: extend or erod base by base + if (oScores->at(regionEnd) < t) { + for (int a = regionEnd; a > regionStart; a--) { + if (oScores->at(a) >= t) { + regionEnd = a; + break; + } + } + } else { + for (int a = regionEnd; a <= rEnd; a++) { + if (oScores->at(a) >= t) { + regionEnd = a; + } else { + break; + } + } + } + + // Set new end to check for validity + region->setEnd(regionEnd); + + // Merge overlapping regions + if (gg > 0) { + ILocation * pRegion = regionList->at(gg - 1); + int pStart = pRegion->getStart(); + int pEnd = pRegion->getEnd(); + + if (Util::isOverlapping(pStart, pEnd, regionStart, regionEnd)) { + pRegion->setEnd(regionEnd); + regionList->erase(regionList->begin() + gg); + regionCount = regionList->size(); + } else { + gg++; + } + } + + if (gg == 0) { + gg++; + } + } + + // Testing - Start + /* + cout << "Extended regions: " << endl; + for (int r = 0; r < regionList->size(); r++) { + cout << regionList->at(r)->toString() << endl; + } + cout << endl; + */ + // Testing - End +} + +} /* namespace nonltr */ diff --git a/src/nonltr/DetectorMaxima.h b/src/nonltr/DetectorMaxima.h new file mode 100644 index 0000000..7aca5d5 --- /dev/null +++ b/src/nonltr/DetectorMaxima.h @@ -0,0 +1,77 @@ +/* + * DetectorMaxima.h + * + * Created on: May 31, 2013 + * Author: Hani Zakaria Girgis, PhD + */ + +#ifndef DETECTORMAXIMA_H_ +#define DETECTORMAXIMA_H_ + +#include +#include + +#include "../utility/ILocation.h" + +using namespace std; +using namespace utility; + +namespace nonltr { + +class DetectorMaxima { +private: + + int segStart; + int segEnd; + double s; + double w; + double m; + double t; + double p; + int e; + int halfS; + + vector * oScores; + vector * scores; + vector 
* mask; + vector * first; + vector * second; + vector * maxima; + // vector *> * allMaxima; + + vector * separatorList; + vector * regionList; + + void makeMask(); + void smooth(); + void deriveFirst(); + void deriveSecond(); + void findMaxima(); + + void findSeparators(); + void findRegions(); + + void extendRegions(); + + int countLessThan(vector *, int, int, double); + + /** + * Credit: http://stackoverflow.com/questions/554204/where-is-round-in-c + */ + inline double round(double number) { + return number < 0.0 ? ceil(number - 0.5) : floor(number + 0.5); + } + +public: + DetectorMaxima(int, int, double, double, double, double, double, int, + vector *); + virtual ~DetectorMaxima(); + const vector* getRegionList() const; + const vector* getFirst() const; + const vector* getSecond() const; + + // const vector *>* getAllMaxima() const; +}; + +} /* namespace nonltr */ +#endif /* DETECTORMAXIMA_H_ */ diff --git a/src/nonltr/EnrichmentMarkovView.cpp b/src/nonltr/EnrichmentMarkovView.cpp new file mode 100644 index 0000000..f886ac8 --- /dev/null +++ b/src/nonltr/EnrichmentMarkovView.cpp @@ -0,0 +1,217 @@ +/* + * EnrichmentMarkovView.cpp + * + * Created on: Apr 17, 2013 + * Author: Hani Zakaria Girgis, PhD + */ + +namespace nonltr { + +/** + * The Markov order. It start at 0. + */ +template +EnrichmentMarkovView::EnrichmentMarkovView(int k, int order, int m) : + minObs(m), factor(10000.00), KmerHashTable(k) { + initialize(order); +} + +template +EnrichmentMarkovView::EnrichmentMarkovView(int k, V initValue, int order, + int m) : + minObs(m), factor(10000.00), KmerHashTable(k, initValue) { + initialize(order); +} + +template +void EnrichmentMarkovView::initialize(int order) { + // Test start + // cout << "Testing: " << minObs << endl; + // Test end + + o = order; + if (o < 0) { + string msg("The Markov order must be non-negative integer. 
"); + msg.append("The invalid input is: "); + msg.append(Util::int2string(o)); + msg.append("."); + throw InvalidInputException(msg); + } + + if (o >= KmerHashTable::k) { + string msg("The Markov order cannot be >= k (k-mer)."); + throw InvalidInputException(msg); + } + + l = 0; + modelList = new vector *>(); + + for (int i = 1; i <= o + 1; i++) { + modelList->push_back(new KmerHashTable(i)); + } +} + +template +EnrichmentMarkovView::~EnrichmentMarkovView() { + Util::deleteInVector(modelList); + delete modelList; +} + +/** + * This method count words of size 1 to order+1 in the input sequence. + * In other words, it updates the background tables. In addition, it + * updates the length of the genome. + * + * sequence: is the input sequence. + * start: the start index - inclosing. + * end: the end index - inclosing. + */ +template +void EnrichmentMarkovView::count(const char * sequence, int start, + int end) { + + // Multiple by 2 if scanning the forward strand and its reverse complement + // l = l + (2 * (end - start + 1)); + l = l + (end - start + 1); + + int modelNumber = modelList->size(); + for (int i = 0; i < modelNumber; i++) { + KmerHashTable * t = modelList->at(i); + t->wholesaleIncrement(sequence, start, end - i); + } +} + +/** + * Normalize the count of words in each model. + * Values stored in these models are multiplied by "factor." 
+ */ +template +void EnrichmentMarkovView::generateProbapilities() { + int modelNumber = modelList->size(); + + for (int m = 0; m < modelNumber; m++) { + KmerHashTable * t = modelList->at(m); + int tSize = t->getMaxTableSize(); + + for (int i = 0; i < tSize; i += 4) { + double sum = 0.0; + + for (int j = i; j < i + 4; j++) { + sum += t->valueOf(j); + } + + for (int j = i; j < i + 4; j++) { + t->insert(j, round(factor * ((double) t->valueOf(j) / sum))); + } + } + } +} + +template +void EnrichmentMarkovView::processTable() { + char base = 4; + int modelNumber = modelList->size(); + + // Make a zero in quaternary form as a string of length k. + string q(""); + for (int x = 0; x < KmerHashTable::k; x++) { + q.append(1, 0); + } + + double lowerP; + double upperP; + for (I y = 0; y < KmerHashTable::maxTableSize; y++) { + if (y % 10000000 == 0) { + cout << "Processing " << y << " keys out of " + << KmerHashTable::maxTableSize; + cout << endl; + } + + const char * qc = q.c_str(); + + // Calculate the expected number of occurrences. + + // a. Calculate probability from lower order models. + // Lower probabilities are the same for four consecutive words of length of k-1 + if (y % 4 == 0) { + lowerP = 1.0; + for (int m = 0; m < modelNumber - 1; m++) { + KmerHashTable * oTable = modelList->at(m); + lowerP *= (((double) oTable->valueOf(qc, 0)) / factor); + } + } + + // b. Calculate probability based on the specified order. + KmerHashTable * oTable = modelList->at(modelNumber - 1); + int resultsSize = KmerHashTable::k - o - 1; + + // Upper probabilities are the same for four consecutive words of length of k-1 + // The scanning of words or length corresponding to the highest order + 1 + // This step is not needed if k = o + 1, i.e. resultsSize = 0. 
+ if (y % 4 == 0) { + if (resultsSize > 0) { + //Initialize the elements of the vector invalid index + vector results = vector(resultsSize, -987); + oTable->wholesaleValueOf(qc, 0, resultsSize - 1, &results, 0); + + upperP = 1.0; + for (int i = 0; i < resultsSize; i++) { + upperP *= (((double) results.at(i)) / factor); + } + results.clear(); + + } else { + upperP = 1.0; + } + } + + // The expected number of occurances + double exp = l * lowerP * upperP + * (((double) oTable->valueOf(qc, resultsSize)) / factor); + + // Calculate the enrichment value. + // Log value + // values[y] = round((log((double) values[y] + 1.0) - log(exp + 1.0))); + + // Raw value + // Requirement: if observed is >= 5 && observed > expected then the value is the difference + // otherwise the value is zero + + V observed = KmerHashTable::values[y]; + + if (observed >= minObs && observed > exp) { + + KmerHashTable::values[y] = round(observed - exp); + } else { + KmerHashTable::values[y] = 0; + } + + /* + KmerHashTable::values[y] = + round( + (((double) KmerHashTable::values[y] + 1.0) + / (exp + 1.0))); + */ + + // Increment the quaternary number: + // 1 - guard against overflow. + if (q[0] == base - 1) { + string z(""); + z.append(1, 0); + q = z + q; + } + + // 2 - increment the quaternary number by 1. 
+ int qLen = q.size(); + for (int i = qLen - 1; i >= 0; i--) { + if (q[i] + 1 < base) { + q[i] = q[i] + 1; + break; + } else { + q[i] = 0; + } + } + } +} + +} /* namespace nonltr */ diff --git a/src/nonltr/EnrichmentMarkovView.h b/src/nonltr/EnrichmentMarkovView.h new file mode 100644 index 0000000..a10a02a --- /dev/null +++ b/src/nonltr/EnrichmentMarkovView.h @@ -0,0 +1,69 @@ +/* + * EnrichmentMarkovView.h + * + * Created on: Apr 17, 2013 + * Author: Hani Zakaria Girgis, PhD + */ + +#ifndef ENRICHMENTMARKOVVIEW_H_ +#define ENRICHMENTMARKOVVIEW_H_ + +#include +#include +#include + +#include "KmerHashTable.h" +#include "../utility/Util.h" +#include "../exception/InvalidInputException.h" + +using namespace std; +using namespace utility; +using namespace exception; + +namespace nonltr { + +template +class EnrichmentMarkovView: public KmerHashTable{ + +private: + // The minimum number of the observed k-mers + const int minObs; + + // This template specification should work up to order of 14, + // i.e. word length = 15 + vector *> * modelList; + + // Markov order + int o; + + // Total length + long l; + + // Multiplied the probability of word by this factor + // Equivalent to four decimal points. + const double factor; // = 10000.00; + + // Initialize data members + void initialize(int); + + /** + * Credit: http://stackoverflow.com/questions/554204/where-is-round-in-c + */ + inline double round(double number) { + return number < 0.0 ? 
ceil(number - 0.5) : floor(number + 0.5); + } + +public: + EnrichmentMarkovView(int, int, int); + EnrichmentMarkovView(int, V, int, int); + virtual ~EnrichmentMarkovView(); + + void count(const char *, int, int); + void generateProbapilities(); + void processTable(); +}; +} /* namespace nonltr */ + +#include "EnrichmentMarkovView.cpp" + +#endif /* ENRICHMENTMARKOVVIEW_H_ */ diff --git a/src/nonltr/HMM.cpp b/src/nonltr/HMM.cpp new file mode 100644 index 0000000..df4c8b0 --- /dev/null +++ b/src/nonltr/HMM.cpp @@ -0,0 +1,630 @@ +/* + * HMM.cpp + * + * Created on: Jun 21, 2013 + * Author: Hani Zakaria Girgis, PhD + */ + +#include "HMM.h" + +#include +#include + +#include "../utility/Util.h" +#include "../exception/InvalidStateException.h" +#include "../exception/InvalidInputException.h" +#include "../exception/FileDoesNotExistException.h" +#include "../exception/InvalidOperationException.h" + +using namespace std; +using namespace utility; +using namespace exception; + +namespace nonltr { + +HMM::HMM(string hmmFile) : + PRECISION(numeric_limits::digits10 + 1) { + // ToDo: Fix this operation + string msg("Reading HMM from file is temporarily disabled."); + throw InvalidOperationException(msg); + + cerr << "Building HMM from: " << hmmFile << endl; + + ifstream in(hmmFile.c_str()); + in.precision(PRECISION); + + if (in) { + string token; + bool isLogBase = false; + bool isStates = false; + bool isPriors = false; + bool isTransition = false; + + while (in >> token) { + if (isLogBase) { + base = atof(token.c_str()); + + checkBase(base); + + logBase = log(base); + isLogBase = false; + } else if (isStates) { + stateNumber = atoi(token.c_str()); + positiveStateNumber = stateNumber / 2; + initializeHelper(); + + isStates = false; + } else if (isPriors) { + //Skip state names + for (int i = 1; i < stateNumber; i++) { + in >> token; + } + for (int i = 0; i < stateNumber; i++) { + in >> token; + (*pList)[i] = atof(token.c_str()); + } + + isPriors = false; + } else if 
(isTransition) { + //Skip state names + for (int i = 1; i < stateNumber; i++) { + in >> token; + } + + for (int i = 0; i < stateNumber; i++) { + //Skip the name of the state at the beginning of the line + for (int j = -1; j < stateNumber; j++) { + in >> token; + if (j > -1) { + (*(tList->at(i)))[j] = atof(token.c_str()); + } + } + } + + isTransition = false; + } + + if (token.compare("Base") == 0) { + isLogBase = true; + } else if (token.compare("States") == 0) { + isStates = true; + } else if (token.compare("Priors") == 0) { + isPriors = true; + } else if (token.compare("Transition") == 0) { + isTransition = true; + } + } + + in.close(); + } else { + string msg(hmmFile); + msg.append(" does not exist."); + throw FileDoesNotExistException(msg); + } + in.close(); + + //print("/Users/zakarota/Data/HgTest/Rep/Test/genome/hmmTest.txt"); +} + +/** + * Use this constructor to train on the entire genome. + * The client has to call train on each chromosome. + * base is the threshold. + + */ +HMM::HMM(double base, int stateNumber) : + PRECISION(numeric_limits::digits10 + 1) { + initialize(base, stateNumber); +} + +void HMM::initialize(double baseIn, int stateNumberIn) { + base = baseIn; + checkBase(base); + + logBase = log(baseIn); + + stateNumber = stateNumberIn; + // Make sure that the number of states is even and > 0 + if (stateNumber % 2 != 0 || stateNumber == 0) { + string msg("The number of states must be even and > zero."); + throw InvalidInputException(msg); + } + + positiveStateNumber = stateNumber / 2; + cout << "The number of states is: " << stateNumber << endl; + + initializeHelper(); +} + +/** + * This method makes sure that the base is not zero. 
+ */ +void HMM::checkBase(double base) { + if (fabs(base - 0.0) < std::numeric_limits::epsilon()) { + string msg("The base cannot be zero because log(base) is not defined."); + throw InvalidInputException(msg); + } +} + +void HMM::initializeHelper() { + // Ensure that the number of the states is positive + if (stateNumber < 1) { + string msg("The number of states must be positive."); + throw InvalidStateException(msg); + } + + pList = new vector(stateNumber, 1); + tList = new vector *>; + for (int i = 0; i < stateNumber; i++) { + tList->push_back(new vector(stateNumber, 1)); + } + oList = new vector(stateNumber, 1); + + // Check if infinity can be handled + if (!std::numeric_limits::has_infinity) { + string msg("This compiler does not handle infinite values. "); + msg.append(string("The decoding algorithm will not function.")); + throw InvalidStateException(msg); + } else { + minusInf = -1.0 * std::numeric_limits::infinity(); + } +} + +HMM::~HMM() { + pList->clear(); + delete pList; + + Util::deleteInVector(tList); + delete tList; + + oList->clear(); + delete oList; +} + +void HMM::train(vector * scoreListIn, + const vector *> * segmentListIn, + const vector * candidateListIn) { + + scoreList = scoreListIn; + segmentList = segmentListIn; + candidateList = candidateListIn; + + int candidateCount = candidateList->size(); + if (candidateCount > 0) { + int firstCandIndex = 0; + int lastCandIndex = 0; + int segmentNumber = segmentList->size(); + for (int i = 0; i < segmentNumber; i++) { + vector * s = segmentList->at(i); + ILocation * c = candidateList->at(firstCandIndex); + // A segment may have no detections + if (Util::isOverlapping(s->at(0), s->at(1), c->getStart(), + c->getEnd())) { + lastCandIndex = trainHelper1(s->at(0), s->at(1), + firstCandIndex); + trainHelper2(s->at(0), s->at(1), firstCandIndex, lastCandIndex); + firstCandIndex = lastCandIndex + 1; + if (firstCandIndex >= candidateCount) { + break; + } + } + } + } +} + +int HMM::trainHelper1(int segStart, int 
segEnd, int firstCandIndex) { + ILocation * cand = candidateList->at(firstCandIndex); + if (!Util::isOverlapping(segStart, segEnd, cand->getStart(), + cand->getEnd())) { + string msg("The first candidate is not overlapping with the segment. "); + msg.append("Candidate location is: "); + msg.append(cand->toString()); + msg.append(" Segment location is: "); + msg.append(Util::int2string(segStart)); + msg.append("-"); + msg.append(Util::int2string(segEnd)); + throw InvalidInputException(msg); + } + + int lastCandIndex = -1; + int candidateNumber = candidateList->size(); + for (int c = firstCandIndex; c < candidateNumber; c++) { + ILocation * cand = candidateList->at(c); + if (Util::isOverlapping(segStart, segEnd, cand->getStart(), + cand->getEnd())) { + lastCandIndex = c; + } else { + break; + } + } + + if (lastCandIndex < 0) { + string msg("The index of the last candidate cannot be negative."); + throw InvalidStateException(msg); + } + + return lastCandIndex; +} + +void HMM::trainHelper2(int segStart, int segEnd, int firstCandIndex, + int lastCandIndex) { + ILocation * f = candidateList->at(firstCandIndex); + + // First negative region if present + int fStart = f->getStart(); + if (fStart > segStart) { + trainNegative(segStart, fStart - 1); + move(getNgtvState(fStart - 1), getPstvState(fStart)); + } + + // Alternating positive and negative regions + for (int i = firstCandIndex; i < lastCandIndex; i++) { + ILocation * c = candidateList->at(i); + int cStart = c->getStart(); + int cEnd = c->getEnd(); + trainPositive(cStart, cEnd); + move(getPstvState(cEnd), getNgtvState(cEnd + 1)); + + int nextStart = candidateList->at(i + 1)->getStart(); + trainNegative(cEnd + 1, nextStart - 1); + move(getNgtvState(nextStart - 1), getPstvState(nextStart)); + } + + // Last positive region + ILocation * l = candidateList->at(lastCandIndex); + int lEnd = l->getEnd(); + trainPositive(l->getStart(), lEnd); + + // Last negative region if present + if (segEnd > lEnd) { + 
move(getPstvState(lEnd), getNgtvState(lEnd + 1)); + trainNegative(lEnd + 1, segEnd); + } +} + +void HMM::trainPositive(int s, int e) { + int pIndex = getPstvState(s); + (*pList)[pIndex] = pList->at(pIndex) + 1; + + for (int i = s; i <= e; i++) { + int index = getPstvState(i); + (*oList)[index] = oList->at(index) + 1; + } + + for (int i = s; i < e; i++) { + move(getPstvState(i), getPstvState(i + 1)); + } +} + +void HMM::trainNegative(int s, int e) { + int pIndex = getNgtvState(s); + (*pList)[pIndex] = pList->at(pIndex) + 1; + + for (int i = s; i <= e; i++) { + int index = getNgtvState(i); + (*oList)[index] = oList->at(index) + 1; + } + + for (int i = s; i < e; i++) { + move(getNgtvState(i), getNgtvState(i + 1)); + } +} + +void HMM::move(int state1, int state2) { + vector * state1Row = tList->at(state1); + (*state1Row)[state2] = state1Row->at(state2) + 1; +} + +void HMM::normalize() { +// Priors + double sum = 0.0; + for (int i = 0; i < stateNumber; i++) { + sum += pList->at(i); + } + for (int i = 0; i < stateNumber; i++) { + (*pList)[i] = log(pList->at(i) / sum); + } + +// Output + for (int i = 0; i < stateNumber; i++) { + (*oList)[i] = log(1.0); + } + +// Transition + for (int i = 0; i < stateNumber; i++) { + vector * row = tList->at(i); + double sum = 0.0; + for (int j = 0; j < stateNumber; j++) { + sum += row->at(j); + } + + for (int j = 0; j < stateNumber; j++) { + (*row)[j] = log(row->at(j) / sum); + } + } +} + +void HMM::print() { + cout.precision(PRECISION); + + // State names + vector v; + for (int j = 0; j < positiveStateNumber; j++) { + v.push_back(Util::int2string(j)); + } + string m("-"); + for (int j = 0; j < positiveStateNumber; j++) { + v.push_back(m + Util::int2string(j)); + } + + cout << "Priors:" << endl; + for (int g = 0; g < 2; g++) { + for (int i = 0; i < positiveStateNumber; i++) { + cout << v.at(i + (g * positiveStateNumber)) << "\t"; + } + + for (int i = 0; i < positiveStateNumber; i++) { + cout << pList->at(i + (g * positiveStateNumber)) << 
"\t"; + } + cout << endl; + } + cout << endl; + + /* + cout << "Output:" << endl; + for (int i = 0; i < v.size(); i++) { + cout << v.at(i) << "\t"; + } + cout << endl; + for (int i = 0; i < stateNumber; i++) { + cout << oCountList->at(i) << "\t"; + } + cout << endl << endl; + */ + + cout << "Transition:" << endl << "\t"; + for (int i = 0; i < v.size(); i++) { + cout << v.at(i) << "\t"; + } + cout << endl; + + for (int i = 0; i < stateNumber; i++) { + vector * row = tList->at(i); + cout << v.at(i) << "\t"; + for (int j = 0; j < stateNumber; j++) { + cout << row->at(j) << "\t"; + } + cout << endl; + } + cout << endl << endl; +} + +void HMM::print(string hmo) { + ofstream out(hmo.c_str()); + out.precision(PRECISION); + + out << "Base" << endl << base << endl; + + out << "States" << endl << stateNumber << endl; + + vector v; + for (int j = 0; j < positiveStateNumber; j++) { + v.push_back(Util::int2string(j)); + } + string m("-"); + for (int j = 0; j < positiveStateNumber; j++) { + v.push_back(m + Util::int2string(j)); + } + + out << "Priors" << endl; + for (int i = 0; i < v.size(); i++) { + out << v.at(i) << " "; + } + out << endl; + + for (int i = 0; i < v.size(); i++) { + out << pList->at(i) << " "; + } + out << endl; + + out << "Transition" << endl << "\t"; + for (int i = 0; i < v.size(); i++) { + out << v.at(i) << "\t"; + } + out << endl; + + for (int i = 0; i < stateNumber; i++) { + vector * row = tList->at(i); + out << v.at(i) << "\t"; + for (int j = 0; j < stateNumber; j++) { + out << row->at(j) << "\t"; + } + out << endl; + } + out << endl << endl; + + out.close(); +} + +/** + * This method will append the state sequence to the end of the input state list + * This method returns the log likelihood + */ +double HMM::decode(int rStart, int rEnd, vector * scoreListIn, + vector& stateList) { + scoreList = scoreListIn; + + // Make sure that the coordinates represent valid location + Location check(rStart, rEnd); + // End check + + vector > v(stateNumber); + int size 
= rEnd - rStart + 1; + for (int i = 0; i < stateNumber; i++) { + v[i] = vector(size, minusInf); + } + + vector > p(stateNumber); + for (int i = 0; i < stateNumber; i++) { + p[i] = vector(size, -1); + } + + // Initialize + int firstPstvState = getPstvState(rStart); + int firstNgtvState = positiveStateNumber + firstPstvState; + v[firstPstvState][0] = pList->at(firstPstvState); + v[firstNgtvState][0] = pList->at(firstNgtvState); + + // Recurs + for (int i = rStart + 1; i <= rEnd; i++) { + int vIndex = i - rStart; + + // Obtain states from scores + int pPstvState = getPstvState(i - 1); + int pNgtvState = positiveStateNumber + pPstvState; + int cPstvState = getPstvState(i); + int cNgtvState = positiveStateNumber + cPstvState; + + // Set positive state + double p1 = v[pPstvState][vIndex - 1] + + (*(*tList)[pPstvState])[cPstvState]; + double p2 = v[pNgtvState][vIndex - 1] + + (*(*tList)[pNgtvState])[cPstvState]; + if (p1 > p2) { + v[cPstvState][vIndex] = p1; + p[cPstvState][vIndex] = pPstvState; + } else { + v[cPstvState][vIndex] = p2; + p[cPstvState][vIndex] = pNgtvState; + } + + // Set negative state + double p3 = v[pPstvState][vIndex - 1] + + (*(*tList)[pPstvState])[cNgtvState]; + double p4 = v[pNgtvState][vIndex - 1] + + (*(*tList)[pNgtvState])[cNgtvState]; + if (p3 > p4) { + v[cNgtvState][vIndex] = p3; + p[cNgtvState][vIndex] = pPstvState; + } else { + v[cNgtvState][vIndex] = p4; + p[cNgtvState][vIndex] = pNgtvState; + } + } + + // Decode + int lastBestState = 0; + double lastBestValue = v[0][size - 1]; + for (int i = 1; i < stateNumber; i++) { + double currentValue = v[i][size - 1]; + if (currentValue > lastBestValue) { + lastBestState = i; + lastBestValue = currentValue; + } + } + + int stateListOriginalSize = stateList.size(); + for (int i = stateListOriginalSize; i < stateListOriginalSize + size; i++) { + stateList.push_back(-1); + } + + stateList[stateListOriginalSize + size - 1] = lastBestState; + for (int i = size - 1; i > 0; i--) { + lastBestState = 
p[lastBestState][i]; + stateList[stateListOriginalSize + i - 1] = lastBestState; + } + + // Make sure that no state in the results has the value of -1 + for (int i = stateListOriginalSize; i < stateListOriginalSize + size; i++) { + if (stateList[i] == -1) { + string msg("At least one state was not determined properly."); + throw InvalidStateException(msg); + } + } + + // Test - start + /* + bool canPrint = false; + for (int i = stateListOriginalSize; i < stateListOriginalSize + size; i++) { + if (stateList.at(i) >= positiveStateNumber) { + canPrint = true; + } + } + if (canPrint) { + for (int i = rStart; i <= rEnd; i++) { + cout << scoreList->at(i) << " "; + } + cout << endl; + + for (int i = stateListOriginalSize; i < stateListOriginalSize + size; + i++) { + if (stateList.at(i) < positiveStateNumber) { + cout << "+"; + } else { + cout << "-"; + //cout << stateList.at(i) << " "; + } + } + cout << endl; + } + */ + + // Test - end + return lastBestValue; +} + +/** + * Append positive regions at the end of regionList + */ +double HMM::decode(int rStart, int rEnd, vector * scoreListIn, + vector& regionList) { + + vector stateList; + double logLikelihood = decode(rStart, rEnd, scoreListIn, stateList); + + int size = stateList.size(); + bool inRpt = false; + bool canFill = false; + int s = -1; + int e = -1; + + for (int i = 0; i < size; i++) { + // Start a new repeat + if (stateList.at(i) < positiveStateNumber && !inRpt) { + inRpt = true; + s = i; + } + // End a the current repeat + else if (stateList.at(i) >= positiveStateNumber && inRpt) { + e = i - 1; + inRpt = false; + canFill = true; + } + // If the current repeat at the end of the segment + else if (i == size - 1 && inRpt) { + e = i; + inRpt = false; + canFill = true; + } + // Extract features of the just recognized repeat + if (canFill) { + regionList.push_back(new Location(s + rStart, e + rStart)); + s = -1; + e = -1; + canFill = false; + } + } + + return logLikelihood; +} + +int HMM::getPositiveStateNumber() { + 
return positiveStateNumber; +} + +double HMM::getBase() { + return base; +} + +} +/* namespace nonltr */ diff --git a/src/nonltr/HMM.h b/src/nonltr/HMM.h new file mode 100644 index 0000000..82c7ec1 --- /dev/null +++ b/src/nonltr/HMM.h @@ -0,0 +1,103 @@ +/* + * HMM.h + * + * Created on: Jun 21, 2013 + * Author: Hani Zakaria Girgis, PhD + */ + +#ifndef HMM_H_ +#define HMM_H_ + +#include +#include +#include +#include + +#include "../utility/ILocation.h" + +using namespace std; +using namespace utility; + +namespace nonltr { + +class HMM { +private: + const int PRECISION; + double minusInf; + vector * pList; + vector *> * tList; + vector * oList; + + void initializeHelper(); + // Returns the index of the last candidate in the segment + int trainHelper1(int, int, int); + void trainHelper2(int, int, int, int); + void trainPositive(int, int); + void trainNegative(int, int); + void move(int, int); + void checkBase(double); + + /* + inline int getPstvState(int score) { + int state = ceil(log(score) / logBase); + if (state < 0) { + state = 0; + } + return state; + } + + inline int getNgtvState(int score) { + int state = ceil(log(score) / logBase); + if (state < 0) { + state = 0; + } + return state + positiveStateNumber; + } + */ + + inline int getPstvState(int index) { + int state = scoreList->at(index); + return state; + } + + inline int getNgtvState(int index) { + int state = scoreList->at(index); + return state + positiveStateNumber; + } + +protected: + double base; + double logBase; + int stateNumber; + int positiveStateNumber; + + vector * scoreList; + const vector *> * segmentList; + const vector * candidateList; + + void initialize(double, int); + /** + * Credit: http://stackoverflow.com/questions/554204/where-is-round-in-c + */ + inline double round(double number) { + return number < 0.0 ? 
ceil(number - 0.5) : floor(number + 0.5); + } + +public: + HMM(string); // Build a model from file + HMM(double, int); + // HMM(vector *, const vector *> *, + // const vector *, double); + virtual ~HMM(); + void train(vector *, const vector *> *, const vector *); + void normalize(); + double decode(int, int, vector *, vector&); + double decode(int, int, vector *, vector&); + int getPositiveStateNumber(); + void print(); + void print(string); + double getBase(); +}; + +} /* namespace nonltr */ +#endif /* HMM_H_ */ diff --git a/src/nonltr/IChromosome.h b/src/nonltr/IChromosome.h new file mode 100644 index 0000000..8663163 --- /dev/null +++ b/src/nonltr/IChromosome.h @@ -0,0 +1,28 @@ +/* + * IChromosome.h + * + * Created on: Feb 4, 2013 + * Author: Hani Zakaria Girgis, PhD + */ + +#ifndef ICHROMOSOME_H_ +#define ICHROMOSOME_H_ + +#include +#include + +using namespace std; + +namespace nonltr { + +class IChromosome { +public: + //IChromosome(); + //virtual ~IChromosome(); + virtual const string* getBase() = 0; + virtual const vector *> * getSegment() = 0; + virtual string getHeader() = 0; +}; + +} /* namespace tr */ +#endif /* ICHROMOSOME_H_ */ diff --git a/src/nonltr/ITableView.h b/src/nonltr/ITableView.h new file mode 100644 index 0000000..932b4cc --- /dev/null +++ b/src/nonltr/ITableView.h @@ -0,0 +1,34 @@ +/* + * ITableView.h + * + * Created on: Aug 9, 2012 + * Author: Hani Zakaria Girgis, PhD + */ + +#ifndef ITABLEVIEW_H_ +#define ITABLEVIEW_H_ + +#include + +using namespace std; + +namespace nonltr { + +template +class ITableView { +public: + virtual V valueOf(const char*) = 0 ; + virtual V valueOf(const char*, int) = 0; + virtual V valueOf(I) = 0; + + virtual int getK() = 0; + virtual I getMaxTableSize() = 0; + virtual const V * getValues() const = 0; + + virtual void wholesaleValueOf(const char *, int, int, vector *) = 0; + virtual void wholesaleValueOf(const char *, int, int, vector *, int) = 0; +}; + +} + +#endif /* ITABLEVIEW_H_ */ diff --git 
a/src/nonltr/KmerHashTable.cpp b/src/nonltr/KmerHashTable.cpp new file mode 100644 index 0000000..dc53505 --- /dev/null +++ b/src/nonltr/KmerHashTable.cpp @@ -0,0 +1,445 @@ +/* + * KmerHashTable.cpp + * + * Created on: Jul 25, 2012 + * Author: Hani Zakaria Girgis, PhD + */ +#include +#include +#include +#include + +#include "../utility/Util.h" +#include "../exception/InvalidInputException.h" +#include "../exception/InvalidStateException.h" + +using namespace std; +using namespace exception; +using namespace nonltr; +using namespace utility; + +template +KmerHashTable::KmerHashTable(int keyLength) { + initialize(keyLength, 0); +} + +template +KmerHashTable::KmerHashTable(int keyLength, V initValue) { + initialize(keyLength, initValue); +} + +template +void KmerHashTable::initialize(int keyLength, V initialValueIn) { + /* + if (keyLength > maxKeyLength) { + string msg = "The maximum size (k) of the k-mer is "; + char temp[3]; + sprintf(temp, "%d", maxKeyLength); + msg += temp; + throw InvalidInputException(msg); + } + */ + + k = keyLength; + initialValue = initialValueIn; + + // Initialize bases + bases = new I[k]; + for (int i = k - 1; i >= 0; i--) { + bases[k - 1 - i] = (I) pow(4.0, i); + } + + // Initialize mMinusOne + mMinusOne = new I[4]; + for (int i = 0; i < 4; i++) { + mMinusOne[i] = i * bases[0]; + } + + // Get maximum size of table + char * temp = new char[k]; + for (int i = 0; i < k; i++) { + temp[i] = 3; + } + + maxTableSize = hash(temp) + 1; + delete[] temp; + + // Initialize values + values = new V[maxTableSize]; + for (I i = 0; i < maxTableSize; i++) { + values[i] = initialValue; + } + + // Test + /* + char key[] = { 3, 3, 3, 3, 0, 0, 0, 0 }; + long value = 100; + insert(key, 4, value); + long index = hash(key, 4); + cout << "Index: " << index << " " << values[index] << endl; + cout << "Index: " << index << " " << valueOf(key, 4) << endl; + cout << "Number of filled entries: " << countNonZeroEntries() << endl; + */ +} + +template 
+KmerHashTable::~KmerHashTable() { + delete[] bases; + delete[] mMinusOne; + delete[] values; +} + +/** + * word: an array of characters. + * The maximum integer value is 3 and the minimum is 0 + */ +template +I KmerHashTable::hash(const char * key) { + return hash(key, 0); +} + +/** + * seq: an array of characters e.g. [0,0,1,1,1,3,2]. + * start: the start index of the key. + * This method is designed to process a long sequence. + */ +template +I KmerHashTable::hash(const char * sequence, int keyStart) { + I index = 0; + for (int i = 0; i < k; i++) { + char nucleotide = sequence[keyStart + i]; + if (nucleotide >= 0 && nucleotide <= 3) { + index += bases[i] * sequence[keyStart + i]; + } else { + string msg("The value of the char representing the nucleotide "); + msg.append("must be between 0 and 3."); + msg.append("The int value is "); + msg.append(Util::int2string((int) nucleotide)); + msg.append(" of nucleotide at index "); + msg.append(Util::int2string(keyStart + i)); + + for (int h = 0 + keyStart; h < k + keyStart; h++) { + cerr << (int) sequence[h]; + } + cerr << endl; + + throw InvalidInputException(msg); + } + } + return index; +} + +template +void KmerHashTable::hash(const char * sequence, int start, int end, + vector * hashList) { + + for (int i = start; i <= end; i++) { + char nucleotide = sequence[i]; + if (!(nucleotide >= 0 && nucleotide <= 3)) { + string msg("The value of the char representing the nucleotide "); + msg.append("must be between 0 and 3."); + msg.append("The int value is "); + msg.append(Util::int2string((int) nucleotide)); + msg.append(" of nucleotide at index "); + msg.append(Util::int2string(i)); + + throw InvalidInputException(msg); + } + } + + I lastHash = hash(sequence, start); + hashList->push_back(lastHash); + + for (int i = start + 1; i <= end; i++) { + I s1 = 4 * (lastHash - mMinusOne[(int) sequence[i - 1]]) + + (int) sequence[i + k - 1]; + hashList->push_back(s1); + lastHash = s1; + } +} + +/** + * This method put the key-value 
pair in the table. + * Note: keys are unique, i.e. no duplicate keys. + */ +template +void KmerHashTable::insert(const char* key, V value) { + insert(key, 0, value); +} + +/** + * Similar to the above method. + * The key begins at start in seq. + * The length of the key is k. + */ +template +void KmerHashTable::insert(const char* sequence, int keyStart, V value) { + values[hash(sequence, keyStart)] = value; +} + +template +void KmerHashTable::insert(I keyHash, V value) { + values[keyHash] = value; +} + +/** + * Call wholesaleIncrement on the segment itself. + * Then, call it again on the reverse complement of this segment. + * + * sequence: is a long sequence usually a long segment of a chromosome. + * sFirstKmer: is the start index of the first k-mer. + * sLastKmer: is the start index of the last k-mer. + */ +template +void KmerHashTable::wholesaleIncrement(const char* sequence, + int firstKmerStart, int lastKmerStart) { + // Increment k-mer's in the forward strand + vector hashList = vector(); + hash(sequence, firstKmerStart, lastKmerStart, &hashList); + + int size = hashList.size(); + for (int i = 0; i < size; i++) { + I keyHash = hashList.at(i); + if (keyHash >= maxTableSize) { + cerr << "array out of bounds" << endl; + throw ""; + } + values[keyHash]++; + } + + // Increment k-mer's in the reverse complement + /* + string rc(""); + Util::revCompDig(sequence, firstKmerStart, lastKmerStart + k - 1, &rc); + + hashList.clear(); + hash(rc.c_str(), 0, rc.size() - k, &hashList); + size = hashList.size(); + + for (int i = 0; i < size; i++) { + I keyHash = hashList.at(i); + values[keyHash]++; + }*/ +} + +/** + * Increment the entry associated with the key by one. + */ +template +void KmerHashTable::increment(const char* key) { + increment(key, 0); +} + +/** + * Increment the value associated with the key starting at keyStart in the + * sequence by one. Also, this method increments the count of the reverse complement + * of the kmer by one. 
+ */ +template +void KmerHashTable::increment(const char* sequence, int keyStart) { + // Increment the count of the kmer by one. + I index = hash(sequence, keyStart); + values[index]++; + + // Generate the reverse complement of the kmer. + char * rcKmer = new char[k]; + for (int j = 0; j < k; j++) { + switch (sequence[j + keyStart]) { + case 0: + rcKmer[k - 1 - j] = 3; + break; + case 1: + rcKmer[k - 1 - j] = 2; + break; + case 2: + rcKmer[k - 1 - j] = 1; + break; + case 3: + rcKmer[k - 1 - j] = 0; + break; + default: + string msg = string("Invalid code of a nucleotide: "); + msg.append(1, sequence[j + keyStart]); + msg.append(". Valid codes are 0, 1, 2, and 3."); + throw InvalidInputException(msg); + } + } + + // Update the count of the reverse complement of the kmer by one. + I rcIndex = hash(rcKmer, 0); + values[rcIndex]++; + + // Free memory + delete[] rcKmer; +} + +/** + * Return the value associated with the key + */ +template +V KmerHashTable::valueOf(const char* key) { + return valueOf(key, 0); +} + +/** + * Return the value associated with the key + * The key is a substring of length k starting at keyStart in the sequence + */ +template +V KmerHashTable::valueOf(const char* sequence, int keyStart) { + return values[hash(sequence, keyStart)]; +} + +template +V KmerHashTable::valueOf(I keyHash) { + return values[keyHash]; +} + +template +void KmerHashTable::wholesaleValueOf(const char * sequence, + int firstKmerStart, int lastKmerStart, vector * results) { + wholesaleValueOf(sequence, firstKmerStart, lastKmerStart, results, 0); +} + +/** + * The values are set in the results vector starting at the resultsStart. + * The contents of vector "results" must be initialized. 
+ * Otherwise, the program will crash outputting: "segmentation fault 11" + */ +template +void KmerHashTable::wholesaleValueOf(const char * sequence, + int firstKmerStart, int lastKmerStart, vector * results, + int resultsStart) { + + int index = resultsStart; + vector hashList = vector(); + hash(sequence, firstKmerStart, lastKmerStart, &hashList); + int size = hashList.size(); + + for (int i = 0; i < size; i++) { + (*results)[index] = values[hashList.at(i)]; + index++; + } +} + +/** + * This method returns the number of occupied entries in the table. + * A non-occupied entry has the initial value. + */ +template +I KmerHashTable::countNonInitialEntries() { + I count = 0; + for (I i = 0; i < maxTableSize; i++) { + if (values[i] != initialValue) { + count++; + } + } + return count; +} + +/** + * Make a list of the k-mers. + */ +template +vector* KmerHashTable::getKeys() { + vector * alpha = new vector(); + alpha->push_back((char) 0); + alpha->push_back((char) 1); + alpha->push_back((char) 2); + alpha->push_back((char) 3); + + vector *words = new vector(); + for (int h = 0; h < alpha->size(); h++) { + words->push_back(string(1, alpha->at(h))); + } + + int wLen = k; + for (int i = 1; i < wLen; i++) { + vector *wordsAtItrI = new vector(); + for (I j = 0; j < words->size(); j++) { + for (int h = 0; h < alpha->size(); h++) { + string w = string(words->at(j)); + w.append(1, alpha->at(h)); + wordsAtItrI->push_back(w); + } + } + words->clear(); + delete words; + words = new vector(*wordsAtItrI); + + // Free memory + wordsAtItrI->clear(); + delete wordsAtItrI; + } + + // Free memory + alpha->clear(); + delete alpha; + return words; +} + +/** + * Print the contents of the whole table + */ +template +void KmerHashTable::printTable(string output) { + vector keys; +// getKeys(keys); + + ofstream out(output.c_str()); + + for (I i = 0; i < keys.size(); i++) { + const char * kmer = keys.at(i); + for (int j = 0; j < k; j++) { + out << (int) kmer[j]; + } + cerr << "Hash: " << 
hash(keys.at(i), 0) << endl; + + out << " -> " << values[hash(keys.at(i), 0)] << endl; + } + + out.close(); + keys.clear(); +} + +template +int KmerHashTable::getK() { + return k; +} + +template +I KmerHashTable::getMaxTableSize() { + return maxTableSize; +} + +template +const V * KmerHashTable::getValues() const { + return values; +} + +/** + * Call after building the table. + * A negative value is a likely indication of overflow. + */ +template +void KmerHashTable::checkOverflow() { + for (I y = 0; y < maxTableSize; y++) { + if (values[y] < 0) { + string msg("A negative value is a likely indication of overflow. "); + msg.append( + "To the developer, consider larger data type in KmerHashTable."); + throw InvalidStateException(msg); + } + } +} + +template +V KmerHashTable::getMaxValue() { + V max = 0; + for (I y = 0; y < maxTableSize; y++) { + if (values[y] > max) { + max = values[y]; + } + } + return max; +} diff --git a/src/nonltr/KmerHashTable.h b/src/nonltr/KmerHashTable.h new file mode 100644 index 0000000..7c38e23 --- /dev/null +++ b/src/nonltr/KmerHashTable.h @@ -0,0 +1,83 @@ +/* + * KmerHashTable.h + * + * Created on: Jul 25, 2012 + * Author: Hani Zakaria Girgis, PhD - NCBI/NLM/NIH + */ + +#ifndef KMERHASHTABLE_H_ +#define KMERHASHTABLE_H_ + +#include +#include +#include "ITableView.h" + +using namespace std; +using namespace nonltr; + +namespace nonltr { + +template +class KmerHashTable: public ITableView { + +protected: + /* Fields */ + static const int maxKeyLength = 15; + int k; + + + I maxTableSize; + + // The hashed values, i.e. the values of the hash table. + // The index is the 4ry representation of the key + V * values; + V initialValue; + +private: + // [4^0, 4^1, ... 
, 4^(k-1)] + I * bases; + I * mMinusOne; + void initialize(int, V); + +public: + /* Methods */ + KmerHashTable(int); + KmerHashTable(int, V); + + virtual ~KmerHashTable(); + + I hash(const char *); + I hash(const char *, int); + void hash(const char *, int, int, vector *); + + void insert(const char*, V); + void insert(const char*, int, V); + void insert(I, V); + + void increment(const char*); + void increment(const char*, int); + void wholesaleIncrement(const char*, int, int); + + void addReverseComplement(); + I countNonInitialEntries(); + vector *getKeys(); + void printTable(string); + void checkOverflow(); + + /*Vritual methods from ITableView*/ + virtual V valueOf(const char*); + virtual V valueOf(const char*, int); + virtual V valueOf(I); + virtual void wholesaleValueOf(const char *, int, int, vector *); + virtual void wholesaleValueOf(const char *, int, int, vector *, int); + + virtual int getK(); + virtual I getMaxTableSize(); + virtual V getMaxValue(); + virtual const V * getValues() const; +}; +} + +#include "KmerHashTable.cpp" + +#endif /* KMERHASHTABLE_H_ */ diff --git a/src/nonltr/LocationList.cpp b/src/nonltr/LocationList.cpp new file mode 100644 index 0000000..4b93d36 --- /dev/null +++ b/src/nonltr/LocationList.cpp @@ -0,0 +1,153 @@ +/* + * LocationList.cpp + * + * Created on: Feb 19, 2015 + * Author: Hani Zakaria Girgis, PhD + * + * + * An instance of this class holds a list of merged locations. 
+ */ + +#include "LocationList.h" + +namespace nonltr { + +LocationList::LocationList(string chromNameIn) { + chromName = chromNameIn; + regionList = new vector(); + merge(); +} + +LocationList::~LocationList() { + Util::deleteInVector(regionList); + delete regionList; +} + +void LocationList::add(int start, int end) { + regionList->push_back(new Location(start, end)); +} + +void LocationList::merge() { + int regionCount = regionList->size(); + int gg = 0; + while (gg < regionCount) { + ILocation * region = regionList->at(gg); + + int regionStart = region->getStart(); + int regionEnd = region->getEnd(); + + if (gg > 0) { + ILocation * pRegion = regionList->at(gg - 1); + int pStart = pRegion->getStart(); + int pEnd = pRegion->getEnd(); + + if (Util::isOverlapping(pStart, pEnd, regionStart, regionEnd)) { + pRegion->setEnd(regionEnd > pEnd ? regionEnd : pEnd); + regionList->erase(regionList->begin() + gg); + delete region; + regionCount = regionList->size(); + } else { + gg++; + } + } + + if (gg == 0) { + gg++; + } + } +} + +void LocationList::mergeWithAnotherList( + const vector * const otherList) { + //A pre-condition: Ensure that the other list is sorted + for (int h = 1; h < otherList->size(); h++) { + if (otherList->at(h)->getStart() < otherList->at(h - 1)->getStart()) { + throw InvalidStateException( + string("LocationList - The other list is not sorted.")); + } + } + + // Start + vector * mergedList = new vector(); + + int i = 0; + int j = 0; + int iLimit = regionList->size(); + int jLimit = otherList->size(); + + // Continue until one list is finished + while (i < iLimit && j < jLimit) { + ILocation * iLoc = regionList->at(i); + ILocation * jLoc = otherList->at(j); + + if (iLoc->getStart() < jLoc->getStart()) { + mergedList->push_back(iLoc); + i++; + } else { + mergedList->push_back(new Location(*jLoc)); + j++; + } + } + + // Once one list is finished, copy the rest of the other list + if (i == iLimit) { + for (; j < jLimit; j++) { + mergedList->push_back(new 
Location(*(otherList->at(j)))); + } + } else if (j == jLimit) { + for (; i < iLimit; i++) { + mergedList->push_back(regionList->at(i)); + } + } + + // Once done + // Util::deleteInVector(regionList); + regionList->clear(); // Need to test this line + delete regionList; + regionList = mergedList; + + merge(); + + //A post-condition: Ensure that the list is sorted + for (int h = 1; h < regionList->size(); h++) { + if (regionList->at(h)->getStart() < regionList->at(h - 1)->getStart()) { + throw InvalidStateException(string("This list is not sorted.")); + } + } +} + +void LocationList::print() { + cout << endl << chromName << endl; + for (int i = 0; i < regionList->size(); i++) { + int s = regionList->at(i)->getStart(); + int e = regionList->at(i)->getEnd(); + cout << s << "-" << e << endl; + } +} + +const vector * LocationList::getList() { + return regionList; +} + +void LocationList::convertToRedFormat() { + trim(1); +} + +void LocationList::trim(int x) { + for (int i = regionList->size() - 1; i >= 0; i--) { + ILocation * region = regionList->at(i); + int start = region->getStart(); + int newEnd = region->getEnd() - x; + + if (newEnd < 0 || start > newEnd) { + regionList->erase(regionList->begin() + i); + delete region; + } else { + region->setEnd(newEnd); + } + } +} + +} + +/* namespace nonltr */ diff --git a/src/nonltr/LocationList.h b/src/nonltr/LocationList.h new file mode 100644 index 0000000..1f5202a --- /dev/null +++ b/src/nonltr/LocationList.h @@ -0,0 +1,53 @@ +/* + * LocationList.h + * + * Created on: Feb 19, 2015 + * Author: Hani Z. 
Girgis, PhD + */ + +#ifndef SRC_NONLTR_LOCATIONLIST_H_ +#define SRC_NONLTR_LOCATIONLIST_H_ + +#include +#include "../utility/Util.h" +#include "../utility/ILocation.h" +#include "../utility/Location.h" +#include "../exception/InvalidStateException.h" + +using namespace std; +using namespace utility; +using namespace exception; + +namespace nonltr { + +class LocationList { +private: + string chromName; + vector * regionList; + void merge(); + +public: + LocationList(string); + virtual ~LocationList(); + + void add(int, int); + + /** + * Take a sorted list + */ + void mergeWithAnotherList(const vector * const); + + + /** + * Print locations + */ + void print(); + + const vector * getList(); + void convertToRedFormat(); + void trim(int ); +}; + +} /* namespace nonltr */ + +#endif /* SRC_NONLTR_LOCATIONLIST_H_ */ diff --git a/src/nonltr/LocationListCollection.cpp b/src/nonltr/LocationListCollection.cpp new file mode 100644 index 0000000..14c7a05 --- /dev/null +++ b/src/nonltr/LocationListCollection.cpp @@ -0,0 +1,101 @@ +/* + * LocationListCollection.cpp + * + * Created on: Feb 19, 2015 + * Author: Hani Zakaria Girgis, PhD + */ + +#include "LocationListCollection.h" + +namespace nonltr { + +LocationListCollection::LocationListCollection(string fileNameIn) { + fileName = fileNameIn; + collection = new map(); + readCoordinates(); +} + +LocationListCollection::~LocationListCollection() { + collection->clear(); + delete collection; +} + +void LocationListCollection::readCoordinates() { + Util::checkFile(fileName); + + ifstream in(fileName.c_str()); + LocationList * locList; + string previousChromName(""); + + while (in.good()) { + string line; + getline(in, line); + + if (line.compare(string("")) != 0) { + int colIndex = line.find_last_of(':'); + int dashIndex = line.find_last_of('-'); + + string chromName = line.substr(0, colIndex); + + if (previousChromName.compare(chromName) != 0) { + + cout << "Processing regions of " << chromName << endl; + + locList = new 
LocationList(chromName); + collection->insert( + map::value_type(chromName, + locList)); + + previousChromName = chromName; + } + + int start = + atoi( + line.substr(colIndex + 1, dashIndex - colIndex - 1).c_str()); + int end = atoi(line.substr(dashIndex + 1).c_str()); + locList->add(start, end); + } + } + + in.close(); +} + +void LocationListCollection::print() { + map::iterator itr_s = collection->begin(); + map::iterator itr_e = collection->end(); + while (itr_s != itr_e) { + collection->at(itr_s->first)->print(); + ++itr_s; + } +} + +LocationList * const LocationListCollection::getLocationList(string header) { + if (collection->count(header) == 0) { + string msg("Regions of "); + msg.append(header); + msg.append(" cannot be found.\n"); + throw InvalidStateException(msg); + } + + return collection->at(header); +} + +void LocationListCollection::convertToRedFormat() { + map::iterator itr_s = collection->begin(); + map::iterator itr_e = collection->end(); + while (itr_s != itr_e) { + collection->at(itr_s->first)->convertToRedFormat(); + ++itr_s; + } +} + +void LocationListCollection::trim(int x) { + map::iterator itr_s = collection->begin(); + map::iterator itr_e = collection->end(); + while (itr_s != itr_e) { + collection->at(itr_s->first)->trim(x); + ++itr_s; + } +} + +} /* namespace nonltr */ diff --git a/src/nonltr/LocationListCollection.h b/src/nonltr/LocationListCollection.h new file mode 100644 index 0000000..2461e97 --- /dev/null +++ b/src/nonltr/LocationListCollection.h @@ -0,0 +1,41 @@ +/* + * LocationListCollection.h + * + * Created on: Feb 19, 2015 + * Author: Hani Zakaria Girgis, PhD + */ + +#ifndef SRC_NONLTR_LOCATIONLISTCOLLECTION_H_ +#define SRC_NONLTR_LOCATIONLISTCOLLECTION_H_ + +#include +#include + +#include "LocationList.h" +#include "../utility/Util.h" +#include "../exception/InvalidStateException.h" + +using namespace std; +using namespace utility; + +namespace nonltr { + +class LocationListCollection { + +private: + string fileName; + map * 
collection; + void readCoordinates(); + +public: + LocationListCollection(string); + virtual ~LocationListCollection(); + LocationList * const getLocationList(string); + void print(); + void convertToRedFormat(); + void trim(int ); +}; + +} /* namespace nonltr */ + +#endif /* SRC_NONLTR_LOCATIONLISTCOLLECTION_H_ */ diff --git a/src/nonltr/Scanner.cpp b/src/nonltr/Scanner.cpp new file mode 100644 index 0000000..8a24070 --- /dev/null +++ b/src/nonltr/Scanner.cpp @@ -0,0 +1,379 @@ +/* + * Scanner.cpp + * + * Created on: Aug 19, 2013 + * Author: Hani Zakaria Girgis, PhD + */ +#include "Scanner.h" + +namespace nonltr { + +Scanner::Scanner(HMM * hmmIn, int kIn, ChromosomeOneDigit * chromIn, + string scoresFile) { + // ToDo: Fix this operation + string msg("Scanning file of scores is temporarily disabled."); + throw InvalidOperationException(msg); + + hmm = hmmIn; + k = kIn; + chrom = chromIn; + segmentList = chrom->getSegment(); + scorer = NULL; + scoreList = new vector(); + ifstream in(scoresFile.c_str()); + if (in) { + string header; + getline(in, header); + + string score; + while (in >> score) { + scoreList->push_back(atoi(score.c_str())); + } + in.close(); + } else { + string msg(scoresFile); + msg.append(" does not exist."); + throw FileDoesNotExistException(msg); + } + + regionList = new vector(); + + // Start scanning + start(); +} + +Scanner::Scanner(HMM * hmmIn, int kIn, ChromosomeOneDigit * chromIn, + ITableView * table) { + hmm = hmmIn; + k = kIn; + + chrom = chromIn; + segmentList = chrom->getSegment(); + scorer = new Scorer(chrom, table); + scorer->takeLog(hmm->getBase()); + scoreList = scorer->getScores(); + regionList = new vector(); + + // Start scanning + start(); +} + +Scanner::~Scanner() { + if (scorer == NULL) { + scoreList->clear(); + delete scoreList; + } else { + delete scorer; + } + + Util::deleteInVector(regionList); + delete regionList; +} + +void Scanner::start() { + check(); + + decode(); + + extendByK(); + + merge(); +} + +void 
Scanner::check() { + if (chrom->size() != scoreList->size()) { + string msg("The size of the sequence is not the same as the size of "); + msg.append("the scores. The size of sequence is: "); + msg.append(Util::int2string(chrom->size())); + msg.append(". The size of the scores is: "); + msg.append(Util::int2string(scoreList->size())); + msg.append("."); + throw InvalidStateException(msg); + } +} + +void Scanner::decode() { + int segmentCount = segmentList->size(); + for (int tt = 0; tt < segmentCount; tt++) { + vector * segment = segmentList->at(tt); + hmm->decode(segment->at(0), segment->at(1), scoreList, *regionList); + } +} + +void Scanner::extendByK() { + int regionCount = regionList->size(); + if (regionCount > 0) { + int firstCandIndex = 0; + int lastCandIndex = 0; + int segmentNumber = segmentList->size(); + for (int i = 0; i < segmentNumber; i++) { + vector * s = segmentList->at(i); + ILocation * c = regionList->at(firstCandIndex); + // Sometimes a segment have no repeats + if (Util::isOverlapping(s->at(0), s->at(1), c->getStart(), + c->getEnd())) { + lastCandIndex = extendByKHelper(s->at(0), s->at(1), + firstCandIndex); + firstCandIndex = lastCandIndex + 1; + if (firstCandIndex >= regionCount) { + break; + } + } + } + } +} + +int Scanner::extendByKHelper(int segStart, int segEnd, int firstCandIndex) { + ILocation * cand = regionList->at(firstCandIndex); + + // Make sure that the first region is overlapping with the segment + if (!Util::isOverlapping(segStart, segEnd, cand->getStart(), + cand->getEnd())) { + string msg("The first region is not overlapping with the segment."); + msg.append(" Region: "); + msg.append(Util::int2string(cand->getStart())); + msg.append(":"); + msg.append(Util::int2string(cand->getEnd())); + msg.append(" Segment: "); + msg.append(Util::int2string(segStart)); + msg.append(":"); + msg.append(Util::int2string(segEnd)); + throw InvalidInputException(msg); + } + + int lastCandIndex = -1; + int candidateNumber = regionList->size(); + 
for (int c = firstCandIndex; c < candidateNumber; c++) { + ILocation * cand = regionList->at(c); + if (Util::isOverlapping(segStart, segEnd, cand->getStart(), + cand->getEnd())) { + int newEnd = cand->getEnd() + k - 1; + if (newEnd > segEnd) { + newEnd = segEnd; + } + cand->setEnd(newEnd); + lastCandIndex = c; + } else { + break; + } + } + + if (lastCandIndex < 0) { + string msg("The index of the last region cannot be negative."); + throw InvalidStateException(msg); + } + + return lastCandIndex; +} + +void Scanner::merge() { + int regionCount = regionList->size(); + int gg = 0; + while (gg < regionCount) { + ILocation * region = regionList->at(gg); + + int regionStart = region->getStart(); + int regionEnd = region->getEnd(); + + if (gg > 0) { + ILocation * pRegion = regionList->at(gg - 1); + int pStart = pRegion->getStart(); + int pEnd = pRegion->getEnd(); + + if (Util::isOverlapping(pStart, pEnd, regionStart, regionEnd)) { + pRegion->setEnd(regionEnd > pEnd ? regionEnd : pEnd); + regionList->erase(regionList->begin() + gg); + delete region; + regionCount = regionList->size(); + } else { + gg++; + } + } + + if (gg == 0) { + gg++; + } + } +} + +void Scanner::mergeWithOtherRegions(const vector * otherList) { + vector * mergedList = new vector(); + + int i = 0; + int j = 0; + int iLimit = regionList->size(); + int jLimit = otherList->size(); + + // Continue until one list is finished + while (i < iLimit && j < jLimit) { + ILocation * iLoc = regionList->at(i); + ILocation * jLoc = otherList->at(j); + + if (iLoc->getStart() < jLoc->getStart()) { + mergedList->push_back(iLoc); + i++; + } else { + mergedList->push_back(new Location(*jLoc)); + j++; + } + } + + // Once one list is finished, copy the rest of the other list + if (i == iLimit) { + for (; j < jLimit; j++) { + mergedList->push_back(new Location(*(otherList->at(j)))); + } + } else if (j == jLimit) { + for (; i < iLimit; i++) { + mergedList->push_back(regionList->at(i)); + } + } + + // Once done + // 
Util::deleteInVector(regionList); + // @@ Need to be tested + regionList->clear(); + delete regionList; + regionList = mergedList; + + merge(); + + //Ensure that the list is sorted + for (int h = 1; h < regionList->size(); h++) { + if (regionList->at(h)->getStart() < regionList->at(h - 1)->getStart()) { + throw InvalidStateException(string("This list is not sorted.")); + } + } +} + +void Scanner::makeForwardCoordinates() { + int regionNum = regionList->size(); + int lastBase = chrom->size() - 1; + + // Calculate the coordinate on the main strand + for (int i = 0; i < regionNum; i++) { + ILocation * oldLoc = regionList->at(i); + regionList->at(i) = new Location(lastBase - oldLoc->getEnd(), + lastBase - oldLoc->getStart()); + delete oldLoc; + } + + // Reverse the regions within the list + int lastRegion = regionNum - 1; + int middle = regionNum / 2; + for (int i = 0; i < middle; i++) { + ILocation * temp = regionList->at(lastRegion - i); + regionList->at(lastRegion - i) = regionList->at(i); + regionList->at(i) = temp; + } + +} + +/** + * Warning: this method prints the logarithm values of the scores + */ +void Scanner::printScores(string outputFile, bool canAppend) { + cout << "Printing the logarithmic values of the scores "; + cout << "NOT the original scores." << endl; + + ofstream outScores; + if (canAppend) { + outScores.open(outputFile.c_str(), ios::out | ios::app); + } else { + outScores.open(outputFile.c_str(), ios::out); + } + + int step = 50; + outScores << chrom->getHeader() << endl; + int len = scoreList->size(); + for (int i = 0; i < len; i = i + step) { + int e = (i + step - 1 > len - 1) ? 
len - 1 : i + step - 1; + for (int k = i; k <= e; k++) { + outScores << scoreList->at(k) << " "; + } + outScores << endl; + } + outScores << endl; + outScores.close(); +} + +void Scanner::printIndex(string outputFile, bool canAppend, int frmt) { + + if(frmt != FRMT_POS && frmt != FRMT_BED){ + string msg("Unknown output format: "); + msg.append(Util::int2string(frmt)); + msg.append(". The known formats are: "); + msg.append(Util::int2string(FRMT_POS)); + msg.append(" and "); + msg.append(Util::int2string(FRMT_BED)); + msg.append("."); + throw InvalidInputException(msg); + } + + ofstream outIndex; + if (canAppend) { + outIndex.open(outputFile.c_str(), ios::out | ios::app); + } else { + outIndex.open(outputFile.c_str(), ios::out); + } + + // Write the index of the repeat segment [x,y[ + string header = chrom->getHeader(); + + if(frmt == FRMT_POS){ + for (int j = 0; j < regionList->size(); j++) { + outIndex << header << ":"; + outIndex << ((int) (regionList->at(j)->getStart())) << "-"; + outIndex << ((int) (regionList->at(j)->getEnd() + 1)); + outIndex << endl; + } + }else if(frmt == FRMT_BED){ + for (int j = 0; j < regionList->size(); j++) { + outIndex << header << "\t"; + outIndex << ((int) (regionList->at(j)->getStart())) << "\t"; + outIndex << ((int) (regionList->at(j)->getEnd() + 1)); + outIndex << endl; + } + } + + outIndex.close(); +} + +void Scanner::printMasked(string outputFile, Chromosome& oChrom, + bool canAppend) { + + string baseCopy = *(oChrom.getBase()); + int regionCount = regionList->size(); + for (int j = 0; j < regionCount; j++) { + for (int h = regionList->at(j)->getStart(); + h <= regionList->at(j)->getEnd(); h++) { + baseCopy[h] = tolower(baseCopy[h]); + } + } + + ofstream outMask; + + if (canAppend) { + outMask.open(outputFile.c_str(), ios::out | ios::app); + } else { + outMask.open(outputFile.c_str(), ios::out); + } + + outMask << oChrom.getHeader() << endl; + int step = 50; + int len = baseCopy.size(); + for (int i = 0; i < len; i = i + step) 
{ + int e = (i + step - 1 > len - 1) ? len - 1 : i + step - 1; + for (int k = i; k <= e; k++) { + outMask << baseCopy[k]; + } + outMask << endl; + } + outMask.close(); +} + +const vector* Scanner::getRegionList() { + return regionList; +} + +} /* namespace nonltr */ diff --git a/src/nonltr/Scanner.h b/src/nonltr/Scanner.h new file mode 100644 index 0000000..683de7e --- /dev/null +++ b/src/nonltr/Scanner.h @@ -0,0 +1,71 @@ +/* + * Scanner.h + * + * Created on: Aug 19, 2013 + * Author: Hani Zakaria Girgis, PhD + */ + +#ifndef SCANNER_H_ +#define SCANNER_H_ + +#include +#include +#include + +#include "Chromosome.h" +#include "ChromosomeOneDigit.h" +#include "HMM.h" +#include "ITableView.h" +#include "Scorer.h" +#include "../utility/Util.h" +#include "../utility/ILocation.h" +#include "../utility/Location.h" +#include "../exception/InvalidInputException.h" +#include "../exception/InvalidStateException.h" +#include "../exception/FileDoesNotExistException.h" +#include "../exception/InvalidOperationException.h" + +using namespace std; +using namespace utility; +using namespace exception; + +namespace nonltr { + +class Scanner { +private: + //string chromFile; + ChromosomeOneDigit * chrom; + const vector *> * segmentList; + Scorer * scorer; + vector * scoreList; + vector * regionList; + int k; + HMM * hmm; + // bool isTrainMode; + + // Methods + void start(); + void check(); + void decode(); + void extendByK(); + int extendByKHelper(int, int, int); + void merge(); + +public: + static const int FRMT_POS = 1; + static const int FRMT_BED = 2; + + Scanner(HMM *, int, ChromosomeOneDigit *, string); + Scanner(HMM *, int, ChromosomeOneDigit *, ITableView *); + virtual ~Scanner(); + void makeForwardCoordinates(); + + void printScores(string, bool); + void printIndex(string, bool, int); + void printMasked(string, Chromosome&, bool); + void mergeWithOtherRegions(const vector *); + const vector* getRegionList(); +}; + +} /* namespace nonltr */ +#endif /* SCANNER_H_ */ diff --git 
a/src/nonltr/Scorer.cpp b/src/nonltr/Scorer.cpp new file mode 100644 index 0000000..947f9b8 --- /dev/null +++ b/src/nonltr/Scorer.cpp @@ -0,0 +1,143 @@ +/* + * Scorer.cpp + * + * Created on: Aug 3, 2012 + * Author: Hani Zakaria Girgis, PhD + */ +#include "Scorer.h" + +Scorer::Scorer(ChromosomeOneDigit * chromIn, + ITableView * const table) { + chrom = chromIn; + kmerTable = table; + scores = new vector(chrom->getBase()->size(), 0); + k = kmerTable->getK(); + max = -1; + score(); + calculateMax(); +} + +Scorer::~Scorer() { + scores->clear(); + delete scores; +} + +/** + * This method scores each nucleotide in the chromosome. + * The nucleotides represented by 'N' are assigned zero. + */ +void Scorer::score() { + const vector *> * segment = chrom->getSegment(); + const char * segBases = chrom->getBase()->c_str(); + + for (int s = 0; s < segment->size(); s++) { + int start = segment->at(s)->at(0); + int end = segment->at(s)->at(1); + kmerTable->wholesaleValueOf(segBases, start, end - k + 1, scores, + start); + + // Handle the last word from end - k + 2 till the end, inclusive. + for (int i = end - k + 2; i <= end; i++) { + (*scores)[i] = scores->at(i - 1); + } + } +} + +/** + * This method takes the logarithm of the scores according to the base. + * If the score equals zero, it is left the same. + */ +void Scorer::takeLog(double base) { + // Handle the case where base is one + bool isOne = false; + if (fabs(base - 1.0) < std::numeric_limits::epsilon()) { + isOne = true; + } + double logBase = isOne ? 
log(1.5) : log(base); + + const vector *> * segment = chrom->getSegment(); + for (int s = 0; s < segment->size(); s++) { + int start = segment->at(s)->at(0); + int end = segment->at(s)->at(1); + for (int h = start; h <= end; h++) { + int score = scores->at(h); + + if (score != 0) { + if (!isOne || (isOne && score > 1)) { + (*scores)[h] = ceil(log(score) / logBase); + } + } + } + } +} + +int Scorer::getK() { + return k; +} + +vector* Scorer::getScores() { + return scores; +} + +void Scorer::printScores(string outputFile, bool canAppend) { + ofstream outScores; + if (canAppend) { + outScores.open(outputFile.c_str(), ios::out | ios::app); + } else { + outScores.open(outputFile.c_str(), ios::out); + } + + int step = 50; + outScores << chrom->getHeader() << endl; + int len = scores->size(); + for (int i = 0; i < len; i = i + step) { + int e = (i + step - 1 > len - 1) ? len - 1 : i + step - 1; + for (int k = i; k <= e; k++) { + outScores << scores->at(k) << " "; + } + outScores << endl; + } + outScores << endl; + + outScores.close(); +} + +int Scorer::countLessOrEqual(int thr) { + int count = 0; + const vector *> * segment = chrom->getSegment(); + for (int s = 0; s < segment->size(); s++) { + int start = segment->at(s)->at(0); + int end = segment->at(s)->at(1); + for (int h = start; h <= end; h++) { + if (scores->at(h) <= thr) { + count++; + } + } + } + return count; +} + +void Scorer::calculateMax() { + const vector *> * segmentList = chrom->getSegment(); + int segmentCount = segmentList->size(); + for (int jj = 0; jj < segmentCount; jj++) { + vector * segment = segmentList->at(jj); + int start = segment->at(0); + int end = segment->at(1); + for (int ss = start; ss <= end; ss++) { + int score = scores->at(ss); + if (score > max) { + max = score; + } + } + } + + if (max == -1) { + string msg("Error occurred while finding the maximum score."); + throw InvalidStateException(msg); + } +} + +int Scorer::getMax() { + return max; +} diff --git a/src/nonltr/Scorer.h 
b/src/nonltr/Scorer.h new file mode 100644 index 0000000..06daaf4 --- /dev/null +++ b/src/nonltr/Scorer.h @@ -0,0 +1,54 @@ +/* + * Scorer.h + * + * Created on: Aug 3, 2012 + * Author: Hani Zakaria Girgis, PhD + */ + +#ifndef SCORER_H_ +#define SCORER_H_ + +#include +#include +#include +#include +#include + +#include "ITableView.h" +#include "ChromosomeOneDigit.h" +#include "../utility/Util.h" +#include "../exception/InvalidStateException.h" + +using namespace std; +using namespace nonltr; +using namespace utility; +using namespace exception; + +namespace nonltr { +class Scorer { +private: + /* Fields */ + ChromosomeOneDigit * chrom; + ITableView * kmerTable; + vector * scores; + int k; + int max; + + /* Methods */ + void score(); + void calculateMax(); + +public: + /* Methods */ + Scorer(ChromosomeOneDigit *, ITableView *); + virtual ~Scorer(); + void printScores(string, bool); + vector* getScores(); + int getK(); + void takeLog(double); + int countLessOrEqual(int); + int getMax(); +}; +} + +#endif /* Scorer_H_ */ diff --git a/src/nonltr/TableBuilder.cpp b/src/nonltr/TableBuilder.cpp new file mode 100644 index 0000000..32733a9 --- /dev/null +++ b/src/nonltr/TableBuilder.cpp @@ -0,0 +1,121 @@ +/* + * TableBuilder.cpp + * + * Created on: Jul 31, 2012 + * Author: Hani Zakaria Girgis, PhD + */ + +#include "TableBuilder.h" + +TableBuilder::TableBuilder(string dir, int motifSize, int order, int minObs) { + genomeDir = dir; + k = motifSize; + genomeLength = 0; + // kmerTable = new KmerHashTable(k); + // kmerTable = new EnrichmentView(k); + + // Whenever you change the template, modify line 50 and 70 and the header file line 35 + kmerTable = new EnrichmentMarkovView(k, order, minObs); + + buildTable(); +} + +TableBuilder::~TableBuilder() { + delete kmerTable; +} + +void TableBuilder::buildTable() { + vector * fileList = new vector(); + Util::readChromList(genomeDir, fileList, "fa"); + + for (int i = 0; i < fileList->size(); i++) { + cout << "Counting k-mers in " << 
fileList->at(i) << " ..." << endl; + ChromListMaker * maker = new ChromListMaker(fileList->at(i)); + const vector * chromList = maker->makeChromOneDigitList(); + + for (int h = 0; h < chromList->size(); h++) { + ChromosomeOneDigit * chrom = + dynamic_cast(chromList->at(h)); + if (chrom) { + genomeLength += chrom->getEffectiveSize(); + updateTable(chrom); + } else { + throw InvalidStateException(string("Dynamic cast failed.")); + } + } + + delete maker; + } + // Check if overflow has occurred + kmerTable->checkOverflow(); + + // View + // EnrichmentView * view = dynamic_cast(kmerTable); + EnrichmentMarkovView * view = + dynamic_cast *>(kmerTable); + + if (view) { + view->generateProbapilities(); + view->processTable(); + maxValue = view->getMaxValue(); + } else { + throw InvalidStateException(string("Dynamic cast failed.")); + } + cout << "Enrichment view is ready." << endl; + + fileList->clear(); + delete fileList; + + /* If you would like to see the contents of the table.*/ + // kmerTable-> printTable(); +} + +void TableBuilder::updateTable(ChromosomeOneDigit * chrom) { + // EnrichmentView * view = dynamic_cast(kmerTable); + EnrichmentMarkovView * view = + dynamic_cast *>(kmerTable); + + const vector *> * segment = chrom->getSegment(); + const char * segBases = chrom->getBase()->c_str(); + + for (int s = 0; s < segment->size(); s++) { + int start = segment->at(s)->at(0); + int end = segment->at(s)->at(1); + // cerr << "The segment length is: " << (end-start+1) << endl; + + // Fast, but require some memory proportional to the segment length. 
+ kmerTable->wholesaleIncrement(segBases, start, end - k + 1); + if (view) { + view->count(segBases, start, end); + } else { + throw InvalidStateException(string("Dynamic cast failed.")); + } + + // Slow, but memory efficient + /* + vector hashList = vector(); + kmerTable->hash(segBases, start, end - k + 1, &hashList); + + for (int i = start; i <= end - k + 1; i++) { + kmerTable->increment(segBases, i); + } + */ + } +} + +KmerHashTable * const TableBuilder::getKmerTable() { + return kmerTable; +} + +long TableBuilder::getGenomeLength() { + if (genomeLength < 0) { + string msg("The length of the genome cannot be negative."); + throw InvalidStateException(msg); + } + + return genomeLength; +} + +int TableBuilder::getMaxValue() { + return maxValue; +} diff --git a/src/nonltr/TableBuilder.h b/src/nonltr/TableBuilder.h new file mode 100644 index 0000000..1041f3d --- /dev/null +++ b/src/nonltr/TableBuilder.h @@ -0,0 +1,68 @@ +/* + * TableBuilder.h + * + * Created on: Jul 31, 2012 + * Author: Hani Zakaria Girgis, PhD - NCBI/NLM/NIH + */ + +#ifndef TABLEBUILDER_H_ +#define TABLEBUILDER_H_ + +#include "KmerHashTable.h" +#include "EnrichmentMarkovView.h" +#include "ChromosomeOneDigit.h" +#include "ChromListMaker.h" +#include "IChromosome.h" + +#include "../utility/Util.h" +#include "../exception/InvalidStateException.h" + +#include + +using namespace std; +using namespace nonltr; +using namespace utility; +using namespace exception; + +namespace nonltr { +class TableBuilder { +private: + /** + * k-mer table + */ + KmerHashTable * kmerTable; + int maxValue; + + /** + * Directory including the FASTA files comprising the genome. 
+ * These files must have the + */ + string genomeDir; + + /** + * The size of the motif + */ + int k; + + /** + * The total length of the whole genome + */ + long genomeLength; + + /** + * Methods + */ + void buildTable(); + void updateTable(ChromosomeOneDigit *); + +public: + TableBuilder(string, int, int, int); + virtual ~TableBuilder(); + KmerHashTable * const getKmerTable(); + void printTable(); + long getGenomeLength(); + int getMaxValue(); +}; +} + +#endif /* TABLEBUILDER_H_ */ diff --git a/src/nonltr/Trainer.cpp b/src/nonltr/Trainer.cpp new file mode 100644 index 0000000..3e8865f --- /dev/null +++ b/src/nonltr/Trainer.cpp @@ -0,0 +1,278 @@ +/* + * Trainer.cpp + * + * Created on: Aug 20, 2013 + * Author: Hani Zakaria Girgis, PhD + */ + +#include "Trainer.h" + +namespace nonltr { + +// Pass the isCND and the isCON parameters + +Trainer::Trainer(string genomeDirIn, int orderIn, int kIn, double sIn, + double tIn, string candidateDirIn, int m) : minObs(m) { + candidateDir = candidateDirIn; + canPrintCandidates = true; + isCND = true; + isCON = false; + initialize(genomeDirIn, orderIn, kIn, sIn, tIn); +} + +Trainer::Trainer(string genomeDirIn, int orderIn, int kIn, double sIn, + double tIn, string candidateDirIn, bool isCNDIn, string otherDirIn, int m) : minObs(m) { + candidateDir = candidateDirIn; + canPrintCandidates = true; + isCND = isCNDIn; + isCON = true; + otherDir = otherDirIn; + initialize(genomeDirIn, orderIn, kIn, sIn, tIn); +} + +Trainer::Trainer(string genomeDirIn, int orderIn, int kIn, double sIn, + double tIn, int m) : minObs(m) { + canPrintCandidates = false; + isCND = true; + isCON = false; + initialize(genomeDirIn, orderIn, kIn, sIn, tIn); +} + +Trainer::Trainer(string genomeDirIn, int orderIn, int kIn, double sIn, + double tIn, bool isCNDIn, string otherDirIn, int m) : minObs(m) { + canPrintCandidates = false; + isCND = isCNDIn; + isCON = true; + otherDir = otherDirIn; + initialize(genomeDirIn, orderIn, kIn, sIn, tIn); +} + +void 
Trainer::initialize(string genomeDirIn, int orderIn, int kIn, double sIn, + double tIn) { + + if (isCND == false && isCON == false) { + string msg("Training using the candidates or the other repeats is required. "); + msg.append("Please specify which regions to be used for training. "); + msg.append("Any of the two sets or a combination of both can be used."); + throw InvalidStateException(msg); + } + + genomeDir = genomeDirIn; + fileList = new vector(); + Util::readChromList(genomeDir, fileList, string("fa")); + chromCount = fileList->size(); + order = orderIn; + k = kIn; + s = sIn; + t = tIn; + p = 0.0; + tDetector = tIn + 0.1; + max = -1; + + stage1(); + + if (isCND) { + stage2(); + } + stage3(); +} + +Trainer::~Trainer() { + fileList->clear(); + delete fileList; + delete builder; + delete hmm; +} + +/** + * Stage 1: Building the table + */ +void Trainer::stage1() { + cout << endl << endl; + cout << "Stage 1: Building the table ..." << endl; + builder = new TableBuilder(genomeDir, k, order, minObs); + table = builder->getKmerTable(); + genomeLength = builder->getGenomeLength(); + max = builder->getMaxValue(); +} + +void Trainer::stage2() { + cout << endl << endl; + cout << "Stage 2: Calculating the percentage ..." 
<< endl; + + double effectiveSize = 0.0; + double countLessOrEqual = 0.0; + for (int i = 0; i < chromCount; i++) { + cout << "Calculating the percentage in: " << fileList->at(i) << " ..."; + cout << endl; + ChromListMaker * maker = new ChromListMaker(fileList->at(i)); + const vector * chromList = maker->makeChromOneDigitList(); + + for (int h = 0; h < chromList->size(); h++) { + ChromosomeOneDigit * chrom = + dynamic_cast(chromList->at(h)); + Scorer * scorer = new Scorer(chrom, table); + + effectiveSize += chrom->getEffectiveSize(); + countLessOrEqual += scorer->countLessOrEqual(t); + + delete scorer; + } + delete maker; + } + + if (effectiveSize == 0) { + string msg("The size of the genome cannot be zero."); + throw InvalidStateException(msg); + } else { + p = 100.00 * countLessOrEqual / effectiveSize; + cout << "The percentage is " << p << endl; + if (p < 52.5) { + p = 52.5; + cout << "The percentage is increased to " << p << endl; + } + } +} + +/** + * Stage 3: Training + */ +void Trainer::stage3() { + cout << endl << endl; + cout << "Stage 3: Training ..." << endl; + + // Handle the case when the threshold is one. + bool isOne = false; + if (fabs(t - 1.0) < std::numeric_limits::epsilon()) { + isOne = true; + } + double hmmBase = isOne ? 
1.5 : t; + + // Make a list of candidate HMM + int stateCount = 2 * (ceil(log(max) / log(hmmBase)) + 1); + + // Initialize the HMM + hmm = new HMM(hmmBase, stateCount); + + // Start training the models + for (int i = 0; i < chromCount; i++) { + cout << "Training on: " << fileList->at(i) << endl; + // Name of candidates file + string path(fileList->at(i)); + int slashLastIndex = path.find_last_of(Util::fileSeparator); + int dotLastIndex = path.find_last_of("."); + string nickName = path.substr(slashLastIndex + 1, dotLastIndex - slashLastIndex - 1); + + // May or may not be used + string cndFile = candidateDir + Util::fileSeparator + nickName + ".cnd"; + + // Work on the other repeats if desired + LocationListCollection * otherRegionListCollection; + bool isConRepAvailable = false; + if (isCON) { + string otherFile = otherDir + Util::fileSeparator + nickName + ".rpt"; + ifstream f1(otherFile.c_str()); + if (!f1) { + string message = string("Warning: "); + message.append(otherFile); + message.append(" does not exist. 
"); + message.append("Repeats of this sequence will not used for training the HMM."); + cout << message << endl; + } else { + otherRegionListCollection = new LocationListCollection(otherFile); + otherRegionListCollection->convertToRedFormat(); + otherRegionListCollection->trim(k - 1); + + isConRepAvailable = true; + } + f1.close(); + } + + // Read sequences in the file + ChromListMaker * maker = new ChromListMaker(fileList->at(i)); + const vector * chromList = maker->makeChromOneDigitList(); + + for (int h = 0; h < chromList->size(); h++) { + ChromosomeOneDigit * chrom = dynamic_cast(chromList->at(h)); + Scorer * scorer = new Scorer(chrom, table); + vector * scoreList = scorer->getScores(); + + // Detect candidates if desired + ChromDetectorMaxima * detector; + const vector * trainingRegionList; + bool canDeleteDetector = true; + if (isCND) { + if (canPrintCandidates) { + detector = new ChromDetectorMaxima(s, 10, 0, tDetector, p,s, scoreList, chrom); + if (h > 0) { + bool canAppend = true; + detector->printIndex(cndFile, canAppend); + } else { + cout << "Printing candidates to: " << cndFile << endl; + detector->printIndex(cndFile); + } + } else { + detector = new ChromDetectorMaxima(s, 10, 0, tDetector, p, s, scoreList, chrom->getSegment()); + } + trainingRegionList = detector->getRegionList(); + + + } + + if (isCON && isConRepAvailable) { + LocationList * const locList = otherRegionListCollection->getLocationList(chrom->getHeader()); + if (isCND) { + locList->mergeWithAnotherList(detector->getRegionList()); + } + trainingRegionList = locList->getList(); + + } + + // The candidate regions are already copied to the location list + if (isCND && isCON && isConRepAvailable) { + delete detector; + canDeleteDetector = false; + } + + // Train the HMM + if(isCND || (isCON && isConRepAvailable)){ + + scorer->takeLog(t); + scoreList = scorer->getScores(); + hmm->train(scoreList, chrom->getSegment(), trainingRegionList); + } + + // Free more memory + if (isCND && 
canDeleteDetector) { + delete detector; + } + delete scorer; + } + + if (isCON && isConRepAvailable) { + delete otherRegionListCollection; + } + delete maker; + } + + // Normalize HMM's once training is finished + hmm->normalize(); +} + +void Trainer::printTable(string fileName) { + table->printTable(fileName); +} + +HMM*& Trainer::getHmm() { + return hmm; +} + +KmerHashTable * Trainer::getTable() { + return table; +} + +void Trainer::printHmm(string fileName) { + hmm->print(fileName); +} + +} /* namespace nonltr */ diff --git a/src/nonltr/Trainer.h b/src/nonltr/Trainer.h new file mode 100644 index 0000000..8281343 --- /dev/null +++ b/src/nonltr/Trainer.h @@ -0,0 +1,80 @@ +/* + * Trainer.h + * + * Created on: Aug 20, 2013 + * Author: Hani Zakaria Girgis, PhD + */ + +#ifndef TRAINER_H_ +#define TRAINER_H_ + +#include +#include +#include +#include +#include + +#include "TableBuilder.h" +#include "KmerHashTable.h" +#include "HMM.h" +#include "ChromDetectorMaxima.h" +#include "Scorer.h" +#include "ChromListMaker.h" +#include "LocationListCollection.h" +#include "../utility/Util.h" +#include "../exception/InvalidStateException.h" + +using namespace std; +using namespace utility; +using namespace exception; + +namespace nonltr { + +class Trainer { +private: + string genomeDir; + string candidateDir; + string otherDir; + bool canPrintCandidates; + vector * fileList; + int chromCount; + int order; + int k; + int max; // Maximum score in the entire genome + double t; // Score threshold + double tDetector; // threshold for the detector because it uses < not <=; + double p; // Percentage of scores below the threshold, t, in non-repeats + //double r; + double s; // Half width of the mask + long genomeLength; + //vector * sampleList; + TableBuilder * builder; + KmerHashTable * table; + HMM * hmm; + int isCND; + int isCON; + // The minimum number of the observed k-mers + const int minObs; + + void stage1(); + void stage2(); + void stage3(); + //void stage4(); + +public: + 
Trainer(string, int, int, double, double, string, int); + Trainer(string, int, int, double, double, string, bool, string, int); + Trainer(string, int, int, double, double, int); + Trainer(string, int, int, double, double, bool, string, int); + + void initialize(string, int, int, double, double); + virtual ~Trainer(); + void printTable(string); + void printHmm(string); + HMM*& getHmm(); + KmerHashTable * getTable(); + +}; + +} /* namespace nonltr */ +#endif /* TRAINER_H_ */ diff --git a/src/utility/AffineId.cpp b/src/utility/AffineId.cpp new file mode 100644 index 0000000..484a5bd --- /dev/null +++ b/src/utility/AffineId.cpp @@ -0,0 +1,212 @@ +/* + * AffineId.cpp + * + * Created on: Dec 6, 2012 + * Modified on: Nov 6, 2017 + * Author: Hani Zakaria Girgis, PhD + */ + +// ToDo: +// 1. Add pre-conditions after testing +#include "AffineId.h" + +#include "Util.h" +#include "../exception/InvalidInputException.h" + +#include +#include +using namespace std; +//using namespace exception; + +namespace utility { + +AffineId::AffineId(const char * seq1In, int start1In, int end1In, + const char * seq2In, int start2In, int end2In) { + + // The shorter of the two sequences is seq2 + seq1 = seq1In; + start1 = start1In; + end1 = end1In; + + seq2 = seq2In; + start2 = start2In; + end2 = end2In; + + if (end1 - start1 < end2 - start2) { + seq1 = seq2In; + start1 = start2In; + end1 = end2In; + + seq2 = seq1In; + start2 = start1In; + end2 = end1In; + } + + /* if (start1 < 0 || end1 < 0 || start1 > end1) { + string msg("Invalid Input. Start1 is "); + msg.append(Util::int2string(start1)); + msg.append(". End 1 is "); + msg.append(Util::int2string(end1)); + msg.append("."); + //throw InvalidInputException(msg); + + cerr << msg << endl; + throw exception(); + } + + if (start2 < 0 || end2 < 0 || start2 > end2) { + string msg("Invalid Input. Start2 is "); + msg.append(Util::int2string(start2)); + msg.append(". 
End2 is "); + msg.append(Util::int2string(end2)); + msg.append("."); + //throw InvalidInputException(msg); + + cerr << msg << endl; + throw exception(); + }*/ + + // Validate input + // cout << start1 << " " << end1 << endl; + // cout << start2 << " " << end2 << endl; + + len1 = end1 - start1 + 2; + len2 = end2 - start2 + 2; + + align(); +} + +AffineId::~AffineId() { +} + +void AffineId::align() { + // Initialize needed arrays + auto m = new int[len2][2](); // Middle level array + auto u = new int[len2][2](); // Upper level array + auto mId = new int[len2][2](); // Array storing number of matches in the middle array + auto uId = new int[len2][2](); // Array storing number of matches in the upper array + auto mPath = new int[len2][2](); // Array storing number of steps in the middle array + auto uPath = new int[len2][2](); // Array storing number of steps in the upper array + + // Apply the DP + // The i index is only used to get a character from the first sequence + // It is not used for filling the DP matrix + for (int i = 1; i < len1; i++) { + char base1 = seq1[start1 + i - 1]; + int lower = 0; + int lowerId = 0; + int lowerPath = 0; + + // j is the row. 
There are only two columns 0 and 1 + for (int j = 1; j < len2; j++) { + // Update the lower value + int extLower = lower + EXT; + int openLower = m[j - 1][0] + OPEN; + if (extLower > openLower) { + lower = extLower; + lowerPath++; + } else { + lower = openLower; + lowerId = mId[j - 1][0]; + lowerPath = mPath[j - 1][0] + 1; + } + + // Fill the array of the upper level + int extUpper = u[j][0] + EXT; + int openUpper = m[j][0] + OPEN; + if (extUpper > openUpper) { + u[j][1] = extUpper; + uId[j][1] = uId[j][0]; + uPath[j][1] = uPath[j][0] + 1; + } else { + u[j][1] = openUpper; + uId[j][1] = mId[j][0]; + uPath[j][1] = mPath[j][0] + 1; + } + + // Fill the array of the middle level + int matchOrMis; + if (base1 == seq2[start2 + j - 1]) { + matchOrMis = m[j - 1][0] + MATCH; + } else { + matchOrMis = m[j - 1][0] + MIS; + } + + int lowerOrUpper; + if (lower > u[j][1]) { + lowerOrUpper = lower; + } else { + lowerOrUpper = u[j][1]; + } + + if (matchOrMis > lowerOrUpper) { + m[j][1] = matchOrMis; + mPath[j][1] = mPath[j - 1][0] + 1; + if (base1 == seq2[start2 + j - 1]) { + mId[j][1] = mId[j - 1][0] + 1; + } else { + mId[j][1] = mId[j - 1][0]; + } + } else { + m[j][1] = lowerOrUpper; + if (lower > u[j][1]) { + mId[j][1] = lowerId; + mPath[j][1] = lowerPath; + } else { + mId[j][1] = uId[j][1]; + mPath[j][1] = uPath[j][1]; + } + } + } + + // // Test + // for (int h = 0; h < len2; h++) { + // cout << m[h][0] << "\t" << m[h][1] << "----" << mId[h][0] << "\t" + // << mId[h][1] << endl; + // } + // cout << "---------------------------------------------------" << endl; + // // End of test + + // Copy the second column to the first one + if (i != len1 - 1) { + for (int h = 0; h < len2; h++) { + m[h][0] = m[h][1]; + u[h][0] = u[h][1]; + mId[h][0] = mId[h][1]; + uId[h][0] = uId[h][1]; + mPath[h][0] = mPath[h][1]; + uPath[h][0] = uPath[h][1]; + } + } + } + + lenCS = mId[len2 - 1][1]; + lenPath = mPath[len2 - 1][1]; + //cout << "Alignment length = " << lenPath << endl; + delete[] u; + 
delete[] m; + delete[] mId; + delete[] uId; + delete[] mPath; + delete[] uPath; +} + +double AffineId::getAlign() { + double amt = lenCS; + return amt / (double)lenPath; +} + +} +/* namespace utility */ + +// // Testing code +// int main() { +// string s1("GATCTCAG"); +// string s2("GACAG"); + +// utility::AffineId id(s1.c_str(), 0, s1.length() - 1, s2.c_str(), 0, +// s2.length() - 1); +// cout << "Length = " << id.getLenCS() << endl; + +// return 0; +// } diff --git a/src/utility/AffineId.h b/src/utility/AffineId.h new file mode 100644 index 0000000..61173e7 --- /dev/null +++ b/src/utility/AffineId.h @@ -0,0 +1,50 @@ +/* + * AffineId.h + * + * Created on: Dec 6, 2012 + * Modified on: Nov 6, 2017 + * Author: Hani Zakaria Girgis, PhD + */ + +#ifndef AFFINEID_H_ +#define AFFINEID_H_ + +namespace utility { + +class AffineId { +private: + const char * seq1; + int start1; + int end1; + const char * seq2; + int start2; + int end2; + + int len1; + int len2; + //int lenTotal; + int lenCS; + int lenPath; + int * m; // Middle level + //int * l; // Lower level + int * u; // Upper level + + // const int MATCH = 4; // Score of a match + // const int MIS = -4; // Score of a mismatch + // const int OPEN = -2; // Score of a gap opening + // const int EXT = -1; // Score of a gap extension + + const int MATCH = 1; + const int MIS = -1; + const int OPEN = -2; + const int EXT = -1; + void align(); + +public: + AffineId(const char *, int, int, const char *, int, int); + virtual ~AffineId(); + double getAlign(); +}; + +} /* namespace utility */ +#endif /* AFFINEID_H_ */ diff --git a/src/utility/EmptyLocation.cpp b/src/utility/EmptyLocation.cpp new file mode 100644 index 0000000..38e8920 --- /dev/null +++ b/src/utility/EmptyLocation.cpp @@ -0,0 +1,53 @@ +/* + * EmptyLocation.cpp + * + * Created on: Dec 28, 2012 + * Author: Hani Zakaria Girgis, PhD + */ + +#include "EmptyLocation.h" +#include "../exception/InvalidOperationException.h" + +using namespace exception; + +namespace utility { + 
+EmptyLocation * EmptyLocation::INSTANCE = new EmptyLocation(); + +EmptyLocation * EmptyLocation::getInstance(){ + return INSTANCE; +} + +EmptyLocation::EmptyLocation() { + msg = new string("Empty location does not allow this operation."); +} + +EmptyLocation::~EmptyLocation() { + delete msg; +} + +string EmptyLocation::toString() { + return string("Empty"); +} + +int EmptyLocation::getEnd() const { + throw InvalidOperationException(*msg); +} + +int EmptyLocation::getStart() const { + throw InvalidOperationException(*msg); +} + +void EmptyLocation::setEnd(int int1) { + throw InvalidOperationException(*msg); +} + +void EmptyLocation::setStart(int int1) { + throw InvalidOperationException(*msg); +} + +int EmptyLocation::getLength() { + throw InvalidOperationException(*msg); +} + +} /* namespace tr */ diff --git a/src/utility/EmptyLocation.h b/src/utility/EmptyLocation.h new file mode 100644 index 0000000..4b0c6e9 --- /dev/null +++ b/src/utility/EmptyLocation.h @@ -0,0 +1,35 @@ +/* + * EmptyLocation.h + * + * Created on: Dec 28, 2012 + * Author: Hani Zakaria Girgis, PhD + */ + +#ifndef EMPTYLOCATION_H_ +#define EMPTYLOCATION_H_ + +#include "ILocation.h" + +namespace utility { + +class EmptyLocation: public ILocation { +private: + string * msg; + static EmptyLocation * INSTANCE; + EmptyLocation(); + virtual ~EmptyLocation(); + +public: + virtual int getEnd() const; + virtual int getStart() const; + virtual void setEnd(int); + virtual void setStart(int); + virtual int getLength(); + virtual string toString(); + + static EmptyLocation * getInstance(); + +}; + +} /* namespace tr */ +#endif /* EMPTYLOCATION_H_ */ diff --git a/src/utility/GlobAlignE.cpp b/src/utility/GlobAlignE.cpp new file mode 100644 index 0000000..f94bfcd --- /dev/null +++ b/src/utility/GlobAlignE.cpp @@ -0,0 +1,317 @@ +/** + * Author: Joseph Valencia + * Modified by Benjamin James + * Date: 12/14/17 + * Bioinformatics Toolsmith Laboratory, University of Tulsa + * */ +#include +#include 
"../exception/InvalidStateException.h" +#include +#include +#include +#include +#include +#include +#include +#include "GlobAlignE.h" + +using namespace std; +using namespace utility; +using namespace exception; + +GlobAlignE::GlobAlignE(const char * seq1In, int start1In, int end1In, const char * seq2In, + int start2In, int end2In, int matchIn, int mismatchIn, int gapOpenIn, int gapContinueIn){ + + seq1 = seq1In; + start1 = start1In; + end1 = end1In; + + seq2 = seq2In; + start2 = start2In; + end2 = end2In; + + len1 = end1 - start1 + 2; + len2 = end2 - start2 + 2; + + //Incremental score storage + matches = new int[len1]; + upperGap = new int[len1]; + lowerGap = new int[len1]; + + + + //Incremental length storage + matchLen = new int[len1]; + upperLen = new int[len1]; + lowerLen = new int[len1]; + + //Incremental identity storage + matchId = new int[len1]; + upperId = new int[len1]; + lowerId = new int[len1]; + + match = matchIn; + mismatch = mismatchIn; + gapOpen = gapOpenIn; + gapContinue = gapContinueIn; + findAlignment(); + +} +/* +GlobAlignE::GlobAlignE(string filename1,string filename2, int matchIn, int mismatchIn, int gapOpenIn, int gapContinueIn):GlobAlignE(string1.c_str(),0,string.size(),string2.c_str(),0,string2.size(),matchIn,mismatchIn,gapOpenIn,gapContinueIn){ + + ifstream ifs; + + ifs.open (filename1, ifstream::in); + cout<<"FILE OPENED"<'){ + + while(c!='\n'){ + c = ifs.get(); + + } + } + + string string1 =""; + + while (ifs.good()) { + + + if (c!='\n'){ + string1+=c; + } + c = ifs.get(); + } + + ifs.close(); + + + ifstream ifs2; + + ifs2.open (filename2, ifstream::in); + + c = ifs2.get(); + + if(c == '>'){ + + while(c!='\n'){ + c = ifs2.get(); + } + } + + string string2 =""; + + while (ifs2.good()) { + + if(c!='\n'){ + string2+=c; + } + c = ifs2.get(); + } + + ifs2.close(); + + std::transform(string1.begin(),string1.end(),string1.begin(),::toupper); + std::transform(string2.begin(),string2.end(),string2.begin(),::toupper); + + // return 
GlobAlignE(string1.c_str(),0,string.size(),string2.c_str(),0,string2.size(),matchIn,mismatchIn,gapOpenIn,gapContinueIn); + +} +*/ +void GlobAlignE::findAlignment(){ + + int shorter = min(len2,len1)-1; + int lenDiff = abs(len2-len1); + int maxDiff=0; + + if (lenDiff >=1){ + maxDiff += -gapOpen- (lenDiff*gapContinue); + } + + maxDiff+= (mismatch* shorter)-1; + + const int negativeInf = maxDiff; + + matches[0]= 0; + upperGap[0] = negativeInf; + lowerGap[0] = negativeInf; + + matchLen[0] =0; + upperLen[0] =0; + lowerLen[0] =0; + + matchId[0] =0; + upperId[0] = 0; + lowerId[0] =0; + + //initial values + for (int i = 1; i + +using namespace std; + +namespace utility{ + +class GlobAlignE{ + +private: + const char * seq1; //first sequence to be aligned + int start1; + int end1; + const char * seq2;//second sequence to be aligned + int start2; + int end2; + int len1; + int len2; + int lenTotal; + int match; //score for base pair match + int mismatch;//score for base pair mismatch + int gapOpen; //cost to open a gap + int gapContinue; //cost to continue a gap + int * matches; + int * upperGap; + int * lowerGap; + int * matchLen; + int * upperLen; + int * lowerLen; + int * matchId; + int * upperId; + int * lowerId; + int alignmentScore; + int alignmentLength; + int totalMatches; + string topString; + string bottomString; +public: + GlobAlignE(const char*,int,int,const char *,int,int, int,int,int,int); + GlobAlignE(string,string,int,int,int,int); + virtual ~GlobAlignE(); + void findAlignment(); + double getIdentity(); + int getLength(); + void printAlignment(); + int getScore(); + int getLengthAlignment(); + +}; +} +#endif diff --git a/src/utility/ILocation.h b/src/utility/ILocation.h new file mode 100644 index 0000000..53f1ea6 --- /dev/null +++ b/src/utility/ILocation.h @@ -0,0 +1,29 @@ +/* + * ILocation.h + * + * Created on: Dec 20, 2012 + * Author: Hani Zakaria Girgis, PhD + */ + +#ifndef ILOCATION_H_ +#define ILOCATION_H_ + +#include + +using namespace std; + +namespace 
utility { + +class ILocation { +public: + virtual int getEnd() const = 0; + virtual int getStart() const = 0; + virtual void setEnd(int) = 0; + virtual void setStart(int) = 0; + virtual int getLength() = 0; + virtual string toString() = 0; +}; + +} + +#endif /* ILOCATION_H_ */ diff --git a/src/utility/LCSLen.cpp b/src/utility/LCSLen.cpp new file mode 100644 index 0000000..76e08e8 --- /dev/null +++ b/src/utility/LCSLen.cpp @@ -0,0 +1,103 @@ +/* + * LCSLen.cpp + * + * Created on: Dec 6, 2012 + * Author: Hani Zakaria Girgis, PhD + */ + +#include "LCSLen.h" +#include "../utility/Util.h" +#include "../exception/InvalidInputException.h" + +#include + +using namespace std; +using namespace exception; + +namespace utility { + +LCSLen::LCSLen(const char * seq1In, int start1In, int end1In, + const char * seq2In, int start2In, int end2In) { + seq1 = seq1In; + start1 = start1In; + end1 = end1In; + + seq2 = seq2In; + start2 = start2In; + end2 = end2In; + + if(start1 < 0 || end1 < 0 || start1 > end1){ + string msg("Invalid Input. Start1 is "); + msg.append(Util::int2string(start1)); + msg.append(". End 1 is "); + msg.append(Util::int2string(end1)); + msg.append("."); + throw InvalidInputException(msg); + } + + if(start2 < 0 || end2 < 0 || start2 > end2){ + string msg("Invalid Input. Start2 is "); + msg.append(Util::int2string(start2)); + msg.append(". 
End2 is "); + msg.append(Util::int2string(end2)); + msg.append("."); + throw InvalidInputException(msg); + } + + // Validate input + // cout << start1 << " " << end1 << endl; + // cout << start2 << " " << end2 << endl; + + + len1 = end1 - start1 + 2; + len2 = end2 - start2 + 2; + + lenTotal = 2 * len2; + cTable = new int[lenTotal]; + + for (int i = 0; i < lenTotal; i++) { + cTable[i] = 0; + } + + findLcs(); +} + +LCSLen::~LCSLen() { + delete[] cTable; +} + +void LCSLen::findLcs() { + int iM1Index = 0; + int iIndex = len2; + + for (int i = 1; i < len1; i++) { + char base1 = seq1[start1 + i - 1]; + + for (int j = 1; j < len2; j++) { + int ijIndex = iIndex + j; + if (base1 == seq2[start2 + j - 1]) { + cTable[ijIndex] = cTable[iM1Index + j - 1] + 1; + } else { + if (cTable[iM1Index + j] > cTable[iIndex + j - 1]) { + cTable[ijIndex] = cTable[iM1Index + j]; + } else { + cTable[ijIndex] = cTable[iIndex + j - 1]; + } + } + } + + if(i != len1-1){ + for(int h = 0; h < len2; h++){ + cTable[h] = cTable[len2+h]; + } + } + } + lenCS = cTable[lenTotal-1]; +} + +int LCSLen::getLenCS(){ + return lenCS; +} + +} +/* namespace utility */ diff --git a/src/utility/LCSLen.h b/src/utility/LCSLen.h new file mode 100644 index 0000000..98b9364 --- /dev/null +++ b/src/utility/LCSLen.h @@ -0,0 +1,37 @@ +/* + * LCSLen.h + * + * Created on: Dec 6, 2012 + * Author: Hani Zakaria Girgis, PhD + */ + +#ifndef LCSLEN_H_ +#define LCSLEN_H_ + +namespace utility { + +class LCSLen { +private: + const char * seq1; + int start1; + int end1; + const char * seq2; + int start2; + int end2; + + int len1; + int len2; + int lenTotal; + int lenCS; + + int * cTable; + void findLcs(); + +public: + LCSLen(const char *, int, int, const char *, int, int); + virtual ~LCSLen(); + int getLenCS(); +}; + +} /* namespace utility */ +#endif /* LCSLEN_H_ */ diff --git a/src/utility/Location.cpp b/src/utility/Location.cpp new file mode 100644 index 0000000..7a39e03 --- /dev/null +++ b/src/utility/Location.cpp @@ -0,0 +1,74 @@ 
+/* + * Location.cpp + * + * Created on: Dec 19, 2012 + * Author: Hani Zakaria Girgis, PhD + */ + +#include "Location.h" +#include "Util.h" +#include "../exception/InvalidInputException.h" + +using namespace exception; + +namespace utility { + +Location::Location(int startIn, int endIn) { + initialize(startIn, endIn); +} + +Location::Location(ILocation& cp) { + initialize(cp.getStart(), cp.getEnd()); +} + +void Location::initialize(int startIn, int endIn) { + start = startIn; + end = endIn; + check(); + +} + +void Location::check() { + if (start < 0 || end < 0 || start > end) { + string msg("Invalid Input. Start is "); + msg.append(Util::int2string(start)); + msg.append(". End is "); + msg.append(Util::int2string(end)); + msg.append("."); + throw InvalidInputException(msg); + } +} + +Location::~Location() { +} + +int Location::getEnd() const { + return end; +} + +int Location::getStart() const { + return start; +} + +void Location::setEnd(int endIn) { + end = endIn; + check(); +} + +void Location::setStart(int startIn) { + start = startIn; + check(); +} + +int Location::getLength() { + return end - start + 1; +} + +string Location::toString() { + string msg = (Util::int2string(start)); + msg.append("-"); + msg.append(Util::int2string(end)); + + return msg; +} +} diff --git a/src/utility/Location.h b/src/utility/Location.h new file mode 100644 index 0000000..042b2b9 --- /dev/null +++ b/src/utility/Location.h @@ -0,0 +1,41 @@ +/* + * Location.h + * + * Created on: Dec 19, 2012 + * Author: Hani Zakaria Girgis, PhD + */ + +#ifndef LOCATION_H_ +#define LOCATION_H_ + +#include "ILocation.h" + +#include + +using namespace std; + +namespace utility { + +class Location : public ILocation{ +private: + int start; + int end; + void initialize(int, int); + void check(); + +public: + Location(int, int); + Location(ILocation&); + virtual ~Location(); + + int getEnd() const; + int getStart() const; + void setEnd(int); + void setStart(int); + int getLength(); + string toString(); 
+}; + +} + +#endif /* LOCATION_H_ */ diff --git a/src/utility/Util.cpp b/src/utility/Util.cpp new file mode 100644 index 0000000..4a6d4c1 --- /dev/null +++ b/src/utility/Util.cpp @@ -0,0 +1,347 @@ +/* + * Util.cpp + * + * Created on: Apr 24, 2012 + * Author: Hani Zakaria Girgis, PhD + * This class has a collection of utilities. + */ +#include "Util.h" + +Util::Util() { + // TODO Auto-generated constructor stub + +} + +Util::~Util() { + // TODO Auto-generated destructor stub +} + +string Util::fileSeparator("/"); + +//string * Util::emptyString = new string(""); + +void Util::readFasta(string seqFile, vector * infoList, + vector * seqList, bool canCheckFormat) { + ifstream in(seqFile.c_str()); + string info; + + bool isFirst = true; + string basePtr(""); + + while (in.good()) { + string line; + getline(in, line); + if (line[0] == '>') { + if (canCheckFormat) { + int colIndex = line.find_first_of(':'); + int dashIndex = line.find_first_of('-'); + if (colIndex < 0 || dashIndex < 0) { + string msg = + "The header must be in the following format: chromosome:start-end\n"; + msg += "The current input: " + line; + throw InvalidInputException(msg); + } + } + + infoList->push_back(line); + if (!isFirst) { + seqList->push_back(basePtr); + basePtr = string(""); + } else { + isFirst = false; + } + } else { + basePtr.append(line); + } + } + seqList->push_back(basePtr); + in.close(); + + // cout << "The system read " << infoList->size() << " sequences." << endl; + + // Post condition + if (infoList->size() != seqList->size()) { + cerr << "Error while reading the fasta input file. 
" + << "Header count = " << infoList->size() << " " + << "Sequence count = " << seqList->size() << endl; + exit(1); + } +} + +void Util::readFasta(string seqFile, vector * infoList, + vector * seqList) { + ifstream in(seqFile.c_str()); + string info; + + bool isFirst = true; + string * basePtr = new string(""); + while (in.good()) { + string line; + getline(in, line); + if (line[0] == '>') { + infoList->push_back(line); + if (!isFirst) { + seqList->push_back(*basePtr); + basePtr = new string(""); + } else { + isFirst = false; + } + } else { + basePtr->append(line); + } + } + seqList->push_back(*basePtr); + in.close(); + + // Post condition + if (infoList->size() != seqList->size()) { + cerr << "Error while reading the fasta input file. " + << "Header count = " << infoList->size() << " " + << "Sequence count = " << seqList->size() << endl; + exit(1); + } +} + +void Util::readCoordinates(string fileName, vector * coor) { + checkFile(fileName); + + ifstream in(fileName.c_str()); + string line; + + while (in >> line) { + int colIndex = line.find_first_of(':'); + int dashIndex = line.find_first_of('-'); + + int start = atoi(line.substr(colIndex + 1, dashIndex - colIndex - 1).c_str()); + int end = atoi(line.substr(dashIndex + 1).c_str()); + Location * loc = new Location(start, end); + coor->push_back(loc); + } + + //cout << "Read "; + //cout << coor->size() << endl; + + in.close(); +} + +void Util::readChromList(string genomeDir, vector * chromList, + string ext) { + // This function may not be platform-independent + // Credit: http://www.cplusplus.com/forum/beginner/9173/ + DIR * dirPtr = opendir(genomeDir.c_str()); + + struct dirent * entry; + entry = readdir(dirPtr); + while (entry) { + string file(entry->d_name); + // Credit: http://stackoverflow.com/questions/51949/how-to-get-file-extension-from-string-in-c + if (file.substr(file.find_last_of(".") + 1) == ext) { + chromList->push_back(genomeDir + fileSeparator + entry->d_name); + } + entry = readdir(dirPtr); + } + + 
closedir(dirPtr); +} + +// This method will modify the contents of its parameter basePtr! +void Util::toUpperCase(string * basePtr) { + string base = *basePtr; + // Convert alphabet to upper case + for (int i = 0; i < base.length(); i++) { + base[i] = toupper(base[i]); + } +} + +void Util::toUpperCase(string& base) { + // Convert alphabet to upper case + for (int i = 0; i < base.length(); i++) { + base[i] = toupper(base[i]); + } +} + +// credit: http://stackoverflow.com/questions/228005/alternative-to-itoa-for-converting-integer-to-string-c +string Util::int2string(int i) { + string s; + stringstream out; + out << i; + s = out.str(); + return s; +} + +// Need to use templates +string Util::double2string(double i) { + string s; + stringstream out; + out << i; + s = out.str(); + return s; +} + +string Util::long2string(long i) { + string s; + stringstream out; + out << i; + s = out.str(); + return s; +} + +void Util::checkFile(string fileName) { + ifstream f1(fileName.c_str()); + if (!f1) { + string message = string("ERROR: "); + message.append(fileName); + message.append(" does not exist.\n"); + throw FileDoesNotExistException(message); + } + f1.close(); +} + +void Util::deleteFile(string fileName) { + ifstream f1(fileName.c_str()); + if (f1) { + if (remove(fileName.c_str()) != 0) { + cerr << "Could not remove: " << fileName << endl; + } else { + cout << "Deleting: " << fileName << endl; + } + } else { + cerr << "Warning! 
This file does not exist: " << fileName << endl; + } + f1.close(); +} + +void Util::deleteFilesUnderDirectory(string dirName) { + // This function may not be platform-independent + // Credit: http://www.cplusplus.com/forum/beginner/9173/ + DIR * dirPtr = opendir(dirName.c_str()); + struct dirent * entry; + entry = readdir(dirPtr); + while (entry) { + string file(entry->d_name); + if (file.compare(string(".")) == 0 || file.compare(string("..")) == 0) { + // Skip current and parent directories + } else { + string url = dirName; + url.append(fileSeparator); + url.append(file); + deleteFile(url); + // cerr << "Deleting " << file << endl; + } + entry = readdir(dirPtr); + } + closedir(dirPtr); +} + +bool Util::isOverlapping(int s1, int e1, int s2, int e2) { + if (s1 > e1) { + string msg("Util::isOverlapping. Invalid Input. s1 is "); + msg.append(Util::int2string(s1)); + msg.append(". e1 is "); + msg.append(Util::int2string(e1)); + msg.append("."); + throw InvalidInputException(msg); + } + + if (s2 > e2) { + string msg("Util::isOverlapping. Invalid Input. s2 is "); + msg.append(Util::int2string(s2)); + msg.append(". e2 is "); + msg.append(Util::int2string(e2)); + msg.append("."); + throw InvalidInputException(msg); + } + + bool isStartWithin = s2 >= s1 && s2 <= e1; + bool isEndWithin = e2 >= s1 && e2 <= e1; + bool isIncluding = s2 >= s1 && e2 <= e1; + bool isIncluded = s1 >= s2 && e1 <= e2; + bool isAdjacent = (e1 == (s2 + 1)) || (e2 == (s1 + 1)); + + return (isStartWithin || isEndWithin || isIncluding || isIncluded + || isAdjacent); +} + +/** + * The input string is s. + * The reverse complement is rc. + * The start, and the end are inclusive. 
+ */ +void Util::revCompDig(const char * s, int start, int end, string * rc) { + for (int i = end; i >= start; i--) { + char b = s[i]; + switch (b) { + case 0: + rc->append(1, 3); + break; + case 3: + rc->append(1, 0); + break; + case 1: + rc->append(1, 2); + break; + case 2: + rc->append(1, 1); + break; + default: + string msg("Valid codes are 0-3. The invalid code is "); + msg.append(1, b); + throw InvalidInputException(msg); + } + } +} + +void Util::revCompDig(string * s, string * rc) { + revCompDig(s->c_str(), 0, s->size() - 1, rc); + + /* + int len = s->size(); + for (int i = len - 1; i >= 0; i--) { + char b = s->at(i); + switch (b) { + case 0: + rc->append(1, 3); + break; + case 3: + rc->append(1, 0); + break; + case 1: + rc->append(1, 2); + break; + case 2: + rc->append(1, 1); + break; + default: + string msg("Valid codes are 0-3. The invalid code is "); + msg.append(1, b); + throw InvalidInputException(msg); + } + } + */ +} + +void Util::writeFasta(const string& sequence, const string& header, + const string& outputFile) { + ofstream outMask; + outMask.open(outputFile.c_str(), ios::out); + outMask << header << endl; + int step = 50; + int len = sequence.size(); + for (int i = 0; i < len; i = i + step) { + int e = (i + step - 1 > len - 1) ? 
len - 1 : i + step - 1; + for (int k = i; k <= e; k++) { + outMask << sequence[k]; + } + outMask << endl; + } + outMask.close(); +} + +int Util::sumTotalLength(const vector * list) { + int size = list->size(); + int sum = 0; + for (int i = 0; i < size; i++) { + sum += list->at(i)->getLength(); + } + return sum; +} diff --git a/src/utility/Util.h b/src/utility/Util.h new file mode 100644 index 0000000..a9ed695 --- /dev/null +++ b/src/utility/Util.h @@ -0,0 +1,79 @@ +/* + * Util.h + * + * Created on: Apr 24, 2012 + * Author: Hani Zakaria Girgis, PhD + */ + +#ifndef UTIL_H_ +#define UTIL_H_ + +#include "Location.h" +#include "../exception/FileDoesNotExistException.h" +#include "../exception/InvalidInputException.h" + +#include +#include +#include +#include +#include +#include +#include + +using namespace std; +using namespace utility; +using namespace exception; + +namespace utility { +class Util { +private: + Util(); + ~Util(); + +public: + static string * emptyString; + static string fileSeparator; + static void readFasta(string, vector *, vector *, bool); + static void readFasta(string, vector *, vector *); + static void readCoordinates(string, vector *); + static void readChromList(string, vector *, string); + static void toUpperCase(string*); + static void toUpperCase(string&); + static string int2string(int); + static string double2string(double); + static string long2string(long); + static void deleteFile(string); + static void deleteFilesUnderDirectory(string); + static void checkFile(string); + static bool isOverlapping(int, int, int, int); + static void revCompDig(string *, string *); + static void revCompDig(const char* sequence, int, int, string *); + + static void writeFasta(const string&, const string&, const string&); + + static int sumTotalLength(const vector *); + + /** + * Delete the objects pointed to by pointers in a vector. + * It does not delete the vector itself. 
+ * + * Credit: http://stackoverflow.com/questions/594089/does-stdvector-clear-do-delete-free-memory-on-each-element + */ + template + static void deleteInVector(vector * deleteMe) { + while (!deleteMe->empty()) { + delete deleteMe->back(); + deleteMe->pop_back(); + } + + // Set the size to zero + deleteMe->clear(); + + // Set the capacity to zero + vector empty; + deleteMe->swap(empty); + } +}; +} + +#endif /* UTIL_H_ */