From 4834086d1c7e282613220ce46d1ca8018ab9927f Mon Sep 17 00:00:00 2001 From: Marek Otahal Date: Thu, 11 Jul 2019 22:18:24 +0200 Subject: [PATCH 1/6] Parallel: compile with TBB if g++-9 --- CommonCompilerConfig.cmake | 1 + 1 file changed, 1 insertion(+) diff --git a/CommonCompilerConfig.cmake b/CommonCompilerConfig.cmake index 39a2813f50..41af4c570e 100644 --- a/CommonCompilerConfig.cmake +++ b/CommonCompilerConfig.cmake @@ -100,6 +100,7 @@ set(boost_required ON) if(NOT FORCE_CPP11) if(${CMAKE_CXX_COMPILER_ID} STREQUAL "GNU") if(CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL "9") + set(extra_lib_for_filesystem "tbb") #TODO hijacked for parallel TS, link with tbb for g++-9 set(CMAKE_CXX_STANDARD 17) set(boost_required OFF) elseif(CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL "8") From 2e0760b6c5be6e052debb971209369033f632161 Mon Sep 17 00:00:00 2001 From: Marek Otahal Date: Thu, 11 Jul 2019 22:18:50 +0200 Subject: [PATCH 2/6] Parallel: WIP parallel demo on MNIST --- src/examples/mnist/MNIST_SP.cpp | 43 +++++++++++++++++++++++++++------ 1 file changed, 35 insertions(+), 8 deletions(-) diff --git a/src/examples/mnist/MNIST_SP.cpp b/src/examples/mnist/MNIST_SP.cpp index 35c46fae1b..0144bd38eb 100644 --- a/src/examples/mnist/MNIST_SP.cpp +++ b/src/examples/mnist/MNIST_SP.cpp @@ -36,6 +36,9 @@ #include // MNIST data itself + read methods, namespace mnist:: #include // mnist::binarize_dataset +#include +#include +#include using namespace std; using namespace htm; @@ -67,12 +70,12 @@ class MNIST { public: UInt verbosity = 1; - const UInt train_dataset_iterations = 2u; //epochs somewhat help, at linear time + const UInt train_dataset_iterations = 5u; //epochs somewhat help, at linear time void setup() { - input.initialize({28, 28,1}); + input.initialize({28, 28, 1}); columns.initialize({28, 28, 8}); //1D vs 2D no big difference, 2D seems more natural for the problem. Speed-----, Results+++++++++; #columns HIGHEST impact. 
sp.initialize( /* inputDimensions */ input.dimensions, @@ -83,12 +86,12 @@ void setup() { /* localAreaDensity */ 0.1f, // % active bits /* numActiveColumnsPerInhArea */ -1, /* stimulusThreshold */ 6u, - /* synPermInactiveDec */ 0.002f, //FIXME inactive decay permanence plays NO role, investigate! (slightly better w/o it) + /* synPermInactiveDec */ 0.002f, //very low values better for MNIST /* synPermActiveInc */ 0.14f, //takes upto 5x steps to get dis/connected /* synPermConnected */ 0.5f, //no difference, let's leave at 0.5 in the middle /* minPctOverlapDutyCycles */ 0.2f, //speed of re-learning? /* dutyCyclePeriod */ 1402, - /* boostStrength */ 2.0f, // Boosting does help, but entropy is high, on MNIST it does not matter, for learning with TM prefer boosting off (=0.0), or "neutral"=1.0 + /* boostStrength */ 12.0f, // Boosting does help, but entropy is high, on MNIST it does not matter, for learning with TM prefer boosting off (BOOSTING_DISABLED), or "neutral"=1.0 /* seed */ 4u, /* spVerbosity */ 1u, /* wrapAround */ true); // does not matter (helps slightly) @@ -126,17 +129,41 @@ void train() { } Random().shuffle( index.begin(), index.end() ); - for(const auto idx : index) { // index = order of label (shuffeled) + + + //parallel loop with TBB + std::mutex m; + + tbb::parallel_for( tbb::blocked_range(0, index.size()), + [&](tbb::blocked_range r) { +// for(size_t i=0; i< index.size(); i++) { // index = order of label (shuffeled) + for(auto i = r.begin(); i < r.end(); ++i) { + + const auto idx = index[i]; // Get the input & label const auto image = dataset.training_images.at(idx); const UInt label = dataset.training_labels.at(idx); // Compute & Train - input.setDense( image ); - sp.compute(input, true, columns); - clsr.learn( columns, {label} ); + SDR Pinput(input.dimensions); + Pinput.setDense( image ); + + SDR Pcolumns({28,28,8}); + sp.compute(Pinput, true, Pcolumns); //TODO change to return output? + //TODO make compute() const for parallelization? 
+ + // sync this + m.lock(); + clsr.learn( Pcolumns, {label} ); + m.unlock(); + if( verbosity && (++i % 1000 == 0) ) cout << "." << flush; } + }); // !end of lambda + + + + if( verbosity ) cout << endl; cout << "epoch ended" << endl; From b27b40c07d4775a8995f65c36d6f9cfcd0e9e95e Mon Sep 17 00:00:00 2001 From: Marek Otahal Date: Fri, 12 Jul 2019 14:51:45 +0200 Subject: [PATCH 3/6] MNIST: provide both single, parallel version --- src/CMakeLists.txt | 23 +++ src/examples/mnist/MNIST_SP.cpp | 41 +---- src/examples/mnist/MNIST_SP_parallel.cpp | 225 +++++++++++++++++++++++ 3 files changed, 252 insertions(+), 37 deletions(-) create mode 100644 src/examples/mnist/MNIST_SP_parallel.cpp diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 8f589ab5bf..5671397d23 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -393,6 +393,29 @@ add_custom_target(mnist COMMENT "Executing ${src_executable_mnistsp}" VERBATIM) +######################################################### +## MNIST Spatial Pooler (Parallel Example) +# +set(src_executable_mnistsp_par mnist_sp_par) +add_executable(${src_executable_mnistsp_par} examples/mnist/MNIST_SP_parallel.cpp) +target_link_libraries(${src_executable_mnistsp_par} + ${INTERNAL_LINKER_FLAGS} #par uses tbb + ${core_library} + ${COMMON_OS_LIBS} +) +target_compile_options( ${src_executable_mnistsp_par} PUBLIC ${INTERNAL_CXX_FLAGS}) +target_compile_definitions(${src_executable_mnistsp_par} PRIVATE ${COMMON_COMPILER_DEFINITIONS}) +# Pass MNIST data directory to main.cpp +target_compile_definitions(${src_executable_mnistsp_par} PRIVATE MNIST_DATA_LOCATION=${mnist_SOURCE_DIR}) +target_include_directories(${src_executable_mnistsp_par} PRIVATE + ${CORE_LIB_INCLUDES} + ${EXTERNAL_INCLUDES} +) +add_custom_target(mnist_parallel + COMMAND ${src_executable_mnistsp_par} + DEPENDS ${src_executable_mnistsp_par} + COMMENT "Executing ${src_executable_mnistsp_par}" + VERBATIM) ############ INSTALL ###################################### # diff --git 
a/src/examples/mnist/MNIST_SP.cpp b/src/examples/mnist/MNIST_SP.cpp index bd34f6ddfd..5d2196c259 100644 --- a/src/examples/mnist/MNIST_SP.cpp +++ b/src/examples/mnist/MNIST_SP.cpp @@ -36,9 +36,6 @@ #include // MNIST data itself + read methods, namespace mnist:: #include // mnist::binarize_dataset -#include -#include -#include using namespace std; using namespace htm; @@ -78,7 +75,7 @@ class MNIST { void setup() { - input.initialize({28, 28, 1}); + input.initialize({28, 28,1}); columns.initialize({28, 28, 8}); //1D vs 2D no big difference, 2D seems more natural for the problem. Speed-----, Results+++++++++; #columns HIGHEST impact. sp.initialize( /* inputDimensions */ input.dimensions, @@ -89,12 +86,12 @@ void setup() { /* localAreaDensity */ 0.1f, // % active bits /* numActiveColumnsPerInhArea */ -1, /* stimulusThreshold */ 6u, - /* synPermInactiveDec */ 0.002f, //very low values better for MNIST + /* synPermInactiveDec */ 0.002f, //FIXME inactive decay permanence plays NO role, investigate! (slightly better w/o it) /* synPermActiveInc */ 0.14f, //takes upto 5x steps to get dis/connected /* synPermConnected */ 0.5f, //no difference, let's leave at 0.5 in the middle /* minPctOverlapDutyCycles */ 0.2f, //speed of re-learning? 
/* dutyCyclePeriod */ 1402, - /* boostStrength */ 12.0f, // Boosting does help, but entropy is high, on MNIST it does not matter, for learning with TM prefer boosting off (BOOSTING_DISABLED), or "neutral"=1.0 + /* boostStrength */ 2.0f, // Boosting does help, but entropy is high, on MNIST it does not matter, for learning with TM prefer boosting off (=0.0), or "neutral"=1.0 /* seed */ 4u, /* spVerbosity */ 1u, /* wrapAround */ true); // does not matter (helps slightly) @@ -138,48 +135,18 @@ void train(const bool skipSP=false) { } Random().shuffle( index.begin(), index.end() ); - - - //parallel loop with TBB - std::mutex m; - - tbb::parallel_for( tbb::blocked_range(0, index.size()), - [&](tbb::blocked_range r) { -// for(size_t i=0; i< index.size(); i++) { // index = order of label (shuffeled) - for(auto i = r.begin(); i < r.end(); ++i) { - - const auto idx = index[i]; + for(const auto idx : index) { // index = order of label (shuffeled) // Get the input & label const auto image = dataset.training_images.at(idx); const UInt label = dataset.training_labels.at(idx); // Compute & Train -<<<<<<< HEAD - SDR Pinput(input.dimensions); - Pinput.setDense( image ); - - SDR Pcolumns({28,28,8}); - sp.compute(Pinput, true, Pcolumns); //TODO change to return output? - //TODO make compute() const for parallelization? - - // sync this - m.lock(); - clsr.learn( Pcolumns, {label} ); - m.unlock(); - -======= input.setDense( image ); if(not skipSP) sp.compute(input, true, columns); clsr.learn( skipSP ? input : columns, {label} ); ->>>>>>> master_community if( verbosity && (++i % 1000 == 0) ) cout << "." 
<< flush; } - }); // !end of lambda - - - - if( verbosity ) cout << endl; cout << "epoch ended" << endl; diff --git a/src/examples/mnist/MNIST_SP_parallel.cpp b/src/examples/mnist/MNIST_SP_parallel.cpp new file mode 100644 index 0000000000..3c20361af6 --- /dev/null +++ b/src/examples/mnist/MNIST_SP_parallel.cpp @@ -0,0 +1,225 @@ +/* --------------------------------------------------------------------- + * Copyright (C) 2018-2019, David McDougall, @breznak + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero Public License version 3 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * See the GNU Affero Public License for more details. + * + * You should have received a copy of the GNU Affero Public License + * along with this program. If not, see http://www.gnu.org/licenses. + * ---------------------------------------------------------------------- + */ + +/** + * Solving the MNIST dataset with Spatial Pooler. Parallel demonstartion using c++17 TS Parallel (TBB) + * Requirements: + * - c++17 codebase + * - compiler: MSVC 2017+, g++-9 + * - link with TBB (The Building Blocks) + * + * + * Note 1: the example is more ugly, because we parallelize for-loop, compared to std::algorithms `sort(execution::policy::par, a.begin(), a.end());` + * Note 2: Running SP.compute() in parallel is useless for sequences, but works for MNIST and the likes. + * + * This consists of a simple black & white image encoder, a spatial pool, and an + * SDR classifier. The task is to recognise images of hand written numbers 0-9. + * This should score at least 95%. 
+ */ + +#include //uint8_t +#include +#include // std::ofstream +#include + +#include +#include +#include +#include + +#include // MNIST data itself + read methods, namespace mnist:: +#include // mnist::binarize_dataset + +//includes for TS Parallel +#include +#include +#include + +using namespace std; +using namespace htm; + +class MNIST { +/** + * RESULTS: Store results in the MNIST_SP.cpp file only, this parallel is just for experimenting with parallelization. + */ + + private: + SpatialPooler sp; + SDR input; + SDR columns; + Classifier clsr; + mnist::MNIST_dataset, uint8_t> dataset; + + public: + UInt verbosity = 1; + const UInt train_dataset_iterations = 20u; //epochs somewhat help, at linear time + + +void setup() { + + input.initialize({28, 28, 1}); + columns.initialize({28, 28, 8}); //1D vs 2D no big difference, 2D seems more natural for the problem. Speed-----, Results+++++++++; #columns HIGHEST impact. + sp.initialize( + /* inputDimensions */ input.dimensions, + /* columnDimensions */ columns.dimensions, + /* potentialRadius */ 7, // with 2D, 7 results in 15x15 area, which is cca 25% for the input area. Slightly improves than 99999 aka "no topology, all to all connections" + /* potentialPct */ 0.1f, //we have only 10 classes, and << #columns. So we want to force each col to specialize. Cca 0.3 w "7" above, or very small (0.1) for "no topology". Cannot be too small due to internal checks. Speed++ + /* globalInhibition */ true, //Speed+++++++; SDR quality-- (global does have active nearby cols, which we want to avoid (local)); Results+-0 + /* localAreaDensity */ 0.1f, // % active bits + /* numActiveColumnsPerInhArea */ -1, + /* stimulusThreshold */ 6u, + /* synPermInactiveDec */ 0.002f, //very low values better for MNIST + /* synPermActiveInc */ 0.14f, //takes upto 5x steps to get dis/connected + /* synPermConnected */ 0.5f, //no difference, let's leave at 0.5 in the middle + /* minPctOverlapDutyCycles */ 0.2f, //speed of re-learning? 
+ /* dutyCyclePeriod */ 1402, + /* boostStrength */ 12.0f, // Boosting does help, but entropy is high, on MNIST it does not matter, for learning with TM prefer boosting off (BOOSTING_DISABLED), or "neutral"=1.0 + /* seed */ 4u, + /* spVerbosity */ 1u, + /* wrapAround */ true); // does not matter (helps slightly) + + // Save the connections to file for postmortem analysis. + ofstream dump("mnist_sp_initial.connections", ofstream::binary | ofstream::trunc | ofstream::out); + sp.connections.save( dump ); + dump.close(); + + clsr.initialize( /* alpha */ 0.001f); + + dataset = mnist::read_dataset(string("../ThirdParty/mnist_data/mnist-src/")); //from CMake + mnist::binarize_dataset(dataset); +} + + +/** + * train the SP on the training set. + * @param skipSP bool (default false) if set, output directly the input to the classifier. + * This is used for a baseline benchmark (Classifier directly learns on input images) + */ +void train(const bool skipSP=false) { + // Train + + if(verbosity) + cout << "Training for " << (train_dataset_iterations * dataset.training_labels.size()) + << " cycles ..." << endl; + size_t i = 0; + + Metrics inputStats(input, 1402); + Metrics columnStats(columns, 1402); + + Timer tTrain(true); + + for(auto epoch = 0u; epoch < train_dataset_iterations; epoch++) { + NTA_INFO << "epoch " << epoch; + // Shuffle the training data. + vector index( dataset.training_labels.size() ); + for (UInt i=0; i(0, index.size()), + [&](tbb::blocked_range r) { +// for(size_t i=0; i< index.size(); i++) { // index = order of label (shuffeled) + for(auto i = r.begin(); i < r.end(); ++i) { + + const auto idx = index[i]; + // Get the input & label + const auto image = dataset.training_images.at(idx); + const UInt label = dataset.training_labels.at(idx); + + // Compute & Train + SDR Pinput(input.dimensions); + Pinput.setDense( image ); + + SDR Pcolumns({28,28,8}); + if(not skipSP) + sp.compute(Pinput, true, Pcolumns); //TODO change to return output? 
+ //TODO make compute() const for parallelization? + + // sync this + { + m.lock(); //TODO use better locks than just mutex, unique_lock etc + clsr.learn( Pcolumns, {label} ); + clsr.learn( skipSP ? Pinput : Pcolumns, {label} ); + m.unlock(); + } + if( verbosity && (++i % 1000 == 0) ) cout << "." << flush; + } + }); // !end of lambda + + + if( verbosity ) cout << endl; + + cout << "epoch ended" << endl; + cout << "inputStats " << inputStats << endl; + cout << "columnStats " << columnStats << endl; + cout << sp << endl; + } + + tTrain.stop(); + cout << "MNIST train time: " << tTrain.getElapsed() << endl; + + // Save the connections to file for postmortem analysis. + ofstream dump("mnist_sp_learned.connections", ofstream::binary | ofstream::trunc | ofstream::out); + sp.connections.save( dump ); + dump.close(); +} + +void test(const bool skipSP=false) { + // Test + Real score = 0; + UInt n_samples = 0; + if(verbosity) + cout << "Testing for " << dataset.test_labels.size() << " cycles ..." << endl; + for(UInt i = 0; i < dataset.test_labels.size(); i++) { + // Get the input & label + const auto image = dataset.test_images.at(i); + const UInt label = dataset.test_labels.at(i); + + // Compute + input.setDense( image ); + if(not skipSP) + sp.compute(input, false, columns); + + // Check results + if( argmax( clsr.infer( skipSP ? input : columns ) ) == label) + score += 1; + n_samples += 1; + if( verbosity && i % 1000 == 0 ) cout << "." << flush; + } + if( verbosity ) cout << endl; + cout << "===========RESULTs=================" << endl; + cout << "Score: " << 100.0 * score / n_samples << "% ("<< (n_samples - score) << " / " << n_samples << " wrong). 
" << endl; + cout << "SDR example: " << columns << endl; +} + +}; // End class MNIST + +int main(int argc, char **argv) { + MNIST m; + cout << "=========== Spatial Pooler (parallel) =====================" << endl; + m.setup(); + m.train(); + m.test(); + + return 0; +} + From a4e0113859d03bce330da7825c956236c3674ba0 Mon Sep 17 00:00:00 2001 From: Marek Otahal Date: Wed, 17 Jul 2019 11:51:41 +0200 Subject: [PATCH 4/6] add Parallelizable header --- src/CMakeLists.txt | 1 + src/examples/mnist/MNIST_SP_parallel.cpp | 7 ++---- src/htm/types/Parallelizable.hpp | 29 ++++++++++++++++++++++++ 3 files changed, 32 insertions(+), 5 deletions(-) create mode 100644 src/htm/types/Parallelizable.hpp diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 5671397d23..4a16a37c71 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -168,6 +168,7 @@ set(regions_files set(types_files htm/types/Exception.hpp + htm/types/Parallelizable.hpp htm/types/Types.hpp htm/types/Serializable.hpp htm/types/Sdr.hpp diff --git a/src/examples/mnist/MNIST_SP_parallel.cpp b/src/examples/mnist/MNIST_SP_parallel.cpp index d97b72d136..93d93b3fa9 100644 --- a/src/examples/mnist/MNIST_SP_parallel.cpp +++ b/src/examples/mnist/MNIST_SP_parallel.cpp @@ -41,14 +41,11 @@ #include #include +#include + #include // MNIST data itself + read methods, namespace mnist:: #include // mnist::binarize_dataset -//includes for TS Parallel -#include -#include -#include - using namespace std; using namespace htm; diff --git a/src/htm/types/Parallelizable.hpp b/src/htm/types/Parallelizable.hpp new file mode 100644 index 0000000000..d5ac97a5f9 --- /dev/null +++ b/src/htm/types/Parallelizable.hpp @@ -0,0 +1,29 @@ +/** + * Parallelizable.hpp + * + * include this header to files where you want to run blocks of code in parallel. + * We use [C++17 standard Parallel TS](https://en.cppreference.com/w/cpp/experimental/parallelism). 
+ * + * Requirements: + * - c++17 + * - Threading Building Blocks (TBB) linked to the library + * - [supported compiler](https://en.cppreference.com/w/cpp/compiler_support#cpp17): currently GCC-9+, MSVC 2019 + * //TODO: switch to c++17 by default, or implement `transform()` temporarily as a custom method? + * + * Functionality: + * - include all needed headers for the given platform, compiler, ... + * - handle the define `NUM_PARALLEL=n` + * - to run in a single thread, set NUM_PARALLEL=1 + * + */ + +//includes for TS Parallel +#include //std::execution::par, seq, par_unseq +// #include +#include + +namespace htm { +namespace parallel { + const constexpr auto mode = std::execution::par; //TODO ifdef NUM_PARALLEL=1 -> seq +} +} From 230905c2b54342f058c0bd0e18faabf32d45a045 Mon Sep 17 00:00:00 2001 From: Marek Otahal Date: Wed, 17 Jul 2019 13:27:42 +0200 Subject: [PATCH 5/6] SP: global inhibition: parallel try execution::par_unseq for inhibition in SP. Results: takes much longer than seq. --- src/examples/mnist/MNIST_SP.cpp | 6 +++++- src/htm/algorithms/SpatialPooler.cpp | 9 ++++++++- src/htm/algorithms/SpatialPooler.hpp | 3 ++- src/htm/types/Parallelizable.hpp | 2 +- 4 files changed, 16 insertions(+), 4 deletions(-) diff --git a/src/examples/mnist/MNIST_SP.cpp b/src/examples/mnist/MNIST_SP.cpp index 8bd2d4677b..9925e32daf 100644 --- a/src/examples/mnist/MNIST_SP.cpp +++ b/src/examples/mnist/MNIST_SP.cpp @@ -76,7 +76,7 @@ class MNIST { void setup() { input.initialize({28, 28,1}); - columns.initialize({28, 28, 8}); //1D vs 2D no big difference, 2D seems more natural for the problem. Speed-----, Results+++++++++; #columns HIGHEST impact. + columns.initialize({28, 28, 24}); //1D vs 2D no big difference, 2D seems more natural for the problem. Speed-----, Results+++++++++; #columns HIGHEST impact. 
sp.initialize( /* inputDimensions */ input.dimensions, /* columnDimensions */ columns.dimensions, @@ -156,6 +156,10 @@ void train(const bool skipSP=false) { tTrain.stop(); cout << "MNIST train time: " << tTrain.getElapsed() << endl; + cout << "SP nth \t" << sp.tNth.getElapsed() << endl; + cout << "SP sort \t" << sp.tSort.getElapsed() << endl; + cout << "SP while\t" << sp.tWhile.getElapsed() << endl; + // Save the connections to file for postmortem analysis. ofstream dump("mnist_sp_learned.connections", ofstream::binary | ofstream::trunc | ofstream::out); diff --git a/src/htm/algorithms/SpatialPooler.cpp b/src/htm/algorithms/SpatialPooler.cpp index ff170b4227..c2d659017e 100644 --- a/src/htm/algorithms/SpatialPooler.cpp +++ b/src/htm/algorithms/SpatialPooler.cpp @@ -27,6 +27,7 @@ #include #include #include +#include using namespace std; using namespace htm; @@ -844,19 +845,25 @@ void SpatialPooler::inhibitColumnsGlobal_(const vector &overlaps, // faster than a regular sort because it stops after it partitions the // elements about the Nth element, with all elements on their correct side of // the Nth element. - std::nth_element( + tNth.start(); + std::nth_element(htm::parallel::mode, activeColumns.begin(), activeColumns.begin() + numDesired, activeColumns.end(), compare); // Remove the columns which lost the competition. activeColumns.resize(numDesired); + tNth.stop(); // Finish sorting the winner columns by their overlap. 
+ tSort.start(); std::sort(activeColumns.begin(), activeColumns.end(), compare); + tSort.stop(); // Remove sub-threshold winners + tWhile.start(); while( !activeColumns.empty() && overlaps[activeColumns.back()] < stimulusThreshold_) activeColumns.pop_back(); + tWhile.stop(); } diff --git a/src/htm/algorithms/SpatialPooler.hpp b/src/htm/algorithms/SpatialPooler.hpp index 93832aab90..b6e9a53a4a 100644 --- a/src/htm/algorithms/SpatialPooler.hpp +++ b/src/htm/algorithms/SpatialPooler.hpp @@ -29,7 +29,7 @@ #include #include #include - +#include namespace htm { @@ -1210,6 +1210,7 @@ class SpatialPooler : public Serializable public: const Connections &connections = connections_; + mutable Timer tSort, tNth, tWhile; }; std::ostream & operator<<(std::ostream & out, const SpatialPooler &sp); diff --git a/src/htm/types/Parallelizable.hpp b/src/htm/types/Parallelizable.hpp index d5ac97a5f9..6957c85913 100644 --- a/src/htm/types/Parallelizable.hpp +++ b/src/htm/types/Parallelizable.hpp @@ -24,6 +24,6 @@ namespace htm { namespace parallel { - const constexpr auto mode = std::execution::par; //TODO ifdef NUM_PARALLEL=1 -> seq + const constexpr auto mode = std::execution::par_unseq; //TODO ifdef NUM_PARALLEL=1 -> seq } } From ab572cd2db60937d387294007374b4bb2618dbd7 Mon Sep 17 00:00:00 2001 From: Marek Otahal Date: Wed, 17 Jul 2019 13:42:34 +0200 Subject: [PATCH 6/6] Revert "MNIST: provide both single, parallel version" This reverts commit b27b40c07d4775a8995f65c36d6f9cfcd0e9e95e. 
--- src/CMakeLists.txt | 23 --- src/examples/mnist/MNIST_SP.cpp | 6 +- src/examples/mnist/MNIST_SP_parallel.cpp | 221 ----------------------- 3 files changed, 1 insertion(+), 249 deletions(-) delete mode 100644 src/examples/mnist/MNIST_SP_parallel.cpp diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 4a16a37c71..3f080cebc7 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -394,29 +394,6 @@ add_custom_target(mnist COMMENT "Executing ${src_executable_mnistsp}" VERBATIM) -######################################################### -## MNIST Spatial Pooler (Parallel Example) -# -set(src_executable_mnistsp_par mnist_sp_par) -add_executable(${src_executable_mnistsp_par} examples/mnist/MNIST_SP_parallel.cpp) -target_link_libraries(${src_executable_mnistsp_par} - ${INTERNAL_LINKER_FLAGS} #par uses tbb - ${core_library} - ${COMMON_OS_LIBS} -) -target_compile_options( ${src_executable_mnistsp_par} PUBLIC ${INTERNAL_CXX_FLAGS}) -target_compile_definitions(${src_executable_mnistsp_par} PRIVATE ${COMMON_COMPILER_DEFINITIONS}) -# Pass MNIST data directory to main.cpp -target_compile_definitions(${src_executable_mnistsp_par} PRIVATE MNIST_DATA_LOCATION=${mnist_SOURCE_DIR}) -target_include_directories(${src_executable_mnistsp_par} PRIVATE - ${CORE_LIB_INCLUDES} - ${EXTERNAL_INCLUDES} -) -add_custom_target(mnist_parallel - COMMAND ${src_executable_mnistsp_par} - DEPENDS ${src_executable_mnistsp_par} - COMMENT "Executing ${src_executable_mnistsp_par}" - VERBATIM) ############ INSTALL ###################################### # diff --git a/src/examples/mnist/MNIST_SP.cpp b/src/examples/mnist/MNIST_SP.cpp index 9925e32daf..8bd2d4677b 100644 --- a/src/examples/mnist/MNIST_SP.cpp +++ b/src/examples/mnist/MNIST_SP.cpp @@ -76,7 +76,7 @@ class MNIST { void setup() { input.initialize({28, 28,1}); - columns.initialize({28, 28, 24}); //1D vs 2D no big difference, 2D seems more natural for the problem. Speed-----, Results+++++++++; #columns HIGHEST impact. 
+ columns.initialize({28, 28, 8}); //1D vs 2D no big difference, 2D seems more natural for the problem. Speed-----, Results+++++++++; #columns HIGHEST impact. sp.initialize( /* inputDimensions */ input.dimensions, /* columnDimensions */ columns.dimensions, @@ -156,10 +156,6 @@ void train(const bool skipSP=false) { tTrain.stop(); cout << "MNIST train time: " << tTrain.getElapsed() << endl; - cout << "SP nth \t" << sp.tNth.getElapsed() << endl; - cout << "SP sort \t" << sp.tSort.getElapsed() << endl; - cout << "SP while\t" << sp.tWhile.getElapsed() << endl; - // Save the connections to file for postmortem analysis. ofstream dump("mnist_sp_learned.connections", ofstream::binary | ofstream::trunc | ofstream::out); diff --git a/src/examples/mnist/MNIST_SP_parallel.cpp b/src/examples/mnist/MNIST_SP_parallel.cpp deleted file mode 100644 index 93d93b3fa9..0000000000 --- a/src/examples/mnist/MNIST_SP_parallel.cpp +++ /dev/null @@ -1,221 +0,0 @@ -/* --------------------------------------------------------------------- - * Copyright (C) 2018-2019, David McDougall, @breznak - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero Public License version 3 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - * See the GNU Affero Public License for more details. - * - * You should have received a copy of the GNU Affero Public License - * along with this program. If not, see http://www.gnu.org/licenses. - * ---------------------------------------------------------------------- - */ - -/** - * Solving the MNIST dataset with Spatial Pooler. 
Parallel demonstartion using c++17 TS Parallel (TBB) - * Requirements: - * - c++17 codebase - * - compiler: MSVC 2017+, g++-9 - * - link with TBB (The Building Blocks) - * - * - * Note 1: the example is more ugly, because we parallelize for-loop, compared to std::algorithms `sort(execution::policy::par, a.begin(), a.end());` - * Note 2: Running SP.compute() in parallel is useless for sequences, but works for MNIST and the likes. - * - * This consists of a simple black & white image encoder, a spatial pool, and an - * SDR classifier. The task is to recognise images of hand written numbers 0-9. - * This should score at least 95%. - */ - -#include //uint8_t -#include -#include // std::ofstream -#include - -#include -#include -#include -#include - -#include - -#include // MNIST data itself + read methods, namespace mnist:: -#include // mnist::binarize_dataset - -using namespace std; -using namespace htm; - -class MNIST { -/** - * RESULTS: Store results in the MNIST_SP.cpp file only, this parallel is just for experimenting with parallelization. - */ - - private: - SpatialPooler sp; - SDR input; - SDR columns; - Classifier clsr; - mnist::MNIST_dataset, uint8_t> dataset; - - public: - UInt verbosity = 1; - const UInt train_dataset_iterations = 20u; //epochs somewhat help, at linear time - - -void setup() { - - input.initialize({28, 28, 1}); - columns.initialize({28, 28, 8}); //1D vs 2D no big difference, 2D seems more natural for the problem. Speed-----, Results+++++++++; #columns HIGHEST impact. - sp.initialize( - /* inputDimensions */ input.dimensions, - /* columnDimensions */ columns.dimensions, - /* potentialRadius */ 7, // with 2D, 7 results in 15x15 area, which is cca 25% for the input area. Slightly improves than 99999 aka "no topology, all to all connections" - /* potentialPct */ 0.1f, //we have only 10 classes, and << #columns. So we want to force each col to specialize. Cca 0.3 w "7" above, or very small (0.1) for "no topology". 
Cannot be too small due to internal checks. Speed++ - /* globalInhibition */ true, //Speed+++++++; SDR quality-- (global does have active nearby cols, which we want to avoid (local)); Results+-0 - /* localAreaDensity */ 0.1f, // % active bits - /* stimulusThreshold */ 6u, - /* synPermInactiveDec */ 0.002f, //very low values better for MNIST - /* synPermActiveInc */ 0.14f, //takes upto 5x steps to get dis/connected - /* synPermConnected */ 0.5f, //no difference, let's leave at 0.5 in the middle - /* minPctOverlapDutyCycles */ 0.2f, //speed of re-learning? - /* dutyCyclePeriod */ 1402, - /* boostStrength */ 12.0f, // Boosting does help, but entropy is high, on MNIST it does not matter, for learning with TM prefer boosting off (BOOSTING_DISABLED), or "neutral"=1.0 - /* seed */ 4u, - /* spVerbosity */ 1u, - /* wrapAround */ true); // does not matter (helps slightly) - - // Save the connections to file for postmortem analysis. - ofstream dump("mnist_sp_initial.connections", ofstream::binary | ofstream::trunc | ofstream::out); - sp.connections.save( dump ); - dump.close(); - - clsr.initialize( /* alpha */ 0.001f); - - dataset = mnist::read_dataset(string("../ThirdParty/mnist_data/mnist-src/")); //from CMake - mnist::binarize_dataset(dataset); -} - - -/** - * train the SP on the training set. - * @param skipSP bool (default false) if set, output directly the input to the classifier. - * This is used for a baseline benchmark (Classifier directly learns on input images) - */ -void train(const bool skipSP=false) { - // Train - - if(verbosity) - cout << "Training for " << (train_dataset_iterations * dataset.training_labels.size()) - << " cycles ..." << endl; - size_t i = 0; - - Metrics inputStats(input, 1402); - Metrics columnStats(columns, 1402); - - Timer tTrain(true); - - for(auto epoch = 0u; epoch < train_dataset_iterations; epoch++) { - NTA_INFO << "epoch " << epoch; - // Shuffle the training data. 
- vector index( dataset.training_labels.size() ); - for (UInt i=0; i(0, index.size()), - [&](tbb::blocked_range r) { -// for(size_t i=0; i< index.size(); i++) { // index = order of label (shuffeled) - for(auto i = r.begin(); i < r.end(); ++i) { - - const auto idx = index[i]; - // Get the input & label - const auto image = dataset.training_images.at(idx); - const UInt label = dataset.training_labels.at(idx); - - // Compute & Train - SDR Pinput(input.dimensions); - Pinput.setDense( image ); - - SDR Pcolumns({28,28,8}); - if(not skipSP) - sp.compute(Pinput, true, Pcolumns); //TODO change to return output? - //TODO make compute() const for parallelization? - - // sync this - { - m.lock(); //TODO use better locks than just mutex, unique_lock etc - clsr.learn( Pcolumns, {label} ); - clsr.learn( skipSP ? Pinput : Pcolumns, {label} ); - m.unlock(); - } - if( verbosity && (++i % 1000 == 0) ) cout << "." << flush; - } - }); // !end of lambda - - - if( verbosity ) cout << endl; - - cout << "epoch ended" << endl; - cout << "inputStats " << inputStats << endl; - cout << "columnStats " << columnStats << endl; - cout << sp << endl; - } - - tTrain.stop(); - cout << "MNIST train time: " << tTrain.getElapsed() << endl; - - // Save the connections to file for postmortem analysis. - ofstream dump("mnist_sp_learned.connections", ofstream::binary | ofstream::trunc | ofstream::out); - sp.connections.save( dump ); - dump.close(); -} - -void test(const bool skipSP=false) { - // Test - Real score = 0; - UInt n_samples = 0; - if(verbosity) - cout << "Testing for " << dataset.test_labels.size() << " cycles ..." << endl; - for(UInt i = 0; i < dataset.test_labels.size(); i++) { - // Get the input & label - const auto image = dataset.test_images.at(i); - const UInt label = dataset.test_labels.at(i); - - // Compute - input.setDense( image ); - if(not skipSP) - sp.compute(input, false, columns); - - // Check results - if( argmax( clsr.infer( skipSP ? 
input : columns ) ) == label) - score += 1; - n_samples += 1; - if( verbosity && i % 1000 == 0 ) cout << "." << flush; - } - if( verbosity ) cout << endl; - cout << "===========RESULTs=================" << endl; - cout << "Score: " << 100.0 * score / n_samples << "% ("<< (n_samples - score) << " / " << n_samples << " wrong). " << endl; - cout << "SDR example: " << columns << endl; -} - -}; // End class MNIST - -int main(int argc, char **argv) { - MNIST m; - cout << "=========== Spatial Pooler (parallel) =====================" << endl; - m.setup(); - m.train(); - m.test(); - - return 0; -} -