diff --git a/.gitignore b/.gitignore index 0eb0a70a..e0992782 100644 --- a/.gitignore +++ b/.gitignore @@ -5,6 +5,13 @@ # ignore the C++ binary bh_tsne +#MatLab# +######## +*.asv +*.mexa64 +*.mexw64 +*.mexw64.pdb + # ignore data files *.txt *.tsv \ No newline at end of file diff --git a/fast_tsne.m b/fast_tsne.m index 3993f07d..e537ba01 100644 --- a/fast_tsne.m +++ b/fast_tsne.m @@ -1,12 +1,15 @@ -function mappedX = fast_tsne(X, no_dims, initial_dims, perplexity, theta, alg, max_iter) +function mappedX = fast_tsne(X, no_dims, initial_dims, perplexity, theta, alg, max_iter, rand_seed, stop_lying_iter, mom_switch_iter) %FAST_TSNE Runs the C++ implementation of Barnes-Hut t-SNE % % mappedX = fast_tsne(X, no_dims, initial_dims, perplexity, theta, alg) +% mappedX = fast_tsne(X, [], initial_solution, perplexity, theta, alg) % % Runs the C++ implementation of Barnes-Hut-SNE. The high-dimensional % datapoints are specified in the NxD matrix X. The dimensionality of the % datapoints is reduced to initial_dims dimensions using PCA (default = 50) -% before t-SNE is performed. Next, t-SNE reduces the points to no_dims +% before t-SNE is performed. Alternatively, an initial solution obtained +% from an other dimensionality reduction technique may be specified in +% initial_solution (no_dims inferrred). Next, t-SNE reduces the points to no_dims % dimensions. The perplexity of the input similarities may be specified % through the perplexity variable (default = 30). The variable theta sets % the trade-off parameter between speed and accuracy: theta = 0 corresponds @@ -53,8 +56,14 @@ if ~exist('no_dims', 'var') || isempty(no_dims) no_dims = 2; end + have_initial_coords=false; + init_Y=[]; if ~exist('initial_dims', 'var') || isempty(initial_dims) - initial_dims = 50; + initial_dims = min(50, size(X, 2)); + elseif ~isscalar(initial_dims) %initial mapped coordinates + have_initial_coords=true; + init_Y=initial_dims; + no_dims=size(init_Y,2); end if ~exist('perplexity', 'var') || isempty(perplexity) perplexity = 30; @@ -68,62 +77,38 @@ if ~exist('max_iter', 'var') || isempty(max_iter) max_iter=1000; end + if ~exist('rand_seed', 'var') || isempty(rand_seed) + rand_seed=-1; %use current time + end + if ~exist('stop_lying_iter', 'var') || isempty(stop_lying_iter) + stop_lying_iter=250; + end + if ~exist('mom_switch_iter', 'var') || isempty(mom_switch_iter) + mom_switch_iter=250; + end % Perform the initial dimensionality reduction using PCA - X = double(X); - X = bsxfun(@minus, X, mean(X, 1)); - M = pca(X,'NumComponents',initial_dims,'Algorithm',alg); - X = X * M; + if ~have_initial_coords + X = double(X); + X = bsxfun(@minus, X, mean(X, 1)); + M = pca(X,'NumComponents',initial_dims,'Algorithm',alg); + X = X * M; + end tsne_path = which('fast_tsne'); tsne_path = fileparts(tsne_path); - % Compile t-SNE C code - if(~exist(fullfile(tsne_path,'./bh_tsne'),'file') && isunix) - system(sprintf('g++ %s %s -o %s -O2',... + % Compile the mex file (including tSNE C code) if needed + mexfile = fullfile(tsne_path,'./bh_tsne'); + if exist(mexfile,'file') ~= 3 %check if mex file exists on path + mex(fullfile(tsne_path,'./tsne_mex.cpp'),... fullfile(tsne_path,'./sptree.cpp'),... fullfile(tsne_path,'./tsne.cpp'),... - fullfile(tsne_path,'./bh_tsne'))); + '-output',mexfile); end % Run the fast diffusion SNE implementation - write_data(X, no_dims, theta, perplexity, max_iter); - tic - [flag, cmdout] = system(['"' fullfile(tsne_path,'./bh_tsne') '"']); - if(flag~=0) - error(cmdout); - end - toc - [mappedX, landmarks, costs] = read_data; - landmarks = landmarks + 1; % correct for Matlab indexing - delete('data.dat'); - delete('result.dat'); -end - - -% Writes the datafile for the fast t-SNE implementation -function write_data(X, no_dims, theta, perplexity, max_iter) - [n, d] = size(X); - h = fopen('data.dat', 'wb'); - fwrite(h, n, 'integer*4'); - fwrite(h, d, 'integer*4'); - fwrite(h, theta, 'double'); - fwrite(h, perplexity, 'double'); - fwrite(h, no_dims, 'integer*4'); - fwrite(h, max_iter, 'integer*4'); - fwrite(h, X', 'double'); - fclose(h); -end - - -% Reads the result file from the fast t-SNE implementation -function [X, landmarks, costs] = read_data - h = fopen('result.dat', 'rb'); - n = fread(h, 1, 'integer*4'); - d = fread(h, 1, 'integer*4'); - X = fread(h, n * d, 'double'); - landmarks = fread(h, n, 'integer*4'); - costs = fread(h, n, 'double'); % this vector contains only zeros - X = reshape(X, [d n])'; - fclose(h); -end + X = X'; init_Y=init_Y'; %transpose to convert from column major (Matlab) to row major (C) order + mappedX = bh_tsne(X, no_dims, theta, perplexity, max_iter, rand_seed, init_Y, stop_lying_iter, mom_switch_iter); + mappedX = mappedX'; +end \ No newline at end of file diff --git a/sptree.cpp b/sptree.cpp index 10aa4ef8..80373695 100644 --- a/sptree.cpp +++ b/sptree.cpp @@ -38,6 +38,11 @@ #include "sptree.h" +//if we are compiling from matlab MEX, redefine printf to mexPrintf so it prints to matlab command window. +#ifdef MATLAB_MEX_FILE + #include "mex.h" + #define printf mexPrintf +#endif // Constructs cell Cell::Cell(unsigned int inp_dimension) { diff --git a/tsne.cpp b/tsne.cpp index fdb4b4bd..612f1c0e 100644 --- a/tsne.cpp +++ b/tsne.cpp @@ -40,6 +40,11 @@ #include "sptree.h" #include "tsne.h" +//if we are compiling from matlab MEX, redefine printf to mexPrintf so it prints to matlab command window. +#ifdef MATLAB_MEX_FILE + #include "mex.h" + #define printf mexPrintf +#endif using namespace std; diff --git a/tsne_mex.cpp b/tsne_mex.cpp new file mode 100644 index 00000000..3682a501 --- /dev/null +++ b/tsne_mex.cpp @@ -0,0 +1,114 @@ +// Mex function that runs Laurens van der Maaten's Barnes-Hut implementation of t-SNE +// (C) Patrick Fletcher 2018 + +#include + +#include "mex.h" +#include "tsne.h" + + +/* Input Arguments */ +#define DATA prhs[0] +#define NO_DIMS prhs[1] +#define THETA prhs[2] +#define PERPLEXITY prhs[3] +#define MAX_ITER prhs[4] +#define RAND_SEED prhs[5] +#define INIT_Y prhs[6] +#define STOP_LYING_ITER prhs[7] +#define MOM_SWITCH_ITER prhs[8] + +/* Output Arguments */ +#define MAPPED_DATA plhs[0] + +void mexFunction( int nlhs, mxArray *plhs[], int nrhs, const mxArray*prhs[] ) +{ + + /* Check for proper number of arguments */ + if (nrhs != 9) { + mexErrMsgIdAndTxt( "MATLAB:bh_tsne:invalidNumInputs", + "9 input arguments required."); + } + + if (nlhs > 1) { + mexErrMsgIdAndTxt( "MATLAB:bh_tsne:maxlhs", + "Too many output arguments."); + } + + /* Input Type checking */ + if (!mxIsSingle(DATA) && !mxIsDouble(DATA)) { + mexErrMsgIdAndTxt( "MATLAB:bh_tsne:invalidT", + "bh_tsne requires that DATA be single or double"); + } + /* Check the dimensions of NO_DIMS, THETA, PERPLEXITY, MAX_ITER, and RAND_SEED. They all should be scalar */ + if (!mxIsScalar(NO_DIMS) || (!mxIsSingle(NO_DIMS) && !mxIsDouble(NO_DIMS)) ) { + mexErrMsgIdAndTxt( "MATLAB:bh_tsne:invalidInput", + "BH_TSNE requires that NO_DIMS be a scalar single or double"); + } + if (!mxIsScalar(THETA) || (!mxIsSingle(THETA) && !mxIsDouble(THETA)) ) { + mexErrMsgIdAndTxt( "MATLAB:bh_tsne:invalidInput", + "BH_TSNE requires that THETA be a scalar single or double"); + } + if (!mxIsScalar(PERPLEXITY) || (!mxIsSingle(PERPLEXITY) && !mxIsDouble(PERPLEXITY)) ) { + mexErrMsgIdAndTxt( "MATLAB:bh_tsne:invalidInput", + "BH_TSNE requires that PERPLEXITY be a scalar single or double"); + } + if (!mxIsScalar(MAX_ITER) || (!mxIsSingle(MAX_ITER) && !mxIsDouble(MAX_ITER)) ) { + mexErrMsgIdAndTxt( "MATLAB:bh_tsne:invalidInput", + "BH_TSNE requires that MAX_ITER be a scalar single or double"); + } + if (!mxIsScalar(RAND_SEED) || (!mxIsSingle(RAND_SEED) && !mxIsDouble(RAND_SEED)) ) { + mexErrMsgIdAndTxt( "MATLAB:bh_tsne:invalidInput", + "BH_TSNE requires that RAND_SEED be a scalar single or double"); + } + if (!mxIsSingle(INIT_Y) && !mxIsDouble(INIT_Y)) { + mexErrMsgIdAndTxt( "MATLAB:bh_tsne:invalidT", + "bh_tsne requires that INIT_Y be single or double"); + } + if (!mxIsScalar(STOP_LYING_ITER) || (!mxIsSingle(STOP_LYING_ITER) && !mxIsDouble(STOP_LYING_ITER)) ) { + mexErrMsgIdAndTxt( "MATLAB:bh_tsne:invalidInput", + "BH_TSNE requires that STOP_LYING_ITER be a scalar single or double"); + } + if (!mxIsScalar(MOM_SWITCH_ITER) || (!mxIsSingle(MOM_SWITCH_ITER) && !mxIsDouble(MOM_SWITCH_ITER)) ) { + mexErrMsgIdAndTxt( "MATLAB:bh_tsne:invalidInput", + "BH_TSNE requires that MOM_SWITCH_ITER be a scalar single or double"); + } + + //dimensions of data: we are passing in X' (rows=vars, cols=observations) to get row major order in the data vector + int D = (int)mxGetM(DATA); + int N = (int)mxGetN(DATA); + + mexPrintf("N: %d, D: %d\n", N, D); + + std::vector X ( static_cast(mxGetData(DATA)), static_cast(mxGetData(DATA)) + mxGetNumberOfElements(DATA) ); + int no_dims=static_cast(mxGetScalar(NO_DIMS)); + double theta=static_cast(mxGetScalar(THETA)); + double perplexity=static_cast(mxGetScalar(PERPLEXITY)); + int max_iter=static_cast(mxGetScalar(MAX_ITER)); + int rand_seed=static_cast(mxGetScalar(RAND_SEED)); + int stop_lying_iter=static_cast(mxGetScalar(STOP_LYING_ITER)); + int mom_switch_iter=static_cast(mxGetScalar(MOM_SWITCH_ITER)); + + size_t y_length = no_dims * N; + std::vector Y(y_length); + + bool skip_random_init = !mxIsEmpty(INIT_Y); + if (skip_random_init) { + std::copy(static_cast(mxGetData(INIT_Y)), static_cast(mxGetData(INIT_Y)) + mxGetNumberOfElements(INIT_Y), Y.begin() ); + } + + //run tsne + //TSNE tsne(); + TSNE* tsne = new TSNE(); + tsne->run(X.data(), N, D, Y.data(), no_dims, perplexity, theta, rand_seed, skip_random_init, max_iter, stop_lying_iter, mom_switch_iter); + //void run(double* X, int N, int D, double* Y, int no_dims, double perplexity, double theta, int rand_seed, + // bool skip_random_init, int max_iter=1000, int stop_lying_iter=250, int mom_switch_iter=250); + + //send the result back to Matlab + MAPPED_DATA = mxCreateDoubleMatrix( (mwSize)no_dims, (mwSize)N, mxREAL); + std::copy(Y.begin(), Y.end(), (double *)mxGetData(MAPPED_DATA)); + + delete(tsne); + return; +} +