Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

convert Matlab interface to use of MEX function to bypass file i/o #77

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,13 @@
# ignore the C++ binary
bh_tsne

#MatLab#
########
*.asv
*.mexa64
*.mexw64
*.mexw64.pdb

# ignore data files
*.txt
*.tsv
87 changes: 36 additions & 51 deletions fast_tsne.m
Original file line number Diff line number Diff line change
@@ -1,12 +1,15 @@
function mappedX = fast_tsne(X, no_dims, initial_dims, perplexity, theta, alg, max_iter)
function mappedX = fast_tsne(X, no_dims, initial_dims, perplexity, theta, alg, max_iter, rand_seed, stop_lying_iter, mom_switch_iter)
%FAST_TSNE Runs the C++ implementation of Barnes-Hut t-SNE
%
% mappedX = fast_tsne(X, no_dims, initial_dims, perplexity, theta, alg)
% mappedX = fast_tsne(X, [], initial_solution, perplexity, theta, alg)
%
% Runs the C++ implementation of Barnes-Hut-SNE. The high-dimensional
% datapoints are specified in the NxD matrix X. The dimensionality of the
% datapoints is reduced to initial_dims dimensions using PCA (default = 50)
% before t-SNE is performed. Next, t-SNE reduces the points to no_dims
% before t-SNE is performed. Alternatively, an initial solution obtained
% from an other dimensionality reduction technique may be specified in
% initial_solution (no_dims inferrred). Next, t-SNE reduces the points to no_dims
% dimensions. The perplexity of the input similarities may be specified
% through the perplexity variable (default = 30). The variable theta sets
% the trade-off parameter between speed and accuracy: theta = 0 corresponds
Expand Down Expand Up @@ -53,8 +56,14 @@
if ~exist('no_dims', 'var') || isempty(no_dims)
no_dims = 2;
end
have_initial_coords=false;
init_Y=[];
if ~exist('initial_dims', 'var') || isempty(initial_dims)
initial_dims = 50;
initial_dims = min(50, size(X, 2));
elseif ~isscalar(initial_dims) %initial mapped coordinates
have_initial_coords=true;
init_Y=initial_dims;
no_dims=size(init_Y,2);
end
if ~exist('perplexity', 'var') || isempty(perplexity)
perplexity = 30;
Expand All @@ -68,62 +77,38 @@
if ~exist('max_iter', 'var') || isempty(max_iter)
max_iter=1000;
end
if ~exist('rand_seed', 'var') || isempty(rand_seed)
rand_seed=-1; %use current time
end
if ~exist('stop_lying_iter', 'var') || isempty(stop_lying_iter)
stop_lying_iter=250;
end
if ~exist('mom_switch_iter', 'var') || isempty(mom_switch_iter)
mom_switch_iter=250;
end

% Perform the initial dimensionality reduction using PCA
X = double(X);
X = bsxfun(@minus, X, mean(X, 1));
M = pca(X,'NumComponents',initial_dims,'Algorithm',alg);
X = X * M;
if ~have_initial_coords
X = double(X);
X = bsxfun(@minus, X, mean(X, 1));
M = pca(X,'NumComponents',initial_dims,'Algorithm',alg);
X = X * M;
end

tsne_path = which('fast_tsne');
tsne_path = fileparts(tsne_path);

% Compile t-SNE C code
if(~exist(fullfile(tsne_path,'./bh_tsne'),'file') && isunix)
system(sprintf('g++ %s %s -o %s -O2',...
% Compile the mex file (including tSNE C code) if needed
mexfile = fullfile(tsne_path,'./bh_tsne');
if exist(mexfile,'file') ~= 3 %check if mex file exists on path
mex(fullfile(tsne_path,'./tsne_mex.cpp'),...
fullfile(tsne_path,'./sptree.cpp'),...
fullfile(tsne_path,'./tsne.cpp'),...
fullfile(tsne_path,'./bh_tsne')));
'-output',mexfile);
end

% Run the fast diffusion SNE implementation
write_data(X, no_dims, theta, perplexity, max_iter);
tic
[flag, cmdout] = system(['"' fullfile(tsne_path,'./bh_tsne') '"']);
if(flag~=0)
error(cmdout);
end
toc
[mappedX, landmarks, costs] = read_data;
landmarks = landmarks + 1; % correct for Matlab indexing
delete('data.dat');
delete('result.dat');
end


% Writes the datafile for the fast t-SNE implementation
function write_data(X, no_dims, theta, perplexity, max_iter)
[n, d] = size(X);
h = fopen('data.dat', 'wb');
fwrite(h, n, 'integer*4');
fwrite(h, d, 'integer*4');
fwrite(h, theta, 'double');
fwrite(h, perplexity, 'double');
fwrite(h, no_dims, 'integer*4');
fwrite(h, max_iter, 'integer*4');
fwrite(h, X', 'double');
fclose(h);
end


% Reads the result file from the fast t-SNE implementation
function [X, landmarks, costs] = read_data
h = fopen('result.dat', 'rb');
n = fread(h, 1, 'integer*4');
d = fread(h, 1, 'integer*4');
X = fread(h, n * d, 'double');
landmarks = fread(h, n, 'integer*4');
costs = fread(h, n, 'double'); % this vector contains only zeros
X = reshape(X, [d n])';
fclose(h);
end
X = X'; init_Y=init_Y'; %transpose to convert from column major (Matlab) to row major (C) order
mappedX = bh_tsne(X, no_dims, theta, perplexity, max_iter, rand_seed, init_Y, stop_lying_iter, mom_switch_iter);
mappedX = mappedX';
end
5 changes: 5 additions & 0 deletions sptree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,11 @@
#include "sptree.h"


//if we are compiling from matlab MEX, redefine printf to mexPrintf so it prints to matlab command window.
#ifdef MATLAB_MEX_FILE
#include "mex.h"
#define printf mexPrintf
#endif

// Constructs cell
Cell::Cell(unsigned int inp_dimension) {
Expand Down
5 changes: 5 additions & 0 deletions tsne.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,11 @@
#include "sptree.h"
#include "tsne.h"

//if we are compiling from matlab MEX, redefine printf to mexPrintf so it prints to matlab command window.
#ifdef MATLAB_MEX_FILE
#include "mex.h"
#define printf mexPrintf
#endif

using namespace std;

Expand Down
114 changes: 114 additions & 0 deletions tsne_mex.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
// Mex function that runs Laurens van der Maaten's Barnes-Hut implementation of t-SNE
// (C) Patrick Fletcher 2018

#include <vector>

#include "mex.h"
#include "tsne.h"


/* Input Arguments */
#define DATA prhs[0]
#define NO_DIMS prhs[1]
#define THETA prhs[2]
#define PERPLEXITY prhs[3]
#define MAX_ITER prhs[4]
#define RAND_SEED prhs[5]
#define INIT_Y prhs[6]
#define STOP_LYING_ITER prhs[7]
#define MOM_SWITCH_ITER prhs[8]

/* Output Arguments */
#define MAPPED_DATA plhs[0]

void mexFunction( int nlhs, mxArray *plhs[], int nrhs, const mxArray*prhs[] )
{

/* Check for proper number of arguments */
if (nrhs != 9) {
mexErrMsgIdAndTxt( "MATLAB:bh_tsne:invalidNumInputs",
"9 input arguments required.");
}

if (nlhs > 1) {
mexErrMsgIdAndTxt( "MATLAB:bh_tsne:maxlhs",
"Too many output arguments.");
}

/* Input Type checking */
if (!mxIsSingle(DATA) && !mxIsDouble(DATA)) {
mexErrMsgIdAndTxt( "MATLAB:bh_tsne:invalidT",
"bh_tsne requires that DATA be single or double");
}
/* Check the dimensions of NO_DIMS, THETA, PERPLEXITY, MAX_ITER, and RAND_SEED. They all should be scalar */
if (!mxIsScalar(NO_DIMS) || (!mxIsSingle(NO_DIMS) && !mxIsDouble(NO_DIMS)) ) {
mexErrMsgIdAndTxt( "MATLAB:bh_tsne:invalidInput",
"BH_TSNE requires that NO_DIMS be a scalar single or double");
}
if (!mxIsScalar(THETA) || (!mxIsSingle(THETA) && !mxIsDouble(THETA)) ) {
mexErrMsgIdAndTxt( "MATLAB:bh_tsne:invalidInput",
"BH_TSNE requires that THETA be a scalar single or double");
}
if (!mxIsScalar(PERPLEXITY) || (!mxIsSingle(PERPLEXITY) && !mxIsDouble(PERPLEXITY)) ) {
mexErrMsgIdAndTxt( "MATLAB:bh_tsne:invalidInput",
"BH_TSNE requires that PERPLEXITY be a scalar single or double");
}
if (!mxIsScalar(MAX_ITER) || (!mxIsSingle(MAX_ITER) && !mxIsDouble(MAX_ITER)) ) {
mexErrMsgIdAndTxt( "MATLAB:bh_tsne:invalidInput",
"BH_TSNE requires that MAX_ITER be a scalar single or double");
}
if (!mxIsScalar(RAND_SEED) || (!mxIsSingle(RAND_SEED) && !mxIsDouble(RAND_SEED)) ) {
mexErrMsgIdAndTxt( "MATLAB:bh_tsne:invalidInput",
"BH_TSNE requires that RAND_SEED be a scalar single or double");
}
if (!mxIsSingle(INIT_Y) && !mxIsDouble(INIT_Y)) {
mexErrMsgIdAndTxt( "MATLAB:bh_tsne:invalidT",
"bh_tsne requires that INIT_Y be single or double");
}
if (!mxIsScalar(STOP_LYING_ITER) || (!mxIsSingle(STOP_LYING_ITER) && !mxIsDouble(STOP_LYING_ITER)) ) {
mexErrMsgIdAndTxt( "MATLAB:bh_tsne:invalidInput",
"BH_TSNE requires that STOP_LYING_ITER be a scalar single or double");
}
if (!mxIsScalar(MOM_SWITCH_ITER) || (!mxIsSingle(MOM_SWITCH_ITER) && !mxIsDouble(MOM_SWITCH_ITER)) ) {
mexErrMsgIdAndTxt( "MATLAB:bh_tsne:invalidInput",
"BH_TSNE requires that MOM_SWITCH_ITER be a scalar single or double");
}

//dimensions of data: we are passing in X' (rows=vars, cols=observations) to get row major order in the data vector
int D = (int)mxGetM(DATA);
int N = (int)mxGetN(DATA);

mexPrintf("N: %d, D: %d\n", N, D);

std::vector<double> X ( static_cast<double *>(mxGetData(DATA)), static_cast<double *>(mxGetData(DATA)) + mxGetNumberOfElements(DATA) );
int no_dims=static_cast<int>(mxGetScalar(NO_DIMS));
double theta=static_cast<double>(mxGetScalar(THETA));
double perplexity=static_cast<double>(mxGetScalar(PERPLEXITY));
int max_iter=static_cast<int>(mxGetScalar(MAX_ITER));
int rand_seed=static_cast<int>(mxGetScalar(RAND_SEED));
int stop_lying_iter=static_cast<int>(mxGetScalar(STOP_LYING_ITER));
int mom_switch_iter=static_cast<int>(mxGetScalar(MOM_SWITCH_ITER));

size_t y_length = no_dims * N;
std::vector<double> Y(y_length);

bool skip_random_init = !mxIsEmpty(INIT_Y);
if (skip_random_init) {
std::copy(static_cast<double *>(mxGetData(INIT_Y)), static_cast<double *>(mxGetData(INIT_Y)) + mxGetNumberOfElements(INIT_Y), Y.begin() );
}

//run tsne
//TSNE tsne();
TSNE* tsne = new TSNE();
tsne->run(X.data(), N, D, Y.data(), no_dims, perplexity, theta, rand_seed, skip_random_init, max_iter, stop_lying_iter, mom_switch_iter);
//void run(double* X, int N, int D, double* Y, int no_dims, double perplexity, double theta, int rand_seed,
// bool skip_random_init, int max_iter=1000, int stop_lying_iter=250, int mom_switch_iter=250);

//send the result back to Matlab
MAPPED_DATA = mxCreateDoubleMatrix( (mwSize)no_dims, (mwSize)N, mxREAL);
std::copy(Y.begin(), Y.end(), (double *)mxGetData(MAPPED_DATA));

delete(tsne);
return;
}