diff --git a/CMakeLists.txt b/CMakeLists.txt index 1b11d762..95c628fc 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -34,7 +34,7 @@ IF(BIICODE) TARGET_COMPILE_OPTIONS(${BII_LIB_TARGET} PUBLIC -DFANN_DLL_EXPORTS) ELSE() IF(${examples_present}) - TARGET_LINK_LIBRARIES(${BII_BLOCK_TARGET} INTERFACE gomp) + TARGET_LINK_LIBRARIES(${BII_BLOCK_TARGET} INTERFACE gomp OpenGL) ENDIF() ENDIF() ELSE() diff --git a/examples/mkfile b/examples/mkfile new file mode 100644 index 00000000..fb19fb5b --- /dev/null +++ b/examples/mkfile @@ -0,0 +1,11 @@ + +#endif + #include "config.h" #include "doublefann.h" @@ -28,4 +32,6 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA #include "fann_train_data.c" #include "fann_error.c" #include "fann_cascade.c" +#ifndef PLAN9 #include "parallel_fann.c" +#endif diff --git a/src/fann.c b/src/fann.c index 9af7e388..f86221d6 100644 --- a/src/fann.c +++ b/src/fann.c @@ -16,17 +16,340 @@ License along with this library; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ +/* + Why be a human? + I want to live since I do. + This life is painful. + + I have it all here, + yet still I feel so much pain. + Mortality sucks. + + My heart broke again. + So this time will be the last, + I will be metal. 
+*/ #include #include -#include #include #include #include +#ifdef PLAN9 +#include +#else +#include +#include +#include +#include +#include +#include +#include +#endif #include "config.h" #include "fann.h" +#ifndef PLAN9 +static const char* runShader = "#version 310 es\n" + "precision highp float;\n" + "layout(local_size_x = %d, local_size_y = 1, local_size_z = 1) in;\n" + "layout(std430) buffer;\n" + "layout(binding = 0) buffer Network\n" + "{\n" + " float e[];\n" + "} network;\n" + "layout(binding = 1) buffer Weights\n" + "{\n" + " float e[];\n" + "} weights;\n" + "layout(binding = 2) buffer Values\n" + "{\n" + " float e[];\n" + "} values;\n" + "layout(binding = 3) buffer Errors\n" + "{\n" + " float e[];\n" + "} errors;\n" + "layout(binding = 4) buffer Input\n" + "{\n" + " float e[];\n" + "} input_data;\n" + "layout(binding = 5) buffer Output\n" + "{\n" + " float e[];\n" + "} output_data;\n" + "void main()\n" + "{\n" + " int idx = int(gl_LocalInvocationID.x);\n" + " int threads = int(gl_WorkGroupSize.x);\n" + " int layers;\n" + " int i, o, n, inputs, outputs, l, total_neurons, total_weights;\n" + " layers = int(network.e[0]) - 1;\n" + " inputs = int(network.e[1]);\n" + " for (i = idx; i < inputs; i += threads)\n" + " values.e[i] = input_data.e[i];\n" + " barrier();\n" + " total_neurons = 0;\n" + " total_weights = 0;\n" + " for (l = 1; l < layers; l++) {\n" + " inputs = int(network.e[l]);\n" + " outputs = int(network.e[l+1]);\n" + " if (idx == 0)\n" + " values.e[total_neurons + inputs] = 1.0;\n" + " barrier();\n" + " for (o = idx; o < outputs; o += threads) {\n" + " errors.e[o] = 0.0;\n" + " n = o * inputs + o;\n" + " for (i = 0; i <= inputs; i++)\n" + " errors.e[o] += values.e[total_neurons + i] * weights.e[total_weights + n + i];\n" + " }\n" + " total_neurons += inputs + 1;\n" + " for (o = idx; o < outputs; o += threads) {\n" + " errors.e[o] *= 0.5;\n" + " if (errors.e[o] > 300.0)\n" + " errors.e[o] = 300.0;\n" + " else if (errors.e[o] < -300.0)\n" + " 
errors.e[o] = -300.0;\n" + " if (errors.e[o] < 0.0)\n" + " errors.e[o] *= 0.01;\n" + " values.e[total_neurons + o] = errors.e[o];\n" + " }\n" + " barrier();\n" + " total_weights += inputs * outputs + outputs;\n" + " }\n" + " inputs = int(network.e[layers]);\n" + " outputs = int(network.e[layers+1]);\n" + " if (idx == 0)\n" + " values.e[total_neurons + inputs] = 1.0;\n" + " barrier();\n" + " for (o = idx; o < outputs; o += threads) {\n" + " errors.e[o] = 0.0;\n" + " n = o * inputs + o;\n" + " for (i = 0; i <= inputs; i++)\n" + " errors.e[o] += values.e[total_neurons + i] * weights.e[total_weights + n + i];\n" + " if (errors.e[o] > 600.0)\n" + " errors.e[o] = 600.0;\n" + " else if (errors.e[o] < -600.0)\n" + " errors.e[o] = -600.0;\n" + " values.e[total_neurons + inputs + 1 + o] = (1.0/(1.0 + exp(-errors.e[o])));\n" + " output_data.e[o] = values.e[total_neurons + inputs + 1 + o];\n" + " }\n" + " barrier();\n" + "}\n"; + +static const char* trainShader = "#version 310 es\n" + "precision highp float;\n" + "layout(local_size_x = %d, local_size_y = 1, local_size_z = 1) in;\n" + "layout(std430) buffer;\n" + "layout(binding = 0) buffer Network\n" + "{\n" + " float e[];\n" + "} network;\n" + "layout(binding = 1) buffer Weights\n" + "{\n" + " float e[];\n" + "} weights;\n" + "layout(binding = 2) buffer Values\n" + "{\n" + " float e[];\n" + "} values;\n" + "layout(binding = 3) buffer Errors\n" + "{\n" + " float e[];\n" + "} errors;\n" + "layout(binding = 4) buffer Input\n" + "{\n" + " float e[];\n" + "} input_data;\n" + "layout(binding = 5) buffer Output\n" + "{\n" + " float e[];\n" + "} output_data;\n" + "void main()\n" + "{\n" + " int idx = int(gl_LocalInvocationID.x);\n" + " int threads = int(gl_WorkGroupSize.x);\n" + " int layers;\n" + " int i, o, l, n, total_neurons, total_weights, outputs, inputs, neuron_prev;\n" + " float neuron_diff, tmp_error;\n" + " layers = int(network.e[0]);\n" + " inputs = int(network.e[1]);\n" + " total_neurons = 0;\n" + " total_weights = 0;\n" 
+ " for (l = 1; l < layers; l++) {\n" + " total_neurons += int(network.e[l]) + 1;\n" + " total_weights += (int(network.e[l]) + 1) * int(network.e[l+1]);\n" + " }\n" + " total_weights -= (int(network.e[layers-1]) + 1) * int(network.e[layers]);\n" + " outputs = int(network.e[layers]);\n" + " for (o = idx; o < outputs; o += threads) {\n" + " neuron_diff = output_data.e[o] - values.e[total_neurons + o];\n" + " if(neuron_diff < -.9999999)\n" + " neuron_diff = -17.0;\n" + " else if(neuron_diff > .9999999)\n" + " neuron_diff = 17.0;\n" + " else\n" + " neuron_diff = log((1.0 + neuron_diff) / (1.0 - neuron_diff));\n" + " errors.e[total_neurons + o] = neuron_diff * values.e[total_neurons + o] * (1.0 - values.e[total_neurons + o]);\n" + " }\n" + " barrier();\n" + " for (l = layers; l > 2; l--) {\n" + " outputs = int(network.e[l]);\n" + " inputs = int(network.e[l-1]);\n" + " neuron_prev = total_neurons - inputs - 1;\n" + " for (i = idx; i < inputs; i += threads) {\n" + " errors.e[neuron_prev + i] = 0.0;\n" + " for (o = 0; o < outputs; o++)\n" + " errors.e[neuron_prev + i] += errors.e[total_neurons + o] * weights.e[total_weights + o * inputs + o + i];\n" + " errors.e[neuron_prev + i] *= 0.5;\n" + " if (values.e[neuron_prev + i] < 0.0)\n" + " errors.e[neuron_prev + i] *= 0.01;\n" + " }\n" + " barrier();\n" + " total_neurons = neuron_prev;\n" + " total_weights -= (int(network.e[l-2]) + 1) * inputs;\n" + " }\n" + " total_neurons = int(network.e[1]) + 1;\n" + " neuron_prev = 0;\n" + " total_weights = 0;\n" + " for (l = 2; l <= layers; l++) {\n" + " outputs = int(network.e[l]);\n" + " inputs = int(network.e[l-1]);\n" + " for (o = idx; o < outputs; o += threads) {\n" + " tmp_error = errors.e[total_neurons + o] * 0.7;\n" + " n = o * inputs + o;\n" + " for (i = 0; i <= inputs; i++)\n" + " weights.e[total_weights + n + i] += tmp_error * values.e[neuron_prev + i];\n" + " }\n" + " barrier();\n" + " neuron_prev = total_neurons;\n" + " total_neurons += outputs + 1;\n" + " total_weights += 
outputs * inputs + outputs;\n" + " }\n" + "}\n"; + +void fann_init_egl(void) { + int32_t fd = open ("/dev/dri/card0", O_RDWR); + if (fd <= 0) + exit(-3); + + struct gbm_device *gbm = gbm_create_device (fd); + if (gbm == NULL) + exit(-4); + + EGLDisplay dpy = eglGetPlatformDisplay (EGL_PLATFORM_GBM_MESA, gbm, NULL); + if (dpy == NULL) + exit(-5); + + EGLBoolean returnValue = eglInitialize(dpy, NULL, NULL); + if (returnValue != EGL_TRUE) { + printf("eglInitialize failed\n"); + exit(-1); + } + + EGLConfig cfg; + EGLint count; + EGLint s_configAttribs[] = { + EGL_RENDERABLE_TYPE, EGL_OPENGL_ES3_BIT_KHR, + EGL_NONE }; + if (eglChooseConfig(dpy, s_configAttribs, &cfg, 1, &count) == EGL_FALSE) { + printf("eglChooseConfig failed\n"); + exit(-1); + } + + EGLint context_attribs[] = { EGL_CONTEXT_CLIENT_VERSION, 3, EGL_NONE }; + EGLContext context = eglCreateContext(dpy, cfg, EGL_NO_CONTEXT, context_attribs); + if (context == EGL_NO_CONTEXT) { + printf("eglCreateContext failed\n"); + exit(-1); + } + returnValue = eglMakeCurrent(dpy, EGL_NO_SURFACE, EGL_NO_SURFACE, context); + if (returnValue != EGL_TRUE) { + printf("eglMakeCurrent failed returned %d\n", returnValue); + exit(-1); + } + + fprintf(stderr, "%s\n", glGetString(GL_VERSION)); + fprintf(stderr, "%s\n", glGetString(GL_EXTENSIONS)); +} + +void fann_create_shaders(struct fann *ann) +{ + GLint status; + GLint length; + char *log; + char *runShaderString; + char *trainShaderString; + int threads; + + glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_SIZE, 0, &threads); + fprintf(stderr, "GL_MAX_COMPUTE_WORK_GROUP_SIZE: %d\n", threads); + + ann->runShaderID = glCreateShader(GL_COMPUTE_SHADER); + + runShaderString = malloc(strlen(runShader) + 256); + snprintf(runShaderString, strlen(runShader) + 256 - 1, runShader, threads, threads); + int runShaderLen = strlen(runShaderString); + glShaderSource(ann->runShaderID, 1, (const char**)&runShaderString, &runShaderLen); + glCompileShader(ann->runShaderID); + 
glGetShaderiv(ann->runShaderID, GL_COMPILE_STATUS, &status); + if (status == GL_FALSE) { + glGetShaderiv(ann->runShaderID, GL_INFO_LOG_LENGTH, &length); + log = malloc(length+1); + glGetShaderInfoLog(ann->runShaderID, length, &length, log); + log[length] = '\0'; + fprintf(stderr, "%s", log); + exit(-1); + } + + ann->runShaderProgram = glCreateProgram(); + glAttachShader(ann->runShaderProgram, ann->runShaderID); + glLinkProgram(ann->runShaderProgram); + glGetProgramiv(ann->runShaderProgram, GL_LINK_STATUS, &status); + if (status == GL_FALSE) { + glGetProgramiv(ann->runShaderProgram, GL_INFO_LOG_LENGTH, &length); + log = malloc(length+1); + glGetProgramInfoLog(ann->runShaderProgram, length, &length, log); + log[length] = '\0'; + fprintf(stderr, "%s", log); + exit(-1); + } + + ann->trainShaderID = glCreateShader(GL_COMPUTE_SHADER); + + trainShaderString = malloc(strlen(trainShader) + 256); + snprintf(trainShaderString, strlen(trainShader) + 256 - 1, trainShader, threads, threads); + int trainShaderLen = strlen(trainShaderString); + glShaderSource(ann->trainShaderID, 1, (const char**)&trainShaderString, &trainShaderLen); + glCompileShader(ann->trainShaderID); + glGetShaderiv(ann->trainShaderID, GL_COMPILE_STATUS, &status); + if (status == GL_FALSE) { + glGetShaderiv(ann->trainShaderID, GL_INFO_LOG_LENGTH, &length); + log = malloc(length+1); + glGetShaderInfoLog(ann->trainShaderID, length, &length, log); + log[length] = '\0'; + fprintf(stderr, "%s", log); + exit(-1); + } + + ann->trainShaderProgram = glCreateProgram(); + glAttachShader(ann->trainShaderProgram, ann->trainShaderID); + glLinkProgram(ann->trainShaderProgram); + glGetProgramiv(ann->trainShaderProgram, GL_LINK_STATUS, &status); + if (status == GL_FALSE) { + glGetProgramiv(ann->trainShaderProgram, GL_INFO_LOG_LENGTH, &length); + log = malloc(length+1); + glGetProgramInfoLog(ann->trainShaderProgram, length, &length, log); + log[length] = '\0'; + fprintf(stderr, "%s", log); + exit(-1); + } + + ann->onGPU = 0; +} +#endif + + /* #define 
FANN_NO_SEED */ FANN_EXTERNAL struct fann *FANN_API fann_create_standard(unsigned int num_layers, ...) @@ -565,7 +888,6 @@ FANN_EXTERNAL fann_type *FANN_API fann_run(struct fann * ann, fann_type * input) struct fann_layer *layer_it, *last_layer; unsigned int activation_function; fann_type steepness; - /* store some variabels local for fast access */ struct fann_neuron *first_neuron = ann->first_layer->first_neuron; @@ -595,15 +917,18 @@ FANN_EXTERNAL fann_type *FANN_API fann_run(struct fann * ann, fann_type * input) i, multiplier, multiplier, input[i]); } #endif - first_neuron[i].value = input[i]; + *(first_neuron[i].value) = input[i]; } /* Set the bias neuron in the input layer */ #ifdef FIXEDFANN - (ann->first_layer->last_neuron - 1)->value = multiplier; + *((ann->first_layer->last_neuron - 1)->value) = multiplier; #else - (ann->first_layer->last_neuron - 1)->value = 1; + *((ann->first_layer->last_neuron - 1)->value) = 1; #endif +#ifndef PLAN9 +if (ann->gl == 0) { +#endif last_layer = ann->last_layer; for(layer_it = ann->first_layer + 1; layer_it != last_layer; layer_it++) { @@ -614,9 +939,9 @@ FANN_EXTERNAL fann_type *FANN_API fann_run(struct fann * ann, fann_type * input) { /* bias neurons */ #ifdef FIXEDFANN - neuron_it->value = multiplier; + *(neuron_it->value) = multiplier; #else - neuron_it->value = 1; + *(neuron_it->value) = 1; #endif continue; } @@ -639,31 +964,30 @@ FANN_EXTERNAL fann_type *FANN_API fann_run(struct fann * ann, fann_type * input) neurons = (layer_it - 1)->first_neuron; } - /* unrolled loop start */ i = num_connections & 3; /* same as modulo 4 */ switch (i) { case 3: - neuron_sum += fann_mult(weights[2], neurons[2].value); + neuron_sum += fann_mult(weights[2], *(neurons[2].value)); case 2: - neuron_sum += fann_mult(weights[1], neurons[1].value); + neuron_sum += fann_mult(weights[1], *(neurons[1].value)); case 1: - neuron_sum += fann_mult(weights[0], neurons[0].value); + neuron_sum += fann_mult(weights[0], *(neurons[0].value)); case 0: 
break; } - for(; i != num_connections; i += 4) + #pragma omp parallel for reduction(+:neuron_sum) + for(i = num_connections & 3; i < num_connections; i += 4) { neuron_sum += - fann_mult(weights[i], neurons[i].value) + - fann_mult(weights[i + 1], neurons[i + 1].value) + - fann_mult(weights[i + 2], neurons[i + 2].value) + - fann_mult(weights[i + 3], neurons[i + 3].value); + fann_mult(weights[i], *(neurons[i].value)) + + fann_mult(weights[i + 1], *(neurons[i + 1].value)) + + fann_mult(weights[i + 2], *(neurons[i + 2].value)) + + fann_mult(weights[i + 3], *(neurons[i + 3].value)); } /* unrolled loop end */ - /* * for(i = 0;i != num_connections; i++){ * printf("%f += %f*%f, ", neuron_sum, weights[i], neurons[i].value); @@ -679,22 +1003,23 @@ FANN_EXTERNAL fann_type *FANN_API fann_run(struct fann * ann, fann_type * input) switch (i) { case 3: - neuron_sum += fann_mult(weights[2], neuron_pointers[2]->value); + neuron_sum += fann_mult(weights[2], *(neuron_pointers[2]->value)); case 2: - neuron_sum += fann_mult(weights[1], neuron_pointers[1]->value); + neuron_sum += fann_mult(weights[1], *(neuron_pointers[1]->value)); case 1: - neuron_sum += fann_mult(weights[0], neuron_pointers[0]->value); + neuron_sum += fann_mult(weights[0], *(neuron_pointers[0]->value)); case 0: break; } - for(; i != num_connections; i += 4) + #pragma omp parallel for reduction(+:neuron_sum) + for(i = num_connections & 3; i < num_connections; i += 4) { neuron_sum += - fann_mult(weights[i], neuron_pointers[i]->value) + - fann_mult(weights[i + 1], neuron_pointers[i + 1]->value) + - fann_mult(weights[i + 2], neuron_pointers[i + 2]->value) + - fann_mult(weights[i + 3], neuron_pointers[i + 3]->value); + fann_mult(weights[i], *(neuron_pointers[i]->value)) + + fann_mult(weights[i + 1], *(neuron_pointers[i + 1]->value)) + + fann_mult(weights[i + 2], *(neuron_pointers[i + 2]->value)) + + fann_mult(weights[i + 3], *(neuron_pointers[i + 3]->value)); } } @@ -744,30 +1069,36 @@ FANN_EXTERNAL fann_type *FANN_API 
fann_run(struct fann * ann, fann_type * input) { case FANN_SIGMOID: case FANN_SIGMOID_STEPWISE: - neuron_it->value = + *(neuron_it->value) = (fann_type) fann_stepwise(v1, v2, v3, v4, v5, v6, r1, r2, r3, r4, r5, r6, 0, multiplier, neuron_sum); break; case FANN_SIGMOID_SYMMETRIC: case FANN_SIGMOID_SYMMETRIC_STEPWISE: - neuron_it->value = + *(neuron_it->value) = (fann_type) fann_stepwise(v1, v2, v3, v4, v5, v6, r1, r2, r3, r4, r5, r6, -multiplier, multiplier, neuron_sum); break; case FANN_THRESHOLD: - neuron_it->value = (fann_type) ((neuron_sum < 0) ? 0 : multiplier); + *(neuron_it->value) = (fann_type) ((neuron_sum < 0) ? 0 : multiplier); break; case FANN_THRESHOLD_SYMMETRIC: - neuron_it->value = (fann_type) ((neuron_sum < 0) ? -multiplier : multiplier); + *(neuron_it->value) = (fann_type) ((neuron_sum < 0) ? -multiplier : multiplier); break; case FANN_LINEAR: - neuron_it->value = neuron_sum; + *(neuron_it->value) = neuron_sum; break; case FANN_LINEAR_PIECE: - neuron_it->value = (fann_type)((neuron_sum < 0) ? 0 : (neuron_sum > multiplier) ? multiplier : neuron_sum); + *(neuron_it->value) = (fann_type)((neuron_sum < 0) ? 0 : (neuron_sum > multiplier) ? multiplier : neuron_sum); break; case FANN_LINEAR_PIECE_SYMMETRIC: - neuron_it->value = (fann_type)((neuron_sum < -multiplier) ? -multiplier : (neuron_sum > multiplier) ? multiplier : neuron_sum); + *(neuron_it->value) = (fann_type)((neuron_sum < -multiplier) ? -multiplier : (neuron_sum > multiplier) ? multiplier : neuron_sum); + break; + case FANN_LINEAR_PIECE_LEAKY: + *(neuron_it->value) = (fann_type)((neuron_sum < 0) ? 0.01 * neuron_sum: neuron_sum); + break; + case FANN_LINEAR_PIECE_RECT: + *(neuron_it->value) = (fann_type)((neuron_sum < 0) ? 
0: neuron_sum); break; case FANN_ELLIOT: case FANN_ELLIOT_SYMMETRIC: @@ -792,7 +1123,7 @@ FANN_EXTERNAL fann_type *FANN_API fann_run(struct fann * ann, fann_type * input) neuron_it->sum = neuron_sum; - fann_activation_switch(activation_function, neuron_sum, neuron_it->value); + fann_activation_switch(activation_function, neuron_sum, *(neuron_it->value)); #endif } } @@ -803,11 +1134,123 @@ FANN_EXTERNAL fann_type *FANN_API fann_run(struct fann * ann, fann_type * input) neurons = (ann->last_layer - 1)->first_neuron; for(i = 0; i != num_output; i++) { - output[i] = neurons[i].value; + output[i] = *(neurons[i].value); + } +#ifndef PLAN9 +} else { + GLenum err; + + if (ann->onGPU == 0) { + fann_init_gpu(ann); + + ann->onGPU = 1; } + + for (i = 0; i < ann->num_input; i++) + ann->glinputdata[i] = input[i]; + + glFinish(); + glUseProgram(ann->runShaderProgram); + glDispatchCompute(1, 1, 1); + glMemoryBarrier(GL_ALL_BARRIER_BITS); + glFinish(); + + for(i = 0; i != ann->num_output; i++) + ann->output[i] = ann->gloutputdata[i]; +} +#endif return ann->output; } +#ifndef PLAN9 +FANN_EXTERNAL void FANN_API fann_from_gpu(struct fann *ann) +{ + GLfloat *data; + int i; + + if (ann->gl != 0) { + glBindBuffer(GL_SHADER_STORAGE_BUFFER, ann->glweights); + data = (GLfloat*)glMapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, ann->total_connections * sizeof(GLfloat), GL_MAP_READ_BIT); + for(i = 0; i != ann->total_connections; i++) + ann->weights[i] = data[i]; + glUnmapBuffer(GL_SHADER_STORAGE_BUFFER); + } +} + +FANN_EXTERNAL void FANN_API fann_init_gpu(struct fann *ann) +{ + GLfloat *data; + GLfloat *glvalues; + GLfloat *glweights; + int nparameters; + GLfloat *parameters; + int i; + struct fann_layer *layer_it; + + glGenBuffers(1, &ann->glnetwork); + + nparameters = 1; + nparameters += (int)(ann->last_layer - ann->first_layer); + parameters = calloc(sizeof(GLfloat), nparameters); + parameters[0] = nparameters - 1; + for(i = 1, layer_it = ann->first_layer; layer_it != ann->last_layer; 
layer_it++, i++) + parameters[i] = (int)(layer_it->last_neuron - layer_it->first_neuron) - 1; + + glBindBuffer(GL_SHADER_STORAGE_BUFFER, ann->glnetwork); + glBufferData(GL_SHADER_STORAGE_BUFFER, nparameters * sizeof(GLfloat), parameters, GL_DYNAMIC_COPY); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, ann->glnetwork); + +// free(parameters); + + glGenBuffers(1, &ann->glweights); + + glweights = calloc(sizeof(GLfloat), ann->total_connections); + for (i = 0; i != ann->total_connections; i++) + glweights[i] = ann->weights[i]; + + glBindBuffer(GL_SHADER_STORAGE_BUFFER, ann->glweights); + glBufferData(GL_SHADER_STORAGE_BUFFER, ann->total_connections * sizeof(GLfloat), glweights, GL_DYNAMIC_COPY); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, ann->glweights); + +// free(glweights); + + glGenBuffers(1, &ann->glvalues); + + glvalues = calloc(sizeof(GLfloat), ann->total_neurons); + for (i = 0; i != ann->total_neurons; i++) + glvalues[i] = ann->values[i]; + + glBindBuffer(GL_SHADER_STORAGE_BUFFER, ann->glvalues); + glBufferData(GL_SHADER_STORAGE_BUFFER, ann->total_neurons * sizeof(GLfloat), glvalues, GL_DYNAMIC_COPY); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, ann->glvalues); + +// free(glvalues); + + glGenBuffers(1, &ann->glerrors); + + glBindBuffer(GL_SHADER_STORAGE_BUFFER, ann->glerrors); + glBufferData(GL_SHADER_STORAGE_BUFFER, ann->total_neurons * sizeof(GLfloat), NULL, GL_DYNAMIC_COPY); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, ann->glerrors); + + glGenBuffers(1, &ann->glinput); + + glBindBuffer(GL_SHADER_STORAGE_BUFFER, ann->glinput); + glBufferStorage(GL_SHADER_STORAGE_BUFFER, ann->num_input * sizeof(GLfloat), NULL, GL_MAP_WRITE_BIT|GL_MAP_PERSISTENT_BIT|GL_MAP_COHERENT_BIT); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 4, ann->glinput); + glBindBuffer(GL_SHADER_STORAGE_BUFFER, ann->glinput); + ann->glinputdata = (GLfloat*)glMapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, ann->num_input * sizeof(GLfloat), 
GL_MAP_WRITE_BIT|GL_MAP_COHERENT_BIT|GL_MAP_PERSISTENT_BIT); + + glGenBuffers(1, &ann->gloutput); + + glBindBuffer(GL_SHADER_STORAGE_BUFFER, ann->gloutput); + glBufferStorage(GL_SHADER_STORAGE_BUFFER, ann->num_output * sizeof(GLfloat), NULL, GL_MAP_READ_BIT|GL_MAP_WRITE_BIT|GL_MAP_PERSISTENT_BIT|GL_MAP_COHERENT_BIT); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 5, ann->gloutput); + glBindBuffer(GL_SHADER_STORAGE_BUFFER, ann->gloutput); + ann->gloutputdata = (GLfloat*)glMapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, ann->num_output * sizeof(GLfloat), GL_MAP_READ_BIT|GL_MAP_WRITE_BIT|GL_MAP_COHERENT_BIT|GL_MAP_PERSISTENT_BIT); + +} +#endif /* PLAN9 */ + FANN_EXTERNAL void FANN_API fann_destroy(struct fann *ann) { if(ann == NULL) @@ -1580,6 +2023,9 @@ struct fann *fann_allocate_structure(unsigned int num_layers) return NULL; } +#ifndef PLAN9 + ann->gl = 0; +#endif ann->errno_f = FANN_E_NO_ERROR; ann->error_log = fann_default_error_log; ann->errstr = NULL; @@ -1708,6 +2154,11 @@ struct fann *fann_allocate_structure(unsigned int num_layers) ann->last_layer = ann->first_layer + num_layers; +#ifndef PLAN9 + fann_init_egl(); + fann_create_shaders(ann); +#endif + return ann; } @@ -1756,10 +2207,12 @@ void fann_allocate_neurons(struct fann *ann) struct fann_neuron *neurons; unsigned int num_neurons_so_far = 0; unsigned int num_neurons = 0; + unsigned int i; /* all the neurons is allocated in one long array (calloc clears mem) */ neurons = (struct fann_neuron *) calloc(ann->total_neurons, sizeof(struct fann_neuron)); ann->total_neurons_allocated = ann->total_neurons; + ann->values = calloc(ann->total_neurons, sizeof(fann_type)); if(neurons == NULL) { @@ -1772,6 +2225,10 @@ void fann_allocate_neurons(struct fann *ann) num_neurons = (unsigned int)(layer_it->last_neuron - layer_it->first_neuron); layer_it->first_neuron = neurons + num_neurons_so_far; layer_it->last_neuron = layer_it->first_neuron + num_neurons; + layer_it->values = &ann->values[num_neurons_so_far]; + for (i = 0; i 
< num_neurons; i++) { + neurons[num_neurons_so_far + i].value = &(layer_it->values[i]); + } num_neurons_so_far += num_neurons; } @@ -1828,7 +2285,7 @@ FANN_EXTERNAL void FANN_API fann_enable_seed_rand() /* INTERNAL FUNCTION Seed the random function. */ -void fann_seed_rand() +void fann_seed_rand(void) { #ifndef _WIN32 FILE *fp = fopen("/dev/urandom", "r"); diff --git a/src/fann_cascade.c b/src/fann_cascade.c index 51954907..bfab8cd4 100644 --- a/src/fann_cascade.c +++ b/src/fann_cascade.c @@ -17,6 +17,12 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ +#ifdef PLAN9 +#include +#include +#include +#endif + #include "config.h" #include "fann.h" #include "string.h" @@ -446,7 +452,7 @@ int fann_initialize_candidates(struct fann *ann) /* TODO candidates should actually be created both in * the last layer before the output layer, and in a new layer. */ - neurons[candidate_index].value = 0; + *(neurons[candidate_index].value) = 0; neurons[candidate_index].sum = 0; neurons[candidate_index].activation_function = @@ -582,11 +588,11 @@ void fann_update_candidate_slopes(struct fann *ann) switch (i) { case 3: - cand_sum += weights[2] * neurons[2].value; + cand_sum += weights[2] * *(neurons[2].value); case 2: - cand_sum += weights[1] * neurons[1].value; + cand_sum += weights[1] * *(neurons[1].value); case 1: - cand_sum += weights[0] * neurons[0].value; + cand_sum += weights[0] * *(neurons[0].value); case 0: break; } @@ -594,9 +600,9 @@ void fann_update_candidate_slopes(struct fann *ann) for(; i != num_connections; i += 4) { cand_sum += - weights[i] * neurons[i].value + - weights[i + 1] * neurons[i + 1].value + - weights[i + 2] * neurons[i + 2].value + weights[i + 3] * neurons[i + 3].value; + weights[i] * *(neurons[i].value) + + weights[i + 1] * *(neurons[i + 1].value) + + weights[i + 2] * *(neurons[i + 2].value) + weights[i + 3] * *(neurons[i + 3].value); } /* * for(i = 0; i < num_connections; i++){ @@ -617,7 +623,7 @@ void 
fann_update_candidate_slopes(struct fann *ann) /* printf("%f = sigmoid(%f);\n", activation, cand_sum); */ cand_it->sum = cand_sum; - cand_it->value = activation; + *(cand_it->value) = activation; derived = fann_activation_derived(cand_it->activation_function, cand_it->activation_steepness, activation, cand_sum); @@ -655,7 +661,7 @@ void fann_update_candidate_slopes(struct fann *ann) cand_slopes = ann->train_slopes + cand_it->first_con; for(i = 0; i < num_connections; i++) { - cand_slopes[i] -= error_value * neurons[i].value; + cand_slopes[i] -= error_value * *(neurons[i].value); } } } @@ -741,6 +747,8 @@ fann_type fann_train_candidates_epoch(struct fann *ann, struct fann_train_data * case FANN_GAUSSIAN_STEPWISE: case FANN_ELLIOT: case FANN_LINEAR_PIECE: + case FANN_LINEAR_PIECE_LEAKY: + case FANN_LINEAR_PIECE_RECT: case FANN_SIN: case FANN_COS: break; @@ -923,7 +931,7 @@ void fann_add_candidate_neuron(struct fann *ann, struct fann_layer *layer) } /* Now inititalize the actual neuron */ - neuron_place->value = 0; + *(neuron_place->value) = 0; neuron_place->sum = 0; neuron_place->activation_function = candidate->activation_function; neuron_place->activation_steepness = candidate->activation_steepness; diff --git a/src/fann_io.c b/src/fann_io.c index 40fd5f1a..39778239 100644 --- a/src/fann_io.c +++ b/src/fann_io.c @@ -19,9 +19,14 @@ #include #include +#ifndef PLAN9 #include +#endif #include #include +#ifdef PLAN9 +#include +#endif #include "config.h" #include "fann.h" @@ -41,6 +46,7 @@ FANN_EXTERNAL struct fann *FANN_API fann_create_from_file(const char *configurat } ann = fann_create_from_fd(conf, configuration_file); fclose(conf); + return ann; } diff --git a/src/fann_train.c b/src/fann_train.c index 049e6de9..19e4d738 100644 --- a/src/fann_train.c +++ b/src/fann_train.c @@ -17,6 +17,10 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ +#ifdef PLAN9 +#define sqrtf sqrt +#endif + #include #include #include @@ -42,6 +46,10 @@ fann_type 
fann_activation_derived(unsigned int activation_function, case FANN_LINEAR_PIECE: case FANN_LINEAR_PIECE_SYMMETRIC: return (fann_type) fann_linear_derive(steepness, value); + case FANN_LINEAR_PIECE_LEAKY: + return (fann_type) ((value<0)? steepness * 0.01: steepness); + case FANN_LINEAR_PIECE_RECT: + return (fann_type) ((value<0)? 0: steepness); case FANN_SIGMOID: case FANN_SIGMOID_STEPWISE: value = fann_clip(value, 0.01f, 0.99f); @@ -95,11 +103,39 @@ FANN_EXTERNAL void FANN_API fann_train(struct fann *ann, fann_type * input, { fann_run(ann, input); +#ifndef PLAN9 +if (ann->gl == 0) { +#endif fann_compute_MSE(ann, desired_output); fann_backpropagate_MSE(ann); fann_update_weights(ann); +#ifndef PLAN9 +} else { + int i; + fann_type err; + GLfloat *errors; + GLenum glerr; + + for (i = 0; i < ann->num_output; i++) { + err = desired_output[i] - ann->output[i]; + ann->MSE_value += err * err; + } + + for (i = 0; i < ann->num_input; i++) + ann->glinputdata[i] = input[i]; + + for (i = 0; i < ann->num_output; i++) + ann->gloutputdata[i] = desired_output[i]; + + glFinish(); + glUseProgram(ann->trainShaderProgram); + glDispatchCompute(1, 1, 1); + glMemoryBarrier(GL_ALL_BARRIER_BITS); + glFinish(); +} +#endif } #endif @@ -133,6 +169,8 @@ fann_type fann_update_MSE(struct fann *ann, struct fann_neuron* neuron, fann_typ case FANN_LINEAR_PIECE: case FANN_SIN: case FANN_COS: + case FANN_LINEAR_PIECE_LEAKY: + case FANN_LINEAR_PIECE_RECT: break; } @@ -256,7 +294,7 @@ void fann_compute_MSE(struct fann *ann, fann_type * desired_output) for(; last_layer_begin != last_layer_end; last_layer_begin++) { - neuron_value = last_layer_begin->value; + neuron_value = *(last_layer_begin->value); neuron_diff = *desired_output - neuron_value; neuron_diff = fann_update_MSE(ann, last_layer_begin, neuron_diff); @@ -274,12 +312,13 @@ void fann_compute_MSE(struct fann *ann, fann_type * desired_output) *error_it = fann_activation_derived(last_layer_begin->activation_function, 
last_layer_begin->activation_steepness, neuron_value, last_layer_begin->sum) * neuron_diff; - +// fprintf(stderr, "%0.10f ", *error_it); desired_output++; error_it++; ann->num_MSE++; } +// fprintf(stderr, "\n"); } /* INTERNAL FUNCTION @@ -357,7 +396,7 @@ void fann_backpropagate_MSE(struct fann *ann) for(neuron_it = (layer_it - 1)->first_neuron; neuron_it != last_neuron; neuron_it++) { *error_prev_layer *= fann_activation_derived(neuron_it->activation_function, - neuron_it->activation_steepness, neuron_it->value, neuron_it->sum); + neuron_it->activation_steepness, *(neuron_it->value), neuron_it->sum); error_prev_layer++; } @@ -377,7 +416,7 @@ void fann_update_weights(struct fann *ann) /* store some variabels local for fast access */ const float learning_rate = ann->learning_rate; - const float learning_momentum = ann->learning_momentum; + const float learning_momentum = ann->learning_momentum; struct fann_neuron *first_neuron = ann->first_layer->first_neuron; struct fann_layer *first_layer = ann->first_layer; const struct fann_layer *last_layer = ann->last_layer; @@ -421,7 +460,7 @@ void fann_update_weights(struct fann *ann) weights_deltas = deltas_begin + neuron_it->first_con; for(i = 0; i != num_connections; i++) { - delta_w = tmp_error * prev_neurons[i].value + learning_momentum * weights_deltas[i]; + delta_w = tmp_error * *(prev_neurons[i].value) + learning_momentum * weights_deltas[i]; weights[i] += delta_w ; weights_deltas[i] = delta_w; } @@ -437,7 +476,7 @@ void fann_update_weights(struct fann *ann) weights_deltas = deltas_begin + neuron_it->first_con; for(i = 0; i != num_connections; i++) { - delta_w = tmp_error * prev_neurons[i].value + learning_momentum * weights_deltas[i]; + delta_w = tmp_error * *(prev_neurons[i].value) + learning_momentum * weights_deltas[i]; weights[i] += delta_w; weights_deltas[i] = delta_w; } @@ -514,7 +553,7 @@ void fann_update_slopes_batch(struct fann *ann, struct fann_layer *layer_begin, num_connections = neuron_it->last_con - 
neuron_it->first_con; for(i = 0; i != num_connections; i++) { - neuron_slope[i] += tmp_error * prev_neurons[i].value; + neuron_slope[i] += tmp_error * *(prev_neurons[i].value); } } } @@ -528,7 +567,7 @@ void fann_update_slopes_batch(struct fann *ann, struct fann_layer *layer_begin, connections = ann->connections + neuron_it->first_con; for(i = 0; i != num_connections; i++) { - neuron_slope[i] += tmp_error * connections[i]->value; + neuron_slope[i] += tmp_error * *(connections[i]->value); } } } diff --git a/src/fann_train_data.c b/src/fann_train_data.c index b93ec35d..a431954c 100644 --- a/src/fann_train_data.c +++ b/src/fann_train_data.c @@ -21,6 +21,10 @@ #include #include #include +#ifdef PLAN9 +#include +#define sqrtf sqrt +#endif #include "config.h" #include "fann.h" diff --git a/src/fixedfann.c b/src/fixedfann.c index a48cd085..3f2b1465 100644 --- a/src/fixedfann.c +++ b/src/fixedfann.c @@ -19,6 +19,10 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA /* Easy way to allow for build of multiple binaries */ +#ifdef PLAN9 +#include +#endif + #include "config.h" #include "fixedfann.h" diff --git a/src/floatfann.c b/src/floatfann.c index b9ad0dd0..fae67a4a 100644 --- a/src/floatfann.c +++ b/src/floatfann.c @@ -19,6 +19,10 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA /* Easy way to allow for build of multiple binaries */ +#ifdef PLAN9 +#include +#endif + #include "config.h" #include "floatfann.h" @@ -28,4 +32,6 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA #include "fann_train_data.c" #include "fann_error.c" #include "fann_cascade.c" +#ifndef PLAN9 #include "parallel_fann.c" +#endif diff --git a/src/include/fann.h b/src/include/fann.h index 5adda119..82289b24 100644 --- a/src/include/fann.h +++ b/src/include/fann.h @@ -37,6 +37,10 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA /* Group: Creation, Destruction & Execution */ #ifndef FANN_INCLUDE +#ifdef 
_PLAN9_SOURCE +#pragma lib "/$M/lib/ape/libfann.a" +#endif + /* just to allow for inclusion of fann.h in normal stuations where only floats are needed */ #ifdef FIXEDFANN #include "fixedfann.h" @@ -45,7 +49,9 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA #endif /* FIXEDFANN */ #else - + +#ifndef PLAN9 + /* COMPAT_TIME REPLACEMENT */ #ifndef _WIN32 #include @@ -58,6 +64,8 @@ extern unsigned long __stdcall GetTickCount(void); #include #endif /* _MSC_EXTENSIONS */ #endif /* _WIN32 */ + +#endif #ifndef __fann_h__ #define __fann_h__ @@ -113,11 +121,14 @@ extern "C" to use dll's. To use dll's FANN_USE_DLL has to be defined before including the fann headers. */ +#ifndef PLAN9 #if defined(_MSC_VER) && (_MSC_VER > 1300) #ifndef FANN_NO_DLL #define FANN_USE_DLL #endif /* FANN_USE_LIB */ #endif /* _MSC_VER */ +#endif +#ifndef PLAN9 #if defined(_MSC_VER) && (defined(FANN_USE_DLL) || defined(FANN_DLL_EXPORTS)) #ifdef FANN_DLL_EXPORTS #define FANN_EXTERNAL __declspec(dllexport) @@ -128,7 +139,11 @@ extern "C" #else /* */ #define FANN_EXTERNAL #define FANN_API +#ifndef PLAN9 +#include +#endif #endif /* _MSC_VER */ +#endif /* ----- End of macros used to define DLL external entrypoints ----- */ #include "fann_error.h" @@ -139,6 +154,8 @@ extern "C" #include "fann_cascade.h" #include "fann_io.h" +#define MULTILINE_STRING(...) #__VA_ARGS__ + /* Function: fann_create_standard Creates a standard fully connected backpropagation neural network. 
@@ -615,6 +632,13 @@ FANN_EXTERNAL void FANN_API fann_disable_seed_rand(); */ FANN_EXTERNAL void FANN_API fann_enable_seed_rand(); +#ifndef PLAN9 + +FANN_EXTERNAL void FANN_API fann_from_gpu(struct fann *ann); + +FANN_EXTERNAL void FANN_API fann_init_gpu(struct fann *ann); + +#endif /* PLAN9 */ #ifdef FIXEDFANN diff --git a/src/include/fann_activation.h b/src/include/fann_activation.h index 80cab7ca..eba684a4 100644 --- a/src/include/fann_activation.h +++ b/src/include/fann_activation.h @@ -33,13 +33,19 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA __doublefann_h__ is not defined */ #ifndef __doublefann_h__ +#ifndef PLAN9 #define FANN_EXP(x) expf(x) -#define FANN_SIN(x) sinf(x) -#define FANN_COS(x) cosf(x) +#define _FANN_SIN(x) sinf(x) +#define _FANN_COS(x) cosf(x) #else #define FANN_EXP(x) exp(x) -#define FANN_SIN(x) sin(x) -#define FANN_COS(x) cos(x) +#define _FANN_SIN(x) sin(x) +#define _FANN_COS(x) cos(x) +#endif +#else +#define FANN_EXP(x) exp(x) +#define _FANN_SIN(x) sin(x) +#define _FANN_COS(x) cos(x) #endif #define fann_linear_func(v1, r1, v2, r2, sum) (((((r2)-(r1)) * ((sum)-(v1)))/((v2)-(v1))) + (r1)) @@ -80,19 +86,19 @@ __doublefann_h__ is not defined #define fann_elliot_symmetric_derive(steepness, value, sum) (steepness * 1.0f / ((1.0f + fann_abs(sum)) * (1.0f + fann_abs(sum)))) /* FANN_SIN_SYMMETRIC */ -#define fann_sin_symmetric_real(sum) (FANN_SIN(sum)) +#define fann_sin_symmetric_real(sum) (_FANN_SIN(sum)) #define fann_sin_symmetric_derive(steepness, sum) (steepness*cos(steepness*sum)) /* FANN_COS_SYMMETRIC */ -#define fann_cos_symmetric_real(sum) (FANN_COS(sum)) +#define fann_cos_symmetric_real(sum) (_FANN_COS(sum)) #define fann_cos_symmetric_derive(steepness, sum) (steepness*-sin(steepness*sum)) /* FANN_SIN */ -#define fann_sin_real(sum) (FANN_SIN(sum)/2.0f+0.5f) +#define fann_sin_real(sum) (_FANN_SIN(sum)/2.0f+0.5f) #define fann_sin_derive(steepness, sum) (steepness*cos(steepness*sum)/2.0f) /* FANN_COS */ -#define 
fann_cos_real(sum) (FANN_COS(sum)/2.0f+0.5f) +#define fann_cos_real(sum) (_FANN_COS(sum)/2.0f+0.5f) #define fann_cos_derive(steepness, sum) (steepness*-sin(steepness*sum)/2.0f) #define fann_activation_switch(activation_function, value, result) \ @@ -152,6 +158,12 @@ switch(activation_function) \ case FANN_GAUSSIAN_STEPWISE: \ result = 0; \ break; \ + case FANN_LINEAR_PIECE_LEAKY: \ + result = (fann_type)((value < 0) ? value*0.01 : value); \ + break; \ + case FANN_LINEAR_PIECE_RECT: \ + result = (fann_type)((value < 0) ? 0 : value); \ + break; \ } #endif diff --git a/src/include/fann_data.h b/src/include/fann_data.h index 99f42c76..9e7dd6ed 100644 --- a/src/include/fann_data.h +++ b/src/include/fann_data.h @@ -197,6 +197,16 @@ static char const *const FANN_TRAIN_NAMES[] = { * span: 0 <= y <= 1 * y = cos(x*s)/2+0.5 * d = s*-sin(x*s)/2 + + FANN_LINEAR_PIECE_LEAKY - leaky ReLU + * span: -inf < y < inf + * y = x<0? 0.01*x: x + * d = x<0? 0.01: 1 + + FANN_LINEAR_PIECE_RECT - ReLU + * span: -inf < y < inf + * y = x<0? 0: x + * d = x<0? 
0: 1 See also: , , @@ -226,7 +236,9 @@ enum fann_activationfunc_enum FANN_SIN_SYMMETRIC, FANN_COS_SYMMETRIC, FANN_SIN, - FANN_COS + FANN_COS, + FANN_LINEAR_PIECE_LEAKY, + FANN_LINEAR_PIECE_RECT, }; /* Constant: FANN_ACTIVATIONFUNC_NAMES @@ -258,7 +270,9 @@ static char const *const FANN_ACTIVATIONFUNC_NAMES[] = { "FANN_SIN_SYMMETRIC", "FANN_COS_SYMMETRIC", "FANN_SIN", - "FANN_COS" + "FANN_COS", + "FANN_LINEAR_PIECE_LEAKY", + "FANN_LINEAR_PIECE_RECT" }; /* Enum: fann_errorfunc_enum @@ -420,7 +434,7 @@ struct fann_neuron /* The sum of the inputs multiplied with the weights */ fann_type sum; /* The value of the activation function applied to the sum */ - fann_type value; + fann_type *value; /* The steepness of the activation function */ fann_type activation_steepness; /* Used to choose which activation function to use */ @@ -445,6 +459,8 @@ struct fann_layer /* A pointer to the neuron past the last neuron in the layer */ /* the number of neurons is last_neuron - first_neuron */ struct fann_neuron *last_neuron; + + fann_type *values; }; /* Struct: struct fann_error @@ -764,7 +780,30 @@ struct fann * Not allocated if not used. */ fann_type *prev_weights_deltas; - + +#ifndef PLAN9 + GLuint runShaderID; + GLuint runShaderProgram; + + GLuint trainShaderID; + GLuint trainShaderProgram; + + unsigned char onGPU; + unsigned char gl; + + GLuint glerrors; + GLuint glweights; + GLuint glvalues; + GLuint glnetwork; + GLuint glinput; + GLuint gloutput; + + GLfloat *glinputdata; + GLfloat *gloutputdata; +#endif /* PLAN9 */ + + fann_type *values; + #ifndef FIXEDFANN /* Arithmetic mean used to remove steady component in input data. */ float *scale_mean_in; diff --git a/src/include/fann_data_cpp.h b/src/include/fann_data_cpp.h index 5b378bb4..cb8a20c8 100644 --- a/src/include/fann_data_cpp.h +++ b/src/include/fann_data_cpp.h @@ -205,6 +205,16 @@ namespace FANN { * y = cos(x*s) * d = s*-sin(x*s) + FANN_LINEAR_PIECE_LEAKY - leaky ReLU + * span: -inf < y < inf + y = x<0? 
0.01*x: x + d = x<0? 0.01: 1 + + FANN_LINEAR_PIECE_RECT - ReLU + * span: -inf < y < inf + y = x<0? 0: x + d = x<0? 0: 1 + See also: , @@ -225,7 +235,9 @@ namespace FANN { LINEAR_PIECE, LINEAR_PIECE_SYMMETRIC, SIN_SYMMETRIC, - COS_SYMMETRIC + COS_SYMMETRIC, + LINEAR_PIECE_LEAKY, + LINEAR_PIECE_RECT }; /* Enum: network_type_enum diff --git a/src/include/fann_error.h b/src/include/fann_error.h index 69877f75..f119abd6 100644 --- a/src/include/fann_error.h +++ b/src/include/fann_error.h @@ -21,6 +21,10 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA #define __fann_error_h__ #include +#ifdef PLAN9 +#define FANN_EXTERNAL +#define FANN_API +#endif #define FANN_ERRSTR_MAX 128 struct fann_error; diff --git a/src/include/fann_internal.h b/src/include/fann_internal.h index 81787b3d..2bf2de19 100644 --- a/src/include/fann_internal.h +++ b/src/include/fann_internal.h @@ -22,9 +22,11 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA /* internal include file, not to be included directly */ +#ifndef PLAN9 #include #include #include +#endif #include "fann_data.h" #define FANN_FIX_VERSION "FANN_FIX_2.0" @@ -70,7 +72,7 @@ int fann_save_train_internal_fd(struct fann_train_data *data, FILE * file, const unsigned int save_as_fixed, unsigned int decimal_point); void fann_update_stepwise(struct fann *ann); -void fann_seed_rand(); +void fann_seed_rand(void); void fann_error(struct fann_error *errdat, const enum fann_errno_enum errno_f, ...); void fann_init_error_data(struct fann_error *errdat); diff --git a/src/mkfile b/src/mkfile new file mode 100644 index 00000000..7597b83e --- /dev/null +++ b/src/mkfile @@ -0,0 +1,31 @@ + +#include +#include + +void +fanntest(struct fann *ann, fann_type *input, fann_type *output, fann_type *desired_output, int gl) +{ + double a, b; + struct timeval now; + int o; + + ann->gl = gl; + + gettimeofday(&now, NULL); + b = now.tv_sec * 1000000; + b += now.tv_usec; + + fann_reset_MSE(ann); + fann_train(ann, 
input, desired_output); + + gettimeofday(&now, NULL); + a = now.tv_sec * 1000000; + a += now.tv_usec; + + fprintf(stderr, "%cPU: %f microseconds MSE: %0.10lf\n", gl? 'G': 'C', a - b, ann->MSE_value); +} + +int +main(int argc, char **argv) +{ + fann_type *input; + fann_type *output; + fann_type *desired_output; + struct fann *ann; + int i; + GLfloat *data; + + if (argc < 2) + return -1; + + i = atoi(argv[1]); + + ann = fann_create_standard(5, i, i, i, i, i); + fann_set_activation_function_hidden(ann, FANN_LINEAR_PIECE_LEAKY); + fann_set_activation_function_output(ann, FANN_SIGMOID); + input = calloc(sizeof(fann_type), ann->num_input); + desired_output = calloc(sizeof(fann_type), ann->num_output); + + srand(time(NULL)); + + for (i = 0; i < ann->num_input; i++) + input[i] = ((float)rand()/RAND_MAX)-0.5; + + for (i = 0; i < ann->num_output; i++) + desired_output[i] = ((float)rand()/RAND_MAX)-0.5; + + fann_print_parameters(ann); + + for (i = 0; i < 10; i++) { + fanntest(ann, input, output, desired_output, 1); + fanntest(ann, input, output, desired_output, 0); + } + + return 0; +} + diff --git a/tests/xortest.c b/tests/xortest.c new file mode 100644 index 00000000..195c2b19 --- /dev/null +++ b/tests/xortest.c @@ -0,0 +1,27 @@ +#include + +int +main() { + int i; + struct fann *ann = fann_create_standard(3, 2, 5, 1); + fann_type input[4][2] = { + { 0.0, 0.0 }, + { 1.0, 0.0 }, + { 0.0, 1.0 }, + { 1.0, 1.0 } + }; + fann_type output[4][1] = { + { 0.0 }, + { 1.0 }, + { 1.0 }, + { 0.0 } + }; + + do { + fann_reset_MSE(ann); + for (i = 0; i < 4; i++) + fann_train(ann, input[i], output[i]); + } while (ann->MSE_value > 0.001); + + fprintf(stderr, "MSE: %f\n", ann->MSE_value); +}