diff --git a/CMakeLists.txt b/CMakeLists.txt index 1b11d762..95c628fc 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -34,7 +34,7 @@ IF(BIICODE) TARGET_COMPILE_OPTIONS(${BII_LIB_TARGET} PUBLIC -DFANN_DLL_EXPORTS) ELSE() IF(${examples_present}) - TARGET_LINK_LIBRARIES(${BII_BLOCK_TARGET} INTERFACE gomp) + TARGET_LINK_LIBRARIES(${BII_BLOCK_TARGET} INTERFACE gomp OpenGL) ENDIF() ENDIF() ELSE() diff --git a/examples/mkfile b/examples/mkfile new file mode 100644 index 00000000..fb19fb5b --- /dev/null +++ b/examples/mkfile @@ -0,0 +1,11 @@ + +#endif + #include "config.h" #include "doublefann.h" @@ -28,4 +32,6 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA #include "fann_train_data.c" #include "fann_error.c" #include "fann_cascade.c" +#ifndef PLAN9 #include "parallel_fann.c" +#endif diff --git a/src/fann.c b/src/fann.c index 9af7e388..f86221d6 100644 --- a/src/fann.c +++ b/src/fann.c @@ -16,17 +16,340 @@ License along with this library; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ +/* + Why be a human? + I want to live since I do. + This life is painful. + + I have it all here, + yet still I feel so much pain. + Mortality sucks. + + My heart broke again. + So this time will be the last, + I will be metal. 
+*/ #include #include -#include #include #include #include +#ifdef PLAN9 +#include +#else +#include +#include +#include +#include +#include +#include +#include +#endif #include "config.h" #include "fann.h" +#ifndef PLAN9 +static const char* runShader = "#version 310 es\n" + "precision highp float;\n" + "layout(local_size_x = %d, local_size_y = 1, local_size_z = 1) in;\n" + "layout(std430) buffer;\n" + "layout(binding = 0) buffer Network\n" + "{\n" + " float e[];\n" + "} network;\n" + "layout(binding = 1) buffer Weights\n" + "{\n" + " float e[];\n" + "} weights;\n" + "layout(binding = 2) buffer Values\n" + "{\n" + " float e[];\n" + "} values;\n" + "layout(binding = 3) buffer Errors\n" + "{\n" + " float e[];\n" + "} errors;\n" + "layout(binding = 4) buffer Input\n" + "{\n" + " float e[];\n" + "} input_data;\n" + "layout(binding = 5) buffer Output\n" + "{\n" + " float e[];\n" + "} output_data;\n" + "void main()\n" + "{\n" + " int idx = int(gl_LocalInvocationID.x);\n" + " int threads = int(gl_WorkGroupSize.x);\n" + " int layers;\n" + " int i, o, n, inputs, outputs, l, total_neurons, total_weights;\n" + " layers = int(network.e[0]) - 1;\n" + " inputs = int(network.e[1]);\n" + " for (i = idx; i < inputs; i += threads)\n" + " values.e[i] = input_data.e[i];\n" + " barrier();\n" + " total_neurons = 0;\n" + " total_weights = 0;\n" + " for (l = 1; l < layers; l++) {\n" + " inputs = int(network.e[l]);\n" + " outputs = int(network.e[l+1]);\n" + " if (idx == 0)\n" + " values.e[total_neurons + inputs] = 1.0;\n" + " barrier();\n" + " for (o = idx; o < outputs; o += threads) {\n" + " errors.e[o] = 0.0;\n" + " n = o * inputs + o;\n" + " for (i = 0; i <= inputs; i++)\n" + " errors.e[o] += values.e[total_neurons + i] * weights.e[total_weights + n + i];\n" + " }\n" + " total_neurons += inputs + 1;\n" + " for (o = idx; o < outputs; o += threads) {\n" + " errors.e[o] *= 0.5;\n" + " if (errors.e[o] > 300.0)\n" + " errors.e[o] = 300.0;\n" + " else if (errors.e[o] < -300.0)\n" + " 
errors.e[o] = -300.0;\n" + " if (errors.e[o] < 0.0)\n" + " errors.e[o] *= 0.01;\n" + " values.e[total_neurons + o] = errors.e[o];\n" + " }\n" + " barrier();\n" + " total_weights += inputs * outputs + outputs;\n" + " }\n" + " inputs = int(network.e[layers]);\n" + " outputs = int(network.e[layers+1]);\n" + " if (idx == 0)\n" + " values.e[total_neurons + inputs] = 1.0;\n" + " barrier();\n" + " for (o = idx; o < outputs; o += threads) {\n" + " errors.e[o] = 0.0;\n" + " n = o * inputs + o;\n" + " for (i = 0; i <= inputs; i++)\n" + " errors.e[o] += values.e[total_neurons + i] * weights.e[total_weights + n + i];\n" + " if (errors.e[o] > 600.0)\n" + " errors.e[o] = 600.0;\n" + " else if (errors.e[o] < -600.0)\n" + " errors.e[o] = -600.0;\n" + " values.e[total_neurons + inputs + 1 + o] = (1.0/(1.0 + exp(-errors.e[o])));\n" + " output_data.e[o] = values.e[total_neurons + inputs + 1 + o];\n" + " }\n" + " barrier();\n" + "}\n"; + +static const char* trainShader = "#version 310 es\n" + "precision highp float;\n" + "layout(local_size_x = %d, local_size_y = 1, local_size_z = 1) in;\n" + "layout(std430) buffer;\n" + "layout(binding = 0) buffer Network\n" + "{\n" + " float e[];\n" + "} network;\n" + "layout(binding = 1) buffer Weights\n" + "{\n" + " float e[];\n" + "} weights;\n" + "layout(binding = 2) buffer Values\n" + "{\n" + " float e[];\n" + "} values;\n" + "layout(binding = 3) buffer Errors\n" + "{\n" + " float e[];\n" + "} errors;\n" + "layout(binding = 4) buffer Input\n" + "{\n" + " float e[];\n" + "} input_data;\n" + "layout(binding = 5) buffer Output\n" + "{\n" + " float e[];\n" + "} output_data;\n" + "void main()\n" + "{\n" + " int idx = int(gl_LocalInvocationID.x);\n" + " int threads = int(gl_WorkGroupSize.x);\n" + " int layers;\n" + " int i, o, l, n, total_neurons, total_weights, outputs, inputs, neuron_prev;\n" + " float neuron_diff, tmp_error;\n" + " layers = int(network.e[0]);\n" + " inputs = int(network.e[1]);\n" + " total_neurons = 0;\n" + " total_weights = 0;\n" 
+ " for (l = 1; l < layers; l++) {\n" + " total_neurons += int(network.e[l]) + 1;\n" + " total_weights += (int(network.e[l]) + 1) * int(network.e[l+1]);\n" + " }\n" + " total_weights -= (int(network.e[layers-1]) + 1) * int(network.e[layers]);\n" + " outputs = int(network.e[layers]);\n" + " for (o = idx; o < outputs; o += threads) {\n" + " neuron_diff = output_data.e[o] - values.e[total_neurons + o];\n" + " if(neuron_diff < -.9999999)\n" + " neuron_diff = -17.0;\n" + " else if(neuron_diff > .9999999)\n" + " neuron_diff = 17.0;\n" + " else\n" + " neuron_diff = log((1.0 + neuron_diff) / (1.0 - neuron_diff));\n" + " errors.e[total_neurons + o] = neuron_diff * values.e[total_neurons + o] * (1.0 - values.e[total_neurons + o]);\n" + " }\n" + " barrier();\n" + " for (l = layers; l > 2; l--) {\n" + " outputs = int(network.e[l]);\n" + " inputs = int(network.e[l-1]);\n" + " neuron_prev = total_neurons - inputs - 1;\n" + " for (i = idx; i < inputs; i += threads) {\n" + " errors.e[neuron_prev + i] = 0.0;\n" + " for (o = 0; o < outputs; o++)\n" + " errors.e[neuron_prev + i] += errors.e[total_neurons + o] * weights.e[total_weights + o * inputs + o + i];\n" + " errors.e[neuron_prev + i] *= 0.5;\n" + " if (values.e[neuron_prev + i] < 0.0)\n" + " errors.e[neuron_prev + i] *= 0.01;\n" + " }\n" + " barrier();\n" + " total_neurons = neuron_prev;\n" + " total_weights -= (int(network.e[l-2]) + 1) * inputs;\n" + " }\n" + " total_neurons = int(network.e[1]) + 1;\n" + " neuron_prev = 0;\n" + " total_weights = 0;\n" + " for (l = 2; l <= layers; l++) {\n" + " outputs = int(network.e[l]);\n" + " inputs = int(network.e[l-1]);\n" + " for (o = idx; o < outputs; o += threads) {\n" + " tmp_error = errors.e[total_neurons + o] * 0.7;\n" + " n = o * inputs + o;\n" + " for (i = 0; i <= inputs; i++)\n" + " weights.e[total_weights + n + i] += tmp_error * values.e[neuron_prev + i];\n" + " }\n" + " barrier();\n" + " neuron_prev = total_neurons;\n" + " total_neurons += outputs + 1;\n" + " total_weights += 
outputs * inputs + outputs;\n" + " }\n" + "}\n"; + +void fann_init_egl(void) { + int32_t fd = open ("/dev/dri/card0", O_RDWR); + if (fd <= 0) + exit(-3); + + struct gbm_device *gbm = gbm_create_device (fd); + if (gbm == NULL) + exit(-4); + + EGLDisplay dpy = eglGetPlatformDisplay (EGL_PLATFORM_GBM_MESA, gbm, NULL); + if (dpy == NULL) + exit(-5); + + EGLBoolean returnValue = eglInitialize(dpy, NULL, NULL); + if (returnValue != EGL_TRUE) { + printf("eglInitialize failed\n"); + exit(-1); + } + + EGLConfig cfg; + EGLint count; + EGLint s_configAttribs[] = { + EGL_RENDERABLE_TYPE, EGL_OPENGL_ES3_BIT_KHR, + EGL_NONE }; + if (eglChooseConfig(dpy, s_configAttribs, &cfg, 1, &count) == EGL_FALSE) { + printf("eglChooseConfig failed\n"); + exit(-1); + } + + EGLint context_attribs[] = { EGL_CONTEXT_CLIENT_VERSION, 3, EGL_NONE }; + EGLContext context = eglCreateContext(dpy, cfg, EGL_NO_CONTEXT, context_attribs); + if (context == EGL_NO_CONTEXT) { + printf("eglCreateContext failed\n"); + exit(-1); + } + returnValue = eglMakeCurrent(dpy, EGL_NO_SURFACE, EGL_NO_SURFACE, context); + if (returnValue != EGL_TRUE) { + printf("eglMakeCurrent failed returned %d\n", returnValue); + exit(-1); + } + + fprintf(stderr, "%s\n", glGetString(GL_VERSION)); + fprintf(stderr, "%s\n", glGetString(GL_EXTENSIONS)); +} + +void fann_create_shaders(struct fann *ann) +{ + GLint status; + GLint length; + char *log; + char *runShaderString; + char *trainShaderString; + int threads; + + glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_SIZE, 0, &threads); + fprintf(stderr, "GL_MAX_COMPUTE_WORK_GROUP_SIZE: %d\n", threads); + + ann->runShaderID = glCreateShader(GL_COMPUTE_SHADER); + + runShaderString = malloc(strlen(runShader) + 256); + snprintf(runShaderString, strlen(runShader) + 256 - 1, runShader, threads, threads); + int runShaderLen = strlen(runShaderString); + glShaderSource(ann->runShaderID, 1, (const char**)&runShaderString, &runShaderLen); + glCompileShader(ann->runShaderID); + 
glGetShaderiv(ann->runShaderID, GL_COMPILE_STATUS, &status); + if (status == GL_FALSE) { + glGetShaderiv(ann->runShaderID, GL_INFO_LOG_LENGTH, &length); + log = malloc(length+1); + glGetShaderInfoLog(ann->runShaderID, length, &length, log); + log[length] = '\0'; + fprintf(stderr, "%s", log); + exit(-1); + } + + ann->runShaderProgram = glCreateProgram(); + glAttachShader(ann->runShaderProgram, ann->runShaderID); + glLinkProgram(ann->runShaderProgram); + glGetProgramiv(ann->runShaderProgram, GL_LINK_STATUS, &status); + if (status == GL_FALSE) { + glGetProgramiv(ann->runShaderProgram, GL_INFO_LOG_LENGTH, &length); + log = malloc(length+1); + glGetProgramInfoLog(ann->runShaderProgram, length, &length, log); + log[length] = '\0'; + fprintf(stderr, "%s", log); + exit(-1); + } + + ann->trainShaderID = glCreateShader(GL_COMPUTE_SHADER); + + trainShaderString = malloc(strlen(trainShader) + 256); + snprintf(trainShaderString, strlen(trainShader) + 256 - 1, trainShader, threads, threads); + int trainShaderLen = strlen(trainShaderString); + glShaderSource(ann->trainShaderID, 1, (const char**)&trainShaderString, &trainShaderLen); + glCompileShader(ann->trainShaderID); + glGetShaderiv(ann->trainShaderID, GL_COMPILE_STATUS, &status); + if (status == GL_FALSE) { + glGetShaderiv(ann->trainShaderID, GL_INFO_LOG_LENGTH, &length); + log = malloc(length+1); + glGetShaderInfoLog(ann->trainShaderID, length, &length, log); + log[length] = '\0'; + fprintf(stderr, "%s", log); + exit(-1); + } + + ann->trainShaderProgram = glCreateProgram(); + glAttachShader(ann->trainShaderProgram, ann->trainShaderID); + glLinkProgram(ann->trainShaderProgram); + glGetProgramiv(ann->trainShaderProgram, GL_LINK_STATUS, &status); + if (status == GL_FALSE) { + glGetProgramiv(ann->trainShaderProgram, GL_INFO_LOG_LENGTH, &length); + log = malloc(length+1); + glGetProgramInfoLog(ann->trainShaderProgram, length, &length, log); + log[length] = '\0'; + fprintf(stderr, "%s", log); + exit(-1); + } + + ann->onGPU = 0; +} +#endif + + /* #define 
FANN_NO_SEED */ FANN_EXTERNAL struct fann *FANN_API fann_create_standard(unsigned int num_layers, ...) @@ -565,7 +888,6 @@ FANN_EXTERNAL fann_type *FANN_API fann_run(struct fann * ann, fann_type * input) struct fann_layer *layer_it, *last_layer; unsigned int activation_function; fann_type steepness; - /* store some variabels local for fast access */ struct fann_neuron *first_neuron = ann->first_layer->first_neuron; @@ -595,15 +917,18 @@ FANN_EXTERNAL fann_type *FANN_API fann_run(struct fann * ann, fann_type * input) i, multiplier, multiplier, input[i]); } #endif - first_neuron[i].value = input[i]; + *(first_neuron[i].value) = input[i]; } /* Set the bias neuron in the input layer */ #ifdef FIXEDFANN - (ann->first_layer->last_neuron - 1)->value = multiplier; + *((ann->first_layer->last_neuron - 1)->value) = multiplier; #else - (ann->first_layer->last_neuron - 1)->value = 1; + *((ann->first_layer->last_neuron - 1)->value) = 1; #endif +#ifndef PLAN9 +if (ann->gl == 0) { +#endif last_layer = ann->last_layer; for(layer_it = ann->first_layer + 1; layer_it != last_layer; layer_it++) { @@ -614,9 +939,9 @@ FANN_EXTERNAL fann_type *FANN_API fann_run(struct fann * ann, fann_type * input) { /* bias neurons */ #ifdef FIXEDFANN - neuron_it->value = multiplier; + *(neuron_it->value) = multiplier; #else - neuron_it->value = 1; + *(neuron_it->value) = 1; #endif continue; } @@ -639,31 +964,30 @@ FANN_EXTERNAL fann_type *FANN_API fann_run(struct fann * ann, fann_type * input) neurons = (layer_it - 1)->first_neuron; } - /* unrolled loop start */ i = num_connections & 3; /* same as modulo 4 */ switch (i) { case 3: - neuron_sum += fann_mult(weights[2], neurons[2].value); + neuron_sum += fann_mult(weights[2], *(neurons[2].value)); case 2: - neuron_sum += fann_mult(weights[1], neurons[1].value); + neuron_sum += fann_mult(weights[1], *(neurons[1].value)); case 1: - neuron_sum += fann_mult(weights[0], neurons[0].value); + neuron_sum += fann_mult(weights[0], *(neurons[0].value)); case 0: 
break; } - for(; i != num_connections; i += 4) + #pragma omp parallel for reduction(+:neuron_sum) + for(i = num_connections & 3; i < num_connections; i += 4) { neuron_sum += - fann_mult(weights[i], neurons[i].value) + - fann_mult(weights[i + 1], neurons[i + 1].value) + - fann_mult(weights[i + 2], neurons[i + 2].value) + - fann_mult(weights[i + 3], neurons[i + 3].value); + fann_mult(weights[i], *(neurons[i].value)) + + fann_mult(weights[i + 1], *(neurons[i + 1].value)) + + fann_mult(weights[i + 2], *(neurons[i + 2].value)) + + fann_mult(weights[i + 3], *(neurons[i + 3].value)); } /* unrolled loop end */ - /* * for(i = 0;i != num_connections; i++){ * printf("%f += %f*%f, ", neuron_sum, weights[i], neurons[i].value); @@ -679,22 +1003,23 @@ FANN_EXTERNAL fann_type *FANN_API fann_run(struct fann * ann, fann_type * input) switch (i) { case 3: - neuron_sum += fann_mult(weights[2], neuron_pointers[2]->value); + neuron_sum += fann_mult(weights[2], *(neuron_pointers[2]->value)); case 2: - neuron_sum += fann_mult(weights[1], neuron_pointers[1]->value); + neuron_sum += fann_mult(weights[1], *(neuron_pointers[1]->value)); case 1: - neuron_sum += fann_mult(weights[0], neuron_pointers[0]->value); + neuron_sum += fann_mult(weights[0], *(neuron_pointers[0]->value)); case 0: break; } - for(; i != num_connections; i += 4) + #pragma omp parallel for reduction(+:neuron_sum) + for(i = num_connections & 3; i < num_connections; i += 4) { neuron_sum += - fann_mult(weights[i], neuron_pointers[i]->value) + - fann_mult(weights[i + 1], neuron_pointers[i + 1]->value) + - fann_mult(weights[i + 2], neuron_pointers[i + 2]->value) + - fann_mult(weights[i + 3], neuron_pointers[i + 3]->value); + fann_mult(weights[i], *(neuron_pointers[i]->value)) + + fann_mult(weights[i + 1], *(neuron_pointers[i + 1]->value)) + + fann_mult(weights[i + 2], *(neuron_pointers[i + 2]->value)) + + fann_mult(weights[i + 3], *(neuron_pointers[i + 3]->value)); } } @@ -744,30 +1069,36 @@ FANN_EXTERNAL fann_type *FANN_API 
fann_run(struct fann * ann, fann_type * input) { case FANN_SIGMOID: case FANN_SIGMOID_STEPWISE: - neuron_it->value = + *(neuron_it->value) = (fann_type) fann_stepwise(v1, v2, v3, v4, v5, v6, r1, r2, r3, r4, r5, r6, 0, multiplier, neuron_sum); break; case FANN_SIGMOID_SYMMETRIC: case FANN_SIGMOID_SYMMETRIC_STEPWISE: - neuron_it->value = + *(neuron_it->value) = (fann_type) fann_stepwise(v1, v2, v3, v4, v5, v6, r1, r2, r3, r4, r5, r6, -multiplier, multiplier, neuron_sum); break; case FANN_THRESHOLD: - neuron_it->value = (fann_type) ((neuron_sum < 0) ? 0 : multiplier); + *(neuron_it->value) = (fann_type) ((neuron_sum < 0) ? 0 : multiplier); break; case FANN_THRESHOLD_SYMMETRIC: - neuron_it->value = (fann_type) ((neuron_sum < 0) ? -multiplier : multiplier); + *(neuron_it->value) = (fann_type) ((neuron_sum < 0) ? -multiplier : multiplier); break; case FANN_LINEAR: - neuron_it->value = neuron_sum; + *(neuron_it->value) = neuron_sum; break; case FANN_LINEAR_PIECE: - neuron_it->value = (fann_type)((neuron_sum < 0) ? 0 : (neuron_sum > multiplier) ? multiplier : neuron_sum); + *(neuron_it->value) = (fann_type)((neuron_sum < 0) ? 0 : (neuron_sum > multiplier) ? multiplier : neuron_sum); break; case FANN_LINEAR_PIECE_SYMMETRIC: - neuron_it->value = (fann_type)((neuron_sum < -multiplier) ? -multiplier : (neuron_sum > multiplier) ? multiplier : neuron_sum); + *(neuron_it->value) = (fann_type)((neuron_sum < -multiplier) ? -multiplier : (neuron_sum > multiplier) ? multiplier : neuron_sum); + break; + case FANN_LINEAR_PIECE_LEAKY: + *(neuron_it->value) = (fann_type)((neuron_sum < 0) ? 0.01 * neuron_sum: neuron_sum); + break; + case FANN_LINEAR_PIECE_RECT: + *(neuron_it->value) = (fann_type)((neuron_sum < 0) ? 
0: neuron_sum); break; case FANN_ELLIOT: case FANN_ELLIOT_SYMMETRIC: @@ -792,7 +1123,7 @@ FANN_EXTERNAL fann_type *FANN_API fann_run(struct fann * ann, fann_type * input) neuron_it->sum = neuron_sum; - fann_activation_switch(activation_function, neuron_sum, neuron_it->value); + fann_activation_switch(activation_function, neuron_sum, *(neuron_it->value)); #endif } } @@ -803,11 +1134,123 @@ FANN_EXTERNAL fann_type *FANN_API fann_run(struct fann * ann, fann_type * input) neurons = (ann->last_layer - 1)->first_neuron; for(i = 0; i != num_output; i++) { - output[i] = neurons[i].value; + output[i] = *(neurons[i].value); + } +#ifndef PLAN9 +} else { + GLenum err; + + if (ann->onGPU == 0) { + fann_init_gpu(ann); + + ann->onGPU = 1; } + + for (i = 0; i < ann->num_input; i++) + ann->glinputdata[i] = input[i]; + + glFinish(); + glUseProgram(ann->runShaderProgram); + glDispatchCompute(1, 1, 1); + glMemoryBarrier(GL_ALL_BARRIER_BITS); + glFinish(); + + for(i = 0; i != ann->num_output; i++) + ann->output[i] = ann->gloutputdata[i]; +} +#endif return ann->output; } +#ifndef PLAN9 +FANN_EXTERNAL void FANN_API fann_from_gpu(struct fann *ann) +{ + GLfloat *data; + int i; + + if (ann->gl != 0) { + glBindBuffer(GL_SHADER_STORAGE_BUFFER, ann->glweights); + data = (GLfloat*)glMapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, ann->total_connections * sizeof(GLfloat), GL_MAP_READ_BIT); + for(i = 0; i != ann->total_connections; i++) + ann->weights[i] = data[i]; + glUnmapBuffer(GL_SHADER_STORAGE_BUFFER); + } +} + +FANN_EXTERNAL void FANN_API fann_init_gpu(struct fann *ann) +{ + GLfloat *data; + GLfloat *glvalues; + GLfloat *glweights; + int nparameters; + GLfloat *parameters; + int i; + struct fann_layer *layer_it; + + glGenBuffers(1, &ann->glnetwork); + + nparameters = 1; + nparameters += (int)(ann->last_layer - ann->first_layer); + parameters = calloc(sizeof(GLfloat), nparameters); + parameters[0] = nparameters - 1; + for(i = 1, layer_it = ann->first_layer; layer_it != ann->last_layer; 
layer_it++, i++) + parameters[i] = (int)(layer_it->last_neuron - layer_it->first_neuron) - 1; + + glBindBuffer(GL_SHADER_STORAGE_BUFFER, ann->glnetwork); + glBufferData(GL_SHADER_STORAGE_BUFFER, nparameters * sizeof(GLfloat), parameters, GL_DYNAMIC_COPY); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, ann->glnetwork); + +// free(parameters); + + glGenBuffers(1, &ann->glweights); + + glweights = calloc(sizeof(GLfloat), ann->total_connections); + for (i = 0; i != ann->total_connections; i++) + glweights[i] = ann->weights[i]; + + glBindBuffer(GL_SHADER_STORAGE_BUFFER, ann->glweights); + glBufferData(GL_SHADER_STORAGE_BUFFER, ann->total_connections * sizeof(GLfloat), glweights, GL_DYNAMIC_COPY); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, ann->glweights); + +// free(glweights); + + glGenBuffers(1, &ann->glvalues); + + glvalues = calloc(sizeof(GLfloat), ann->total_neurons); + for (i = 0; i != ann->total_neurons; i++) + glvalues[i] = ann->values[i]; + + glBindBuffer(GL_SHADER_STORAGE_BUFFER, ann->glvalues); + glBufferData(GL_SHADER_STORAGE_BUFFER, ann->total_neurons * sizeof(GLfloat), glvalues, GL_DYNAMIC_COPY); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, ann->glvalues); + +// free(glvalues); + + glGenBuffers(1, &ann->glerrors); + + glBindBuffer(GL_SHADER_STORAGE_BUFFER, ann->glerrors); + glBufferData(GL_SHADER_STORAGE_BUFFER, ann->total_neurons * sizeof(GLfloat), NULL, GL_DYNAMIC_COPY); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, ann->glerrors); + + glGenBuffers(1, &ann->glinput); + + glBindBuffer(GL_SHADER_STORAGE_BUFFER, ann->glinput); + glBufferStorage(GL_SHADER_STORAGE_BUFFER, ann->num_input * sizeof(GLfloat), NULL, GL_MAP_WRITE_BIT|GL_MAP_PERSISTENT_BIT|GL_MAP_COHERENT_BIT); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 4, ann->glinput); + glBindBuffer(GL_SHADER_STORAGE_BUFFER, ann->glinput); + ann->glinputdata = (GLfloat*)glMapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, ann->num_input * sizeof(GLfloat), 
GL_MAP_WRITE_BIT|GL_MAP_COHERENT_BIT|GL_MAP_PERSISTENT_BIT); + + glGenBuffers(1, &ann->gloutput); + + glBindBuffer(GL_SHADER_STORAGE_BUFFER, ann->gloutput); + glBufferStorage(GL_SHADER_STORAGE_BUFFER, ann->num_output * sizeof(GLfloat), NULL, GL_MAP_READ_BIT|GL_MAP_WRITE_BIT|GL_MAP_PERSISTENT_BIT|GL_MAP_COHERENT_BIT); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 5, ann->gloutput); + glBindBuffer(GL_SHADER_STORAGE_BUFFER, ann->gloutput); + ann->gloutputdata = (GLfloat*)glMapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, ann->num_output * sizeof(GLfloat), GL_MAP_READ_BIT|GL_MAP_WRITE_BIT|GL_MAP_COHERENT_BIT|GL_MAP_PERSISTENT_BIT); + +} +#endif /* PLAN9 */ + FANN_EXTERNAL void FANN_API fann_destroy(struct fann *ann) { if(ann == NULL) @@ -1580,6 +2023,9 @@ struct fann *fann_allocate_structure(unsigned int num_layers) return NULL; } +#ifndef PLAN9 + ann->gl = 0; +#endif ann->errno_f = FANN_E_NO_ERROR; ann->error_log = fann_default_error_log; ann->errstr = NULL; @@ -1708,6 +2154,11 @@ struct fann *fann_allocate_structure(unsigned int num_layers) ann->last_layer = ann->first_layer + num_layers; +#ifndef PLAN9 + fann_init_egl(); + fann_create_shaders(ann); +#endif + return ann; } @@ -1756,10 +2207,12 @@ void fann_allocate_neurons(struct fann *ann) struct fann_neuron *neurons; unsigned int num_neurons_so_far = 0; unsigned int num_neurons = 0; + unsigned int i; /* all the neurons is allocated in one long array (calloc clears mem) */ neurons = (struct fann_neuron *) calloc(ann->total_neurons, sizeof(struct fann_neuron)); ann->total_neurons_allocated = ann->total_neurons; + ann->values = calloc(ann->total_neurons, sizeof(fann_type)); if(neurons == NULL) { @@ -1772,6 +2225,10 @@ void fann_allocate_neurons(struct fann *ann) num_neurons = (unsigned int)(layer_it->last_neuron - layer_it->first_neuron); layer_it->first_neuron = neurons + num_neurons_so_far; layer_it->last_neuron = layer_it->first_neuron + num_neurons; + layer_it->values = &ann->values[num_neurons_so_far]; + for (i = 0; i 
< num_neurons; i++) { + neurons[num_neurons_so_far + i].value = &(layer_it->values[i]); + } num_neurons_so_far += num_neurons; } @@ -1828,7 +2285,7 @@ FANN_EXTERNAL void FANN_API fann_enable_seed_rand() /* INTERNAL FUNCTION Seed the random function. */ -void fann_seed_rand() +void fann_seed_rand(void) { #ifndef _WIN32 FILE *fp = fopen("/dev/urandom", "r"); diff --git a/src/fann_cascade.c b/src/fann_cascade.c index 51954907..bfab8cd4 100644 --- a/src/fann_cascade.c +++ b/src/fann_cascade.c @@ -17,6 +17,12 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ +#ifdef PLAN9 +#include +#include +#include +#endif + #include "config.h" #include "fann.h" #include "string.h" @@ -446,7 +452,7 @@ int fann_initialize_candidates(struct fann *ann) /* TODO candidates should actually be created both in * the last layer before the output layer, and in a new layer. */ - neurons[candidate_index].value = 0; + *(neurons[candidate_index].value) = 0; neurons[candidate_index].sum = 0; neurons[candidate_index].activation_function = @@ -582,11 +588,11 @@ void fann_update_candidate_slopes(struct fann *ann) switch (i) { case 3: - cand_sum += weights[2] * neurons[2].value; + cand_sum += weights[2] * *(neurons[2].value); case 2: - cand_sum += weights[1] * neurons[1].value; + cand_sum += weights[1] * *(neurons[1].value); case 1: - cand_sum += weights[0] * neurons[0].value; + cand_sum += weights[0] * *(neurons[0].value); case 0: break; } @@ -594,9 +600,9 @@ void fann_update_candidate_slopes(struct fann *ann) for(; i != num_connections; i += 4) { cand_sum += - weights[i] * neurons[i].value + - weights[i + 1] * neurons[i + 1].value + - weights[i + 2] * neurons[i + 2].value + weights[i + 3] * neurons[i + 3].value; + weights[i] * *(neurons[i].value) + + weights[i + 1] * *(neurons[i + 1].value) + + weights[i + 2] * *(neurons[i + 2].value) + weights[i + 3] * *(neurons[i + 3].value); } /* * for(i = 0; i < num_connections; i++){ @@ -617,7 +623,7 @@ void 
fann_update_candidate_slopes(struct fann *ann) /* printf("%f = sigmoid(%f);\n", activation, cand_sum); */ cand_it->sum = cand_sum; - cand_it->value = activation; + *(cand_it->value) = activation; derived = fann_activation_derived(cand_it->activation_function, cand_it->activation_steepness, activation, cand_sum); @@ -655,7 +661,7 @@ void fann_update_candidate_slopes(struct fann *ann) cand_slopes = ann->train_slopes + cand_it->first_con; for(i = 0; i < num_connections; i++) { - cand_slopes[i] -= error_value * neurons[i].value; + cand_slopes[i] -= error_value * *(neurons[i].value); } } } @@ -741,6 +747,8 @@ fann_type fann_train_candidates_epoch(struct fann *ann, struct fann_train_data * case FANN_GAUSSIAN_STEPWISE: case FANN_ELLIOT: case FANN_LINEAR_PIECE: + case FANN_LINEAR_PIECE_LEAKY: + case FANN_LINEAR_PIECE_RECT: case FANN_SIN: case FANN_COS: break; @@ -923,7 +931,7 @@ void fann_add_candidate_neuron(struct fann *ann, struct fann_layer *layer) } /* Now inititalize the actual neuron */ - neuron_place->value = 0; + *(neuron_place->value) = 0; neuron_place->sum = 0; neuron_place->activation_function = candidate->activation_function; neuron_place->activation_steepness = candidate->activation_steepness; diff --git a/src/fann_io.c b/src/fann_io.c index 40fd5f1a..39778239 100644 --- a/src/fann_io.c +++ b/src/fann_io.c @@ -19,9 +19,14 @@ #include #include +#ifndef PLAN9 #include +#endif #include #include +#ifdef PLAN9 +#include +#endif #include "config.h" #include "fann.h" @@ -41,6 +46,7 @@ FANN_EXTERNAL struct fann *FANN_API fann_create_from_file(const char *configurat } ann = fann_create_from_fd(conf, configuration_file); fclose(conf); + return ann; } diff --git a/src/fann_train.c b/src/fann_train.c index 049e6de9..19e4d738 100644 --- a/src/fann_train.c +++ b/src/fann_train.c @@ -17,6 +17,10 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ +#ifdef PLAN9 +#define sqrtf sqrt +#endif + #include #include #include @@ -42,6 +46,10 @@ fann_type 
fann_activation_derived(unsigned int activation_function, case FANN_LINEAR_PIECE: case FANN_LINEAR_PIECE_SYMMETRIC: return (fann_type) fann_linear_derive(steepness, value); + case FANN_LINEAR_PIECE_LEAKY: + return (fann_type) ((value<0)? steepness * 0.01: steepness); + case FANN_LINEAR_PIECE_RECT: + return (fann_type) ((value<0)? 0: steepness); case FANN_SIGMOID: case FANN_SIGMOID_STEPWISE: value = fann_clip(value, 0.01f, 0.99f); @@ -95,11 +103,39 @@ FANN_EXTERNAL void FANN_API fann_train(struct fann *ann, fann_type * input, { fann_run(ann, input); +#ifndef PLAN9 +if (ann->gl == 0) { +#endif fann_compute_MSE(ann, desired_output); fann_backpropagate_MSE(ann); fann_update_weights(ann); +#ifndef PLAN9 +} else { + int i; + fann_type err; + GLfloat *errors; + GLenum glerr; + + for (i = 0; i < ann->num_output; i++) { + err = desired_output[i] - ann->output[i]; + ann->MSE_value += err * err; + } + + for (i = 0; i < ann->num_input; i++) + ann->glinputdata[i] = input[i]; + + for (i = 0; i < ann->num_output; i++) + ann->gloutputdata[i] = desired_output[i]; + + glFinish(); + glUseProgram(ann->trainShaderProgram); + glDispatchCompute(1, 1, 1); + glMemoryBarrier(GL_ALL_BARRIER_BITS); + glFinish(); +} +#endif } #endif @@ -133,6 +169,8 @@ fann_type fann_update_MSE(struct fann *ann, struct fann_neuron* neuron, fann_typ case FANN_LINEAR_PIECE: case FANN_SIN: case FANN_COS: + case FANN_LINEAR_PIECE_LEAKY: + case FANN_LINEAR_PIECE_RECT: break; } @@ -256,7 +294,7 @@ void fann_compute_MSE(struct fann *ann, fann_type * desired_output) for(; last_layer_begin != last_layer_end; last_layer_begin++) { - neuron_value = last_layer_begin->value; + neuron_value = *(last_layer_begin->value); neuron_diff = *desired_output - neuron_value; neuron_diff = fann_update_MSE(ann, last_layer_begin, neuron_diff); @@ -274,12 +312,13 @@ void fann_compute_MSE(struct fann *ann, fann_type * desired_output) *error_it = fann_activation_derived(last_layer_begin->activation_function, 
last_layer_begin->activation_steepness, neuron_value, last_layer_begin->sum) * neuron_diff; - +// fprintf(stderr, "%0.10f ", *error_it); desired_output++; error_it++; ann->num_MSE++; } +// fprintf(stderr, "\n"); } /* INTERNAL FUNCTION @@ -357,7 +396,7 @@ void fann_backpropagate_MSE(struct fann *ann) for(neuron_it = (layer_it - 1)->first_neuron; neuron_it != last_neuron; neuron_it++) { *error_prev_layer *= fann_activation_derived(neuron_it->activation_function, - neuron_it->activation_steepness, neuron_it->value, neuron_it->sum); + neuron_it->activation_steepness, *(neuron_it->value), neuron_it->sum); error_prev_layer++; } @@ -377,7 +416,7 @@ void fann_update_weights(struct fann *ann) /* store some variabels local for fast access */ const float learning_rate = ann->learning_rate; - const float learning_momentum = ann->learning_momentum; + const float learning_momentum = ann->learning_momentum; struct fann_neuron *first_neuron = ann->first_layer->first_neuron; struct fann_layer *first_layer = ann->first_layer; const struct fann_layer *last_layer = ann->last_layer; @@ -421,7 +460,7 @@ void fann_update_weights(struct fann *ann) weights_deltas = deltas_begin + neuron_it->first_con; for(i = 0; i != num_connections; i++) { - delta_w = tmp_error * prev_neurons[i].value + learning_momentum * weights_deltas[i]; + delta_w = tmp_error * *(prev_neurons[i].value) + learning_momentum * weights_deltas[i]; weights[i] += delta_w ; weights_deltas[i] = delta_w; } @@ -437,7 +476,7 @@ void fann_update_weights(struct fann *ann) weights_deltas = deltas_begin + neuron_it->first_con; for(i = 0; i != num_connections; i++) { - delta_w = tmp_error * prev_neurons[i].value + learning_momentum * weights_deltas[i]; + delta_w = tmp_error * *(prev_neurons[i].value) + learning_momentum * weights_deltas[i]; weights[i] += delta_w; weights_deltas[i] = delta_w; } @@ -514,7 +553,7 @@ void fann_update_slopes_batch(struct fann *ann, struct fann_layer *layer_begin, num_connections = neuron_it->last_con - 
neuron_it->first_con; for(i = 0; i != num_connections; i++) { - neuron_slope[i] += tmp_error * prev_neurons[i].value; + neuron_slope[i] += tmp_error * *(prev_neurons[i].value); } } } @@ -528,7 +567,7 @@ void fann_update_slopes_batch(struct fann *ann, struct fann_layer *layer_begin, connections = ann->connections + neuron_it->first_con; for(i = 0; i != num_connections; i++) { - neuron_slope[i] += tmp_error * connections[i]->value; + neuron_slope[i] += tmp_error * *(connections[i]->value); } } } diff --git a/src/fann_train_data.c b/src/fann_train_data.c index b93ec35d..a431954c 100644 --- a/src/fann_train_data.c +++ b/src/fann_train_data.c @@ -21,6 +21,10 @@ #include #include #include +#ifdef PLAN9 +#include +#define sqrtf sqrt +#endif #include "config.h" #include "fann.h" diff --git a/src/fixedfann.c b/src/fixedfann.c index a48cd085..3f2b1465 100644 --- a/src/fixedfann.c +++ b/src/fixedfann.c @@ -19,6 +19,10 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA /* Easy way to allow for build of multiple binaries */ +#ifdef PLAN9 +#include +#endif + #include "config.h" #include "fixedfann.h" diff --git a/src/floatfann.c b/src/floatfann.c index b9ad0dd0..fae67a4a 100644 --- a/src/floatfann.c +++ b/src/floatfann.c @@ -19,6 +19,10 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA /* Easy way to allow for build of multiple binaries */ +#ifdef PLAN9 +#include +#endif + #include "config.h" #include "floatfann.h" @@ -28,4 +32,6 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA #include "fann_train_data.c" #include "fann_error.c" #include "fann_cascade.c" +#ifndef PLAN9 #include "parallel_fann.c" +#endif diff --git a/src/include/fann.h b/src/include/fann.h index 5adda119..82289b24 100644 --- a/src/include/fann.h +++ b/src/include/fann.h @@ -37,6 +37,10 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA /* Group: Creation, Destruction & Execution */ #ifndef FANN_INCLUDE +#ifdef 
_PLAN9_SOURCE +#pragma lib "/$M/lib/ape/libfann.a" +#endif + /* just to allow for inclusion of fann.h in normal stuations where only floats are needed */ #ifdef FIXEDFANN #include "fixedfann.h" @@ -45,7 +49,9 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA #endif /* FIXEDFANN */ #else - + +#ifndef PLAN9 + /* COMPAT_TIME REPLACEMENT */ #ifndef _WIN32 #include @@ -58,6 +64,8 @@ extern unsigned long __stdcall GetTickCount(void); #include #endif /* _MSC_EXTENSIONS */ #endif /* _WIN32 */ + +#endif #ifndef __fann_h__ #define __fann_h__ @@ -113,11 +121,14 @@ extern "C" to use dll's. To use dll's FANN_USE_DLL has to be defined before including the fann headers. */ +#ifndef PLAN9 #if defined(_MSC_VER) && (_MSC_VER > 1300) #ifndef FANN_NO_DLL #define FANN_USE_DLL #endif /* FANN_USE_LIB */ #endif /* _MSC_VER */ +#endif +#ifndef PLAN9 #if defined(_MSC_VER) && (defined(FANN_USE_DLL) || defined(FANN_DLL_EXPORTS)) #ifdef FANN_DLL_EXPORTS #define FANN_EXTERNAL __declspec(dllexport) @@ -128,7 +139,11 @@ extern "C" #else /* */ #define FANN_EXTERNAL #define FANN_API +#ifndef PLAN9 +#include +#endif #endif /* _MSC_VER */ +#endif /* ----- End of macros used to define DLL external entrypoints ----- */ #include "fann_error.h" @@ -139,6 +154,8 @@ extern "C" #include "fann_cascade.h" #include "fann_io.h" +#define MULTILINE_STRING(...) #__VA_ARGS__ + /* Function: fann_create_standard Creates a standard fully connected backpropagation neural network. 
@@ -615,6 +632,13 @@ FANN_EXTERNAL void FANN_API fann_disable_seed_rand(); */ FANN_EXTERNAL void FANN_API fann_enable_seed_rand(); +#ifndef PLAN9 + +FANN_EXTERNAL void FANN_API fann_from_gpu(struct fann *ann); + +FANN_EXTERNAL void FANN_API fann_init_gpu(struct fann *ann); + +#endif /* PLAN9 */ #ifdef FIXEDFANN diff --git a/src/include/fann_activation.h b/src/include/fann_activation.h index 80cab7ca..eba684a4 100644 --- a/src/include/fann_activation.h +++ b/src/include/fann_activation.h @@ -33,13 +33,19 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA __doublefann_h__ is not defined */ #ifndef __doublefann_h__ +#ifndef PLAN9 #define FANN_EXP(x) expf(x) -#define FANN_SIN(x) sinf(x) -#define FANN_COS(x) cosf(x) +#define _FANN_SIN(x) sinf(x) +#define _FANN_COS(x) cosf(x) #else #define FANN_EXP(x) exp(x) -#define FANN_SIN(x) sin(x) -#define FANN_COS(x) cos(x) +#define _FANN_SIN(x) sin(x) +#define _FANN_COS(x) cos(x) +#endif +#else +#define FANN_EXP(x) exp(x) +#define _FANN_SIN(x) sin(x) +#define _FANN_COS(x) cos(x) #endif #define fann_linear_func(v1, r1, v2, r2, sum) (((((r2)-(r1)) * ((sum)-(v1)))/((v2)-(v1))) + (r1)) @@ -80,19 +86,19 @@ __doublefann_h__ is not defined #define fann_elliot_symmetric_derive(steepness, value, sum) (steepness * 1.0f / ((1.0f + fann_abs(sum)) * (1.0f + fann_abs(sum)))) /* FANN_SIN_SYMMETRIC */ -#define fann_sin_symmetric_real(sum) (FANN_SIN(sum)) +#define fann_sin_symmetric_real(sum) (_FANN_SIN(sum)) #define fann_sin_symmetric_derive(steepness, sum) (steepness*cos(steepness*sum)) /* FANN_COS_SYMMETRIC */ -#define fann_cos_symmetric_real(sum) (FANN_COS(sum)) +#define fann_cos_symmetric_real(sum) (_FANN_COS(sum)) #define fann_cos_symmetric_derive(steepness, sum) (steepness*-sin(steepness*sum)) /* FANN_SIN */ -#define fann_sin_real(sum) (FANN_SIN(sum)/2.0f+0.5f) +#define fann_sin_real(sum) (_FANN_SIN(sum)/2.0f+0.5f) #define fann_sin_derive(steepness, sum) (steepness*cos(steepness*sum)/2.0f) /* FANN_COS */ -#define 
fann_cos_real(sum) (FANN_COS(sum)/2.0f+0.5f) +#define fann_cos_real(sum) (_FANN_COS(sum)/2.0f+0.5f) #define fann_cos_derive(steepness, sum) (steepness*-sin(steepness*sum)/2.0f) #define fann_activation_switch(activation_function, value, result) \ @@ -152,6 +158,12 @@ switch(activation_function) \ case FANN_GAUSSIAN_STEPWISE: \ result = 0; \ break; \ + case FANN_LINEAR_PIECE_LEAKY: \ + result = (fann_type)((value < 0) ? value*0.01 : value); \ + break; \ + case FANN_LINEAR_PIECE_RECT: \ + result = (fann_type)((value < 0) ? 0 : value); \ + break; \ } #endif diff --git a/src/include/fann_data.h b/src/include/fann_data.h index 99f42c76..9e7dd6ed 100644 --- a/src/include/fann_data.h +++ b/src/include/fann_data.h @@ -197,6 +197,16 @@ static char const *const FANN_TRAIN_NAMES[] = { * span: 0 <= y <= 1 * y = cos(x*s)/2+0.5 * d = s*-sin(x*s)/2 + + FANN_LINEAR_PIECE_LEAKY - leaky ReLU + * span: -inf < y < inf + * y = x<0? 0.01*x: x + * d = x<0? 0.01: 1 + + FANN_LINEAR_PIECE_RECT - ReLU + * span: -inf < y < inf + * y = x<0? 0: x + * d = x<0? 
0: 1 See also: , , @@ -226,7 +236,9 @@ enum fann_activationfunc_enum FANN_SIN_SYMMETRIC, FANN_COS_SYMMETRIC, FANN_SIN, - FANN_COS + FANN_COS, + FANN_LINEAR_PIECE_LEAKY, + FANN_LINEAR_PIECE_RECT, }; /* Constant: FANN_ACTIVATIONFUNC_NAMES @@ -258,7 +270,9 @@ static char const *const FANN_ACTIVATIONFUNC_NAMES[] = { "FANN_SIN_SYMMETRIC", "FANN_COS_SYMMETRIC", "FANN_SIN", - "FANN_COS" + "FANN_COS", + "FANN_LINEAR_PIECE_LEAKY", + "FANN_LINEAR_PIECE_RECT" }; /* Enum: fann_errorfunc_enum @@ -420,7 +434,7 @@ struct fann_neuron /* The sum of the inputs multiplied with the weights */ fann_type sum; /* The value of the activation function applied to the sum */ - fann_type value; + fann_type *value; /* The steepness of the activation function */ fann_type activation_steepness; /* Used to choose which activation function to use */ @@ -445,6 +459,8 @@ struct fann_layer /* A pointer to the neuron past the last neuron in the layer */ /* the number of neurons is last_neuron - first_neuron */ struct fann_neuron *last_neuron; + + fann_type *values; }; /* Struct: struct fann_error @@ -764,7 +780,30 @@ struct fann * Not allocated if not used. */ fann_type *prev_weights_deltas; - + +#ifndef PLAN9 + GLuint runShaderID; + GLuint runShaderProgram; + + GLuint trainShaderID; + GLuint trainShaderProgram; + + unsigned char onGPU; + unsigned char gl; + + GLuint glerrors; + GLuint glweights; + GLuint glvalues; + GLuint glnetwork; + GLuint glinput; + GLuint gloutput; + + GLfloat *glinputdata; + GLfloat *gloutputdata; +#endif /* PLAN9 */ + + fann_type *values; + #ifndef FIXEDFANN /* Arithmetic mean used to remove steady component in input data. */ float *scale_mean_in; diff --git a/src/include/fann_data_cpp.h b/src/include/fann_data_cpp.h index 5b378bb4..cb8a20c8 100644 --- a/src/include/fann_data_cpp.h +++ b/src/include/fann_data_cpp.h @@ -205,6 +205,16 @@ namespace FANN { * y = cos(x*s) * d = s*-sin(x*s) + FANN_LINEAR_PIECE_LEAKY - leaky ReLU + * span: -inf < y < inf + y = x<0? 
0.01*x: x + d = x<0? 0.01: 1 + + FANN_LINEAR_PIECE_RECT - ReLU + * span: -inf < y < inf + y = x<0? 0: x + d = x<0? 0: 1 + See also: , @@ -225,7 +235,9 @@ namespace FANN { LINEAR_PIECE, LINEAR_PIECE_SYMMETRIC, SIN_SYMMETRIC, - COS_SYMMETRIC + COS_SYMMETRIC, + LINEAR_PIECE_LEAKY, + LINEAR_PIECE_RECT }; /* Enum: network_type_enum diff --git a/src/include/fann_error.h b/src/include/fann_error.h index 69877f75..f119abd6 100644 --- a/src/include/fann_error.h +++ b/src/include/fann_error.h @@ -21,6 +21,10 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA #define __fann_error_h__ #include +#ifdef PLAN9 +#define FANN_EXTERNAL +#define FANN_API +#endif #define FANN_ERRSTR_MAX 128 struct fann_error; diff --git a/src/include/fann_internal.h b/src/include/fann_internal.h index 81787b3d..2bf2de19 100644 --- a/src/include/fann_internal.h +++ b/src/include/fann_internal.h @@ -22,9 +22,11 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA /* internal include file, not to be included directly */ +#ifndef PLAN9 #include #include #include +#endif #include "fann_data.h" #define FANN_FIX_VERSION "FANN_FIX_2.0" @@ -70,7 +72,7 @@ int fann_save_train_internal_fd(struct fann_train_data *data, FILE * file, const unsigned int save_as_fixed, unsigned int decimal_point); void fann_update_stepwise(struct fann *ann); -void fann_seed_rand(); +void fann_seed_rand(void); void fann_error(struct fann_error *errdat, const enum fann_errno_enum errno_f, ...); void fann_init_error_data(struct fann_error *errdat); diff --git a/src/mkfile b/src/mkfile new file mode 100644 index 00000000..7597b83e --- /dev/null +++ b/src/mkfile @@ -0,0 +1,31 @@ + +#include +#include + +void +fanntest(struct fann *ann, fann_type *input, fann_type *output, fann_type *desired_output, int gl) +{ + double a, b; + struct timeval now; + int o; + + ann->gl = gl; + + gettimeofday(&now, NULL); + b = now.tv_sec * 1000000; + b += now.tv_usec; + + fann_reset_MSE(ann); + fann_train(ann, 
input, desired_output); + + gettimeofday(&now, NULL); + a = now.tv_sec * 1000000; + a += now.tv_usec; + + fprintf(stderr, "%cPU: %f microseconds MSE: %0.10lf\n", gl? 'G': 'C', a - b, ann->MSE_value); +} + +int +main(int argc, char **argv) +{ + fann_type *input; + fann_type *output; + fann_type *desired_output; + struct fann *ann; + int i; + GLfloat *data; + + if (argc < 2) + return -1; + + i = atoi(argv[1]); + + ann = fann_create_standard(5, i, i, i, i, i); + fann_set_activation_function_hidden(ann, FANN_LINEAR_PIECE_LEAKY); + fann_set_activation_function_output(ann, FANN_SIGMOID); + input = calloc(sizeof(fann_type), ann->num_input); + desired_output = calloc(sizeof(fann_type), ann->num_output); + + srand(time(NULL)); + + for (i = 0; i < ann->num_input; i++) + input[i] = ((float)rand()/RAND_MAX)-0.5; + + for (i = 0; i < ann->num_output; i++) + desired_output[i] = ((float)rand()/RAND_MAX)-0.5; + + fann_print_parameters(ann); + + for (i = 0; i < 10; i++) { + fanntest(ann, input, output, desired_output, 1); + fanntest(ann, input, output, desired_output, 0); + } + + return 0; +} + diff --git a/tests/xortest.c b/tests/xortest.c new file mode 100644 index 00000000..195c2b19 --- /dev/null +++ b/tests/xortest.c @@ -0,0 +1,27 @@ +#include + +int +main() { + int i; + struct fann *ann = fann_create_standard(3, 2, 5, 1); + fann_type input[4][2] = { + { 0.0, 0.0 }, + { 1.0, 0.0 }, + { 0.0, 1.0 }, + { 1.0, 1.0 } + }; + fann_type output[4][1] = { + { 0.0 }, + { 1.0 }, + { 1.0 }, + { 0.0 } + }; + + do { + fann_reset_MSE(ann); + for (i = 0; i < 4; i++) + fann_train(ann, input[i], output[i]); + } while (ann->MSE_value > 0.001); + + fprintf(stderr, "MSE: %f\n", ann->MSE_value); +}