diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 0a6e797..f4221be 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -6,7 +6,7 @@ jobs: strategy: matrix: os: [ubuntu-latest, windows-latest] - app: [pendulum, tic-tac-toe, mnist, stickgame] + app: [pendulum, tic-tac-toe, mnist, stickgame, gridworld] steps: - name: Checkout uses: actions/checkout@v2 diff --git a/gridworld/CMakeLists.txt b/gridworld/CMakeLists.txt new file mode 100644 index 0000000..8eee8c6 --- /dev/null +++ b/gridworld/CMakeLists.txt @@ -0,0 +1,101 @@ +cmake_minimum_required(VERSION 3.12.4) + +# ******************************************* +# ************* CMake Content *************** +# ******************************************* +# This CMake create a workspace containing the following projects +# +# Programs +# - gridworld + +set (PROJECT_NAME gridworld) + +project(${PROJECT_NAME}) + +# Add definition for relative path into project +add_definitions( -DPROJECT_ROOT_PATH="${CMAKE_CURRENT_SOURCE_DIR}") + +# Disable C and C++ compiler extensions. +# C/CXX_EXTENSIONS are ON by default to allow the compilers to use extended +# variants of the C/CXX language. +# However, this could expose cross-platform bugs in user code or in the headers +# of third-party dependencies and thus it is strongly suggested to turn +# extensions off. 
+set(CMAKE_C_EXTENSIONS OFF) +set(CMAKE_CXX_EXTENSIONS OFF) + +set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD_REQUIRED ON) + +if(NOT ${CMAKE_GENERATOR} MATCHES "Visual Studio.*") + + # Link with pthread + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread") + + # Debug or release + if(CMAKE_BUILD_TYPE MATCHES "Debug") + MESSAGE("Generate Debug project") + set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/Debug) + set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -g -pg -Wall") + else() + MESSAGE("Generate Release project") + set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/Release) + set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -Wall") + endif() + #add libmath during non visual studio builds + set(CMAKE_EXTRA_LIB m) +else() + set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) + add_definitions(-D_CRT_SECURE_NO_WARNINGS) +endif() + +# Add definitions for testing purposes +if(${TESTING}) + MESSAGE("Testing mode") + add_definitions(-DNO_CONSOLE_CONTROL -DNB_GENERATIONS=2) +endif() + +# ******************************************* +# *********** GEGELATI LIBRARY ************** +# ******************************************* + +if(WIN32) + set(LIBS_DIR ${CMAKE_CURRENT_SOURCE_DIR}/lib) + # find the gegelatilib-x.y.z folder in the lib directory. 
+ file(GLOB GEGELATI_ROOT_DIR "${LIBS_DIR}/gegelatilib-[\\.|0-9]*") + set(ENV{GEGELATI_DIR} ${GEGELATI_ROOT_DIR}) +endif() +find_package(GEGELATI) + + +if (WIN32) + file(GLOB + GEGELATI_DLL + ${GEGELATI_ROOT_DIR}/bin/*.dll + ) + + MESSAGE("Copy GEGELATI DLLs into ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}") + file(COPY ${GEGELATI_DLL} DESTINATION ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}) + if(${CMAKE_GENERATOR} MATCHES "Visual Studio.*") + file(COPY ${GEGELATI_DLL} DESTINATION ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/Debug) + file(COPY ${GEGELATI_DLL} DESTINATION ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/Release) + endif() +endif() + +# ******************************************* +# ************** Executable **************** +# ******************************************* + +# Executable to learn the TPG +file(GLOB_RECURSE + gridworld_files + ./src/*.cpp + ./src/*.h + ./params.json +) + + +include_directories(${GEGELATI_INCLUDE_DIRS} ) +add_executable(${PROJECT_NAME} ${gridworld_files}) +target_link_libraries(${PROJECT_NAME} ${GEGELATI_LIBRARIES}) +target_compile_definitions(${PROJECT_NAME} PRIVATE ROOT_DIR="${CMAKE_SOURCE_DIR}") diff --git a/gridworld/README.md b/gridworld/README.md new file mode 100644 index 0000000..f5173e8 --- /dev/null +++ b/gridworld/README.md @@ -0,0 +1,23 @@ +# GridWorld + +This application teaches a learning agent built with the [GEGELATI library](https://github.com/gegelati/gegelati) how to get out of a gridWorld. + +The gridWorld is a grid whose tiles hold the values 0, 1, 2, or 3: + * 0 is an available tile + * 1 is a good output tile + * 2 is a bad output tile + * 3 is an unavailable tile + +The agent starts at coordinate (0, 0). It can go left, right, up and down. + +It gets a reward of -1 if it reaches a tile with value 0. +If it reaches a tile with value 1 or 2, the episode terminates and the agent gets a reward of 100 or -100, respectively. + +## How to Build? 
+The build process of applications relies on [cmake](https://cmake.org) to configure a project for a wide variety of development environments and operating systems. Install [cmake](https://cmake.org/download/) on your system before building the application. + +### Under Windows +1. Copy the `gegelatilib-x.y.z` folder containing the binaries of the [GEGELATI library](https://github.com/gegelati/gegelati) into the `lib` folder. +2. Open a command line interface in the `bin` folder. +3. Enter the following command to create the project for your favorite IDE: `cmake ..`. +4. Open the project created in the `bin` folder, or launch the build with the following command: `cmake --build .`. diff --git a/gridworld/bin/.dummy b/gridworld/bin/.dummy new file mode 100644 index 0000000..422826c --- /dev/null +++ b/gridworld/bin/.dummy @@ -0,0 +1 @@ +This file exists only to force the presence of the lib folder in the git repository. diff --git a/gridworld/lib/.dummy b/gridworld/lib/.dummy new file mode 100644 index 0000000..422826c --- /dev/null +++ b/gridworld/lib/.dummy @@ -0,0 +1 @@ +This file exists only to force the presence of the lib folder in the git repository. diff --git a/gridworld/params.json b/gridworld/params.json new file mode 100644 index 0000000..da97d6f --- /dev/null +++ b/gridworld/params.json @@ -0,0 +1,113 @@ +{ + // Number of recordings held in the Archive. + // "archiveSize" : 50, // Default value + "archiveSize" : 2000, + // Probability of archiving the result of each Program execution. + // "archivingProbability" : 0.05, // Default value + "archivingProbability" : 0.01, + // Boolean used to activate an evaluation of the surviving roots in validation + // mode after the training at each generation. + // "doValidation" : false, // Default value + "doValidation" : false, + // Maximum number of actions performed on the learning environment during + // each evaluation of a root. 
+ // "maxNbActionsPerEval" : 1000, // Default value + "maxNbActionsPerEval" : 100, + // Maximum number of times a given root is evaluated. After this number is + // reached, possibly after several generations, the score of the root will be + // fixed, and no further evaluation will be done. + // "maxNbEvaluationPerPolicy" : 1000, // Default value + "maxNbEvaluationPerPolicy" : 10, + "mutation" : + { + "prog" : + { + // Maximum constant value possible. + // "maxConstValue" : 100, // Default value + "maxConstValue" : 10, + // Maximum number of Lines within the Program of the TPG. + // "maxProgramSize" : 96, // Default value + "maxProgramSize" : 20, + // Minimum constant value possible. + // "minConstValue" : -10, // Default value + "minConstValue" : -10, + // Probability of inserting a line in the Program. + // "pAdd" : 0.5, // Default value + "pAdd" : 0.5, + // Probability of each constant to be mutated. + // "pConstantMutation" : 0.5, // Default value + "pConstantMutation" : 0.5, + // Probability of deleting a line of the Program. + // "pDelete" : 0.5, // Default value + "pDelete" : 0.5, + // Probability of altering a line of the Program. + // "pMutate" : 1.0, // Default value + "pMutate" : 1.0, + // Probability of swapping two lines of the Program. + // "pSwap" : 1.0, // Default value + "pSwap" : 1.0 + }, + "tpg" : + { + // When a Program is mutated, makes sure its behavior is no longer the same. + // "forceProgramBehaviorChangeOnMutation" : false, // Default value + "forceProgramBehaviorChangeOnMutation" : true, + // Maximum number of TPGEdges connected to each TPGTeam of the TPGGraph when + // initialized. + // "maxInitOutgoingEdges" : 3, // Default value + "maxInitOutgoingEdges" : 4, + // Maximum number of outgoing edges during TPGGraph mutations. + // "maxOutgoingEdges" : 5, // Default value + "maxOutgoingEdges" : 10, + // Number of TPGAction vertices of the initialized TPGGraph. + // This parameter is generally automatically set by the LearningEnvironment. 
+ // /* "nbActions" : 0,*/ // Commented by default + /* "nbActions" : 0,*/ + // Number of root TPGTeams to maintain when populating the TPGGraph + // "nbRoots" : 100, // Default value + "nbRoots" : 500, + // Probability of adding an outgoing Edge to a Team. + // "pEdgeAddition" : 0.7, // Default value + "pEdgeAddition" : 0.7, + // Probability of deleting an outgoing Edge of a Team. + // "pEdgeDeletion" : 0.7, // Default value + "pEdgeDeletion" : 0.7, + // Probability of changing the destination of an Edge. + // "pEdgeDestinationChange" : 0.1, // Default value + "pEdgeDestinationChange" : 0.1, + // Probability of the new destination of an Edge to be an Action. + // "pEdgeDestinationIsAction" : 0.5, // Default value + "pEdgeDestinationIsAction" : 0.5, + // Probability of mutating the Program of an outgoing Edge. + // "pProgramMutation" : 0.2, // Default value + "pProgramMutation" : 0.2 + } + }, + // Number of generations of the training. + // "nbGenerations" : 500, // Default value + "nbGenerations" : 10, + // [Only used in AdversarialLearningAgent.] + // Number of times each job is evaluated in the learning process. + // Each root may belong to several jobs, hence this parameter should be lower + // than the nbIterationsPerPolicyEvaluation parameter. + // "nbIterationsPerJob" : 1, // Default value + "nbIterationsPerJob" : 1, + // Number of evaluation of each root per generation. + // "nbIterationsPerPolicyEvaluation" : 5, // Default value + "nbIterationsPerPolicyEvaluation" : 1, + // Number of Constant available in each Program. + // "nbProgramConstant" : 0, // Default value + "nbProgramConstant" : 5, + // Number of registers for the Program execution. + // "nbRegisters" : 8, // Default value + "nbRegisters" : 8, + // [Only used in ParallelLearningAgent and child classes.] + // Number of threads used for the training process. + // When undefined in the json file, this parameter is automatically set to the + // number of cores of the CPU. 
+ // /* "nbThreads" : 0,*/ // Commented by default + "nbThreads" : 1, + // Percentage of deleted (and regenerated) root TPGVertex at each generation. + // "ratioDeletedRoots" : 0.5, // Default value + "ratioDeletedRoots" : 0.5 +} diff --git a/gridworld/src/gridworld.cpp b/gridworld/src/gridworld.cpp new file mode 100644 index 0000000..b33b0a8 --- /dev/null +++ b/gridworld/src/gridworld.cpp @@ -0,0 +1,100 @@ +#include"gridworld.h" + +void GridWorld::reset(size_t seed, Learn::LearningMode mode, uint16_t iterationNumber, uint64_t generationNumber){ + + // Reset agent coordinates + agentCoord = {0, 0}; + + // Reset terminated and score + terminated = false; + score = 0.0; + + // Set data + currentState.setDataAt(typeid(double), 0, agentCoord[0]); + currentState.setDataAt(typeid(double), 1, agentCoord[1]); +} + +bool GridWorld::positionAvailable(int pos_x, int pos_y){ + + // Reject any x coordinate outside [0, size[0]); negative test first so it short-circuits + if(pos_x < 0 || pos_x >= size[0]){ + return false; + } + + // Reject any y coordinate outside [0, size[1]); guards the grid[pos_y][pos_x] access below + if(pos_y < 0 || pos_y >= size[1]){ + return false; + } + + // position unavailable because tile is unavailable + if (grid[pos_y][pos_x] == 3){ + return false; + } + + // Else : position is available + return true; + +} + +void GridWorld::doAction(uint64_t action){ + + switch (action){ + case 0: // left + if (positionAvailable(agentCoord[0] - 1, agentCoord[1])) agentCoord[0]--; + break; + case 1: // Down + if (positionAvailable(agentCoord[0], agentCoord[1] + 1)) agentCoord[1]++; + break; + case 2: // Right + if (positionAvailable(agentCoord[0] + 1, agentCoord[1])) agentCoord[0]++; + break; + case 3: // Up + if (positionAvailable(agentCoord[0], agentCoord[1] - 1)) agentCoord[1]--; + break; + } + + // Reward is always -1 except when an output is reached + double reward = -1; + + if(grid[agentCoord[1]][agentCoord[0]] == 1){ + // good output reached + terminated = true; + reward = 100; + } else if(grid[agentCoord[1]][agentCoord[0]] == 2){ + // Bad output reached + terminated 
= true; + reward = -100; + } + + // update score + score += reward; + + // Set data + currentState.setDataAt(typeid(double), 0, agentCoord[0]); + currentState.setDataAt(typeid(double), 1, agentCoord[1]); +} + +bool GridWorld::isTerminal() const{ + return terminated; +} + +double GridWorld::getScore() const { + return score; +} + +std::vector> GridWorld::getDataSources() +{ + auto result = std::vector>(); + result.push_back(this->currentState); + return result; +} + +Learn::LearningEnvironment* GridWorld::clone() const +{ + return new GridWorld(*this); +} + +bool GridWorld::isCopyable() const +{ + return true; +} diff --git a/gridworld/src/gridworld.h b/gridworld/src/gridworld.h new file mode 100644 index 0000000..f58d155 --- /dev/null +++ b/gridworld/src/gridworld.h @@ -0,0 +1,72 @@ + +#ifndef GRIDWORLD_H +#define GRIDWORLD_H + +#include + +class GridWorld : public Learn::LearningEnvironment{ + + private: + + /** + * \brief grid of the GridWorld, indexed as grid[y][x] + * - 0 are accessible tiles + * - 1 is good output + * - 2 is bad output + * - 3 are inaccessible tiles + */ + std::vector> grid = {{ 0, 0, 0, 2}, + { 0, 0, 3, 3}, + { 0, 0, 0, 1}}; + + /// Size of the grid as {width, height} + std::vector size = {4, 3}; + + /// Position of the agent as {x, y} + std::vector agentCoord = {0 , 0}; + + /// True if the episode is terminated + bool terminated = false; + + /// Total reward accumulated since the last reset + double score = 0.0; + + /// Current state exposed to the agent: the two agent coordinates + Data::PrimitiveTypeArray currentState; + + public: + + GridWorld() : LearningEnvironment((uint64_t) 4), currentState(2) {}; + + GridWorld(const GridWorld& other) = default; + + /// Inherited via LearningEnvironment + virtual void reset(size_t seed = 0, Learn::LearningMode mode = Learn::LearningMode::TRAINING, + uint16_t iterationNumber = 0, uint64_t generationNumber = 0) override; + + /// @brief Return true if the position indicated is available + /// @param pos_x Coordinate on axis x + /// @param pos_y Coordinate on axis y + /// @return boolean that indicates 
if the position is available + bool positionAvailable(int pos_x, int pos_y); + + /// Inherited via LearningEnvironment + virtual void doAction(uint64_t actionID) override; + + /// Inherited via LearningEnvironment + virtual double getScore() const override; + + /// Inherited via LearningEnvironment + virtual bool isTerminal() const override; + + /// Inherited via LearningEnvironment + virtual std::vector> getDataSources() override; + + /// Inherited via LearningEnvironment; returns a heap-allocated copy of this environment + virtual LearningEnvironment* clone() const override; + + /// Inherited via LearningEnvironment + virtual bool isCopyable() const override; +}; + +#endif \ No newline at end of file diff --git a/gridworld/src/instructions.cpp b/gridworld/src/instructions.cpp new file mode 100644 index 0000000..ac4a451 --- /dev/null +++ b/gridworld/src/instructions.cpp @@ -0,0 +1,28 @@ +#define _USE_MATH_DEFINES // To get M_PI +#include + +#include "instructions.h" + +void fillInstructionSet(Instructions::Set& set) { + auto minus = [](double a, double b) -> double { return a - b; }; + auto add = [](double a, double b) -> double { return a + b; }; + auto mult = [](double a, double b) -> double { return a * b; }; + auto div = [](double a, double b) -> double { return a / b; }; + auto max = [](double a, double b) -> double { return std::max(a, b); }; + auto ln = [](double a) -> double { return std::log(a); }; + auto exp = [](double a) -> double { return std::exp(a); }; + auto cos = [](double a) -> double { return std::cos(a); }; + auto sin = [](double a) -> double { return std::sin(a); }; + auto tan = [](double a) -> double { return std::tan(a); }; + + set.add(*(new Instructions::LambdaInstruction(minus))); + set.add(*(new Instructions::LambdaInstruction(add))); + set.add(*(new Instructions::LambdaInstruction(mult))); + set.add(*(new Instructions::LambdaInstruction(div))); + set.add(*(new Instructions::LambdaInstruction(max))); + set.add(*(new Instructions::LambdaInstruction(exp))); + set.add(*(new 
Instructions::LambdaInstruction(ln))); + set.add(*(new Instructions::LambdaInstruction(cos))); + set.add(*(new Instructions::LambdaInstruction(sin))); + set.add(*(new Instructions::LambdaInstruction(tan))); +} \ No newline at end of file diff --git a/gridworld/src/instructions.h b/gridworld/src/instructions.h new file mode 100644 index 0000000..413bc61 --- /dev/null +++ b/gridworld/src/instructions.h @@ -0,0 +1,18 @@ +/** +* The purpose of this file is to provide a utility function +* filling a given Instructions::Set with instructions. +* The objective of this file is to specify the instructions +* used in all gridworld related projects in a single place. +*/ + +#ifndef INSTRUCTIONS_H +#define INSTRUCTIONS_H + +#include + +/** +* Fill the given instruction set with heap-allocated instructions; the caller is responsible for deleting them. +*/ +void fillInstructionSet(Instructions::Set& set); + +#endif \ No newline at end of file diff --git a/gridworld/src/main.cpp b/gridworld/src/main.cpp new file mode 100644 index 0000000..7ba8cfe --- /dev/null +++ b/gridworld/src/main.cpp @@ -0,0 +1,92 @@ +#include +#include +#include +#include +#include +#include +#define _USE_MATH_DEFINES // To get M_PI +#include + +#include "gridworld.h" +#include "instructions.h" + +int main() { + + std::cout << "Start GridWorld application." << std::endl; + + // Create the instruction set for programs + Instructions::Set set; + fillInstructionSet(set); + + // Set the parameters for the learning process. 
+ // (Controls mutations probability, program lengths, and graph size + // among other things) + // Loads them from the file params.json + Learn::LearningParameters params; + File::ParametersParser::loadParametersFromJson(ROOT_DIR "/params.json", params); + + // Instantiate the LearningEnvironment + GridWorld gridWorldLe; + + std::cout << "Number of threads: " << params.nbThreads << std::endl; + + + // Instantiate and init the learning agent + Learn::ParallelLearningAgent la(gridWorldLe, set, params); + la.init(); + + const TPG::TPGVertex* bestRoot = nullptr; + + // Basic logger + Log::LABasicLogger basicLogger(la); + + // Create an exporter for all graphs + File::TPGGraphDotExporter dotExporter("out_0000.dot", *la.getTPGGraph()); + + // Logging best policy stat. + std::ofstream stats; + stats.open("bestPolicyStats.md"); + Log::LAPolicyStatsLogger policyStatsLogger(la, stats); + + // Export parameters before starting training. + // These may differ from imported parameters because of LE or machine specific + // settings such as thread count or number of actions. 
+ File::ParametersParser::writeParametersToJson("exported_params.json", params); + + // Train for params.nbGenerations generations, exporting the graph before each one + for (int i = 0; i < params.nbGenerations; i++) { + char buff[32]; // Large enough for any int index, not only 4-digit ones + snprintf(buff, sizeof(buff), "out_%04d.dot", i); + dotExporter.setNewFilePath(buff); + dotExporter.print(); + + la.trainOneGeneration(i); + + } + + // Keep best policy + la.keepBestPolicy(); + + // Clear introns instructions + la.getTPGGraph()->clearProgramIntrons(); + + // Export the graph + dotExporter.setNewFilePath("out_best.dot"); + dotExporter.print(); + + TPG::PolicyStats ps; + ps.setEnvironment(la.getTPGGraph()->getEnvironment()); + ps.analyzePolicy(la.getBestRoot().first); + std::ofstream bestStats; + bestStats.open("out_best_stats.md"); + bestStats << ps; + bestStats.close(); + stats.close(); + + // cleanup: the instructions were allocated with new in fillInstructionSet + for (unsigned int i = 0; i < set.getNbInstructions(); i++) { + delete (&set.getInstruction(i)); + } + + return 0; +} \ No newline at end of file