Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Creation of GridWorld environment #16

Merged
merged 7 commits into from
Aug 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ jobs:
strategy:
matrix:
os: [ubuntu-latest, windows-latest]
app: [pendulum, tic-tac-toe, mnist, stickgame]
app: [pendulum, tic-tac-toe, mnist, stickgame, gridworld]
steps:
- name: Checkout
uses: actions/checkout@v2
Expand Down
101 changes: 101 additions & 0 deletions gridworld/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
# Minimum CMake version accepted to configure this project.
cmake_minimum_required(VERSION 3.12.4)

# *******************************************
# ************* CMake Content ***************
# *******************************************
# This CMake file creates a workspace containing the following projects:
#
# Programs
# - gridworld

# Name shared by the CMake project and by the executable target built below.
set (PROJECT_NAME gridworld)

project(${PROJECT_NAME})

# Expose the directory of this CMakeLists.txt to the sources through the
# PROJECT_ROOT_PATH preprocessor definition.
add_definitions( -DPROJECT_ROOT_PATH="${CMAKE_CURRENT_SOURCE_DIR}")

# Disable C and C++ compiler extensions.
# C/CXX_EXTENSIONS are ON by default to allow the compilers to use extended
# variants of the C/CXX language.
# However, this could expose cross-platform bugs in user code or in the headers
# of third-party dependencies and thus it is strongly suggested to turn
# extensions off.
set(CMAKE_C_EXTENSIONS OFF)
set(CMAKE_CXX_EXTENSIONS OFF)

# Build with C++17, and fail the configuration if it is not supported.
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

# Settings applied with every generator except the Visual Studio ones.
if(NOT ${CMAKE_GENERATOR} MATCHES "Visual Studio.*")

# Link with pthread
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread")

# Debug or release: each build type gets its own output directory and flags.
# Any CMAKE_BUILD_TYPE that does not match "Debug" is handled as Release.
if(CMAKE_BUILD_TYPE MATCHES "Debug")
MESSAGE("Generate Debug project")
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/Debug)
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -g -pg -Wall")
else()
MESSAGE("Generate Release project")
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/Release)
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -Wall")
endif()
# Add libmath (m) for non Visual Studio builds.
# NOTE(review): CMAKE_EXTRA_LIB is not passed to the target_link_libraries
# call of this file - confirm whether it is still needed.
set(CMAKE_EXTRA_LIB m)
else()
# Visual Studio generators: executables are emitted relative to the build
# directory, and _CRT_SECURE_NO_WARNINGS is defined for the compiler.
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR})
add_definitions(-D_CRT_SECURE_NO_WARNINGS)
endif()

# Add definitions for testing purposes.
# When TESTING is set, the sources are compiled with NO_CONSOLE_CONTROL
# defined and with NB_GENERATIONS defined to 2.
if(${TESTING})
MESSAGE("Testing mode")
add_definitions(-DNO_CONSOLE_CONTROL -DNB_GENERATIONS=2)
endif()

# *******************************************
# *********** GEGELATI LIBRARY **************
# *******************************************

# On Windows, the library binaries are expected in the lib directory located
# next to this CMakeLists.txt.
if(WIN32)
    set(LIBS_DIR ${CMAKE_CURRENT_SOURCE_DIR}/lib)
    # Find the gegelatilib-x.y.z folder in the lib directory.
    file(GLOB GEGELATI_ROOT_DIR "${LIBS_DIR}/gegelatilib-[\\.|0-9]*")
    # Point find_package to the folder found above.
    set(ENV{GEGELATI_DIR} ${GEGELATI_ROOT_DIR})
endif()

# REQUIRED stops the configuration with an explicit error when the library
# cannot be found, instead of failing later with missing headers or
# unresolved symbols when the executable is built.
find_package(GEGELATI REQUIRED)

# On Windows, copy the library DLLs next to the executable.
if(WIN32)
    file(GLOB
        GEGELATI_DLL
        ${GEGELATI_ROOT_DIR}/bin/*.dll
    )

    MESSAGE("Copy GEGELATI DLLs into ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}")
    file(COPY ${GEGELATI_DLL} DESTINATION ${CMAKE_RUNTIME_OUTPUT_DIRECTORY})
    # With Visual Studio generators, the DLLs are also copied into the Debug
    # and Release sub-directories of the runtime output directory.
    if(${CMAKE_GENERATOR} MATCHES "Visual Studio.*")
        file(COPY ${GEGELATI_DLL} DESTINATION ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/Debug)
        file(COPY ${GEGELATI_DLL} DESTINATION ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/Release)
    endif()
endif()

# *******************************************
# ************** Executable ****************
# *******************************************

# Executable to learn the TPG.
# The sources, the headers and the parameter file are all attached to the
# target.
file(GLOB_RECURSE
    gridworld_files
    ./src/*.cpp
    ./src/*.h
    ./params.json
)

include_directories(${GEGELATI_INCLUDE_DIRS})
add_executable(${PROJECT_NAME} ${gridworld_files})
target_link_libraries(${PROJECT_NAME} ${GEGELATI_LIBRARIES})

# ROOT_DIR is the directory of this CMakeLists.txt.
# CMAKE_CURRENT_SOURCE_DIR is used (as for PROJECT_ROOT_PATH) rather than
# CMAKE_SOURCE_DIR so that the path remains the gridworld directory even when
# this project is included from a parent CMake project. Both variables hold
# the same value when this project is configured on its own.
target_compile_definitions(${PROJECT_NAME} PRIVATE ROOT_DIR="${CMAKE_CURRENT_SOURCE_DIR}")
23 changes: 23 additions & 0 deletions gridworld/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# GridWorld

This application teaches a learning agent built with the [GEGELATI library](https://github.com/gegelati/gegelati) how to get out of a gridWorld.

The gridWorld is a grid composed of 0, 1, 2, and 3.
* 0 is an available tile
* 1 is a good output tile
* 2 is a bad output tile
* 3 is an unavailable tile

The agent starts at coordinate (0, 0). It can go left, right, up, and down.

It gets a reward of -1 when it reaches a tile with value 0.
If it reaches a tile with value 1 or 2, the environment terminates and the agent gets a reward of 100 or -100, respectively.

## How to Build?
The build process of applications relies on [cmake](https://cmake.org) to configure a project for a wide variety of development environments and operating systems. Install [cmake](https://cmake.org/download/) on your system before building the application.

### Under windows
1. Copy the `gegelatilib-<version>` folder containing the binaries of the [GEGELATI library](https://github.com/gegelati/gegelati) into the `lib` folder.
2. Open a command line interface in the `bin` folder.
3. Enter the following command to create the project for your favorite IDE `cmake ..`.
4. Open the project created in the `bin` folder, or launch the build with the following command: `cmake --build .`.
1 change: 1 addition & 0 deletions gridworld/bin/.dummy
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
This file exists only to force the presence of the bin folder in the git repository.
1 change: 1 addition & 0 deletions gridworld/lib/.dummy
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
This file exists only to force the presence of the lib folder in the git repository.
113 changes: 113 additions & 0 deletions gridworld/params.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
{
// Number of recordings held in the Archive.
// "archiveSize" : 50, // Default value
"archiveSize" : 2000,
// Probability of archiving the result of each Program execution.
// "archivingProbability" : 0.05, // Default value
"archivingProbability" : 0.01,
// Boolean used to activate an evaluation of the surviving roots in validation
// mode after the training at each generation.
// "doValidation" : false, // Default value
"doValidation" : false,
// Maximum number of actions performed on the learning environment during
// each evaluation of a root.
// "maxNbActionsPerEval" : 1000, // Default value
"maxNbActionsPerEval" : 100,
// Maximum number of times a given root is evaluated. After this number is
// reached, possibly after several generations, the score of the root will be
// fixed, and no further evaluation will be done.
// "maxNbEvaluationPerPolicy" : 1000, // Default value
"maxNbEvaluationPerPolicy" : 10,
"mutation" :
{
"prog" :
{
// Maximum constant value possible.
// "maxConstValue" : 100, // Default value
"maxConstValue" : 10,
// Maximum number of Lines within each Program of the TPG.
// "maxProgramSize" : 96, // Default value
"maxProgramSize" : 20,
// Minimum constant value possible.
// "minConstValue" : -10, // Default value
"minConstValue" : -10,
// Probability of inserting a line in the Program.
// "pAdd" : 0.5, // Default value
"pAdd" : 0.5,
// Probability of each constant to be mutated.
// "pConstantMutation" : 0.5, // Default value
"pConstantMutation" : 0.5,
// Probability of deleting a line of the Program.
// "pDelete" : 0.5, // Default value
"pDelete" : 0.5,
// Probability of altering a line of the Program.
// "pMutate" : 1.0, // Default value
"pMutate" : 1.0,
// Probability of swapping two lines of the Program.
// "pSwap" : 1.0, // Default value
"pSwap" : 1.0
},
"tpg" :
{
// When a Program is mutated, makes sure its behavior is no longer the same.
// "forceProgramBehaviorChangeOnMutation" : false, // Default value
"forceProgramBehaviorChangeOnMutation" : true,
// Maximum number of TPGEdge connected to each TPGTeam of the TPGGraph when
// initialized.
// "maxInitOutgoingEdges" : 3, // Default value
"maxInitOutgoingEdges" : 4,
// Maximum number of outgoing edge during TPGGraph mutations.
// "maxOutgoingEdges" : 5, // Default value
"maxOutgoingEdges" : 10,
// Number of TPGAction vertex of the initialized TPGGraph.
// This parameter is generally automatically set by the LearningEnvironment.
// /* "nbActions" : 0,*/ // Commented by default
/* "nbActions" : 0,*/
// Number of root TPGTeams to maintain when populating the TPGGraph
// "nbRoots" : 100, // Default value
"nbRoots" : 500,
// Probability of adding an outgoing Edge to a Team.
// "pEdgeAddition" : 0.7, // Default value
"pEdgeAddition" : 0.7,
// Probability of deleting an outgoing Edge of a Team.
// "pEdgeDeletion" : 0.7, // Default value
"pEdgeDeletion" : 0.7,
// Probability of changing the destination of an Edge.
// "pEdgeDestinationChange" : 0.1, // Default value
"pEdgeDestinationChange" : 0.1,
// Probability of the new destination of an Edge to be an Action.
// "pEdgeDestinationIsAction" : 0.5, // Default value
"pEdgeDestinationIsAction" : 0.5,
// Probability of mutating the Program of an outgoing Edge.
// "pProgramMutation" : 0.2, // Default value
"pProgramMutation" : 0.2
}
},
// Number of generations of the training.
// "nbGenerations" : 500, // Default value
"nbGenerations" : 10,
// [Only used in AdversarialLearningAgent.]
// Number of times each job is evaluated in the learning process.
// Each root may belong to several jobs, hence this parameter should be lower
// than the nbIterationsPerPolicyEvaluation parameter.
// "nbIterationsPerJob" : 1, // Default value
"nbIterationsPerJob" : 1,
// Number of evaluations of each root per generation.
// "nbIterationsPerPolicyEvaluation" : 5, // Default value
"nbIterationsPerPolicyEvaluation" : 1,
// Number of Constants available in each Program.
// "nbProgramConstant" : 0, // Default value
"nbProgramConstant" : 5,
// Number of registers for the Program execution.
// "nbRegisters" : 8, // Default value
"nbRegisters" : 8,
// [Only used in ParallelLearningAgent and child classes.]
// Number of threads used for the training process.
// When undefined in the json file, this parameter is automatically set to the
// number of cores of the CPU.
// /* "nbThreads" : 0,*/ // Commented by default
"nbThreads" : 1,
// Percentage of deleted (and regenerated) root TPGVertex at each generation.
// "ratioDeletedRoots" : 0.5, // Default value
"ratioDeletedRoots" : 0.5
}
100 changes: 100 additions & 0 deletions gridworld/src/gridworld.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
#include"gridworld.h"

/// Put the environment back into its initial state.
///
/// The accumulated score and the terminal flag are cleared, the agent is sent
/// back to coordinate (0, 0), and the two entries of currentState are
/// refreshed with the agent coordinates.
///
/// None of the parameters is read by this implementation.
///
/// \param[in] seed unused.
/// \param[in] mode unused.
/// \param[in] iterationNumber unused.
/// \param[in] generationNumber unused.
void GridWorld::reset(size_t seed, Learn::LearningMode mode, uint16_t iterationNumber, uint64_t generationNumber){

    // Clear the bookkeeping of the previous episode.
    score = 0.0;
    terminated = false;

    // Send the agent back to its starting tile.
    agentCoord = {0, 0};

    // Publish the agent coordinates: x at address 0, y at address 1.
    for (size_t axis = 0; axis < 2; ++axis) {
        currentState.setDataAt(typeid(double), axis, agentCoord[axis]);
    }
}

/// Tell whether the agent is allowed to stand on the tile (pos_x, pos_y).
///
/// A position is rejected when it lies outside of the grid, whose dimensions
/// are size[0] along x and size[1] along y, or when the tile stored at
/// grid[pos_y][pos_x] holds the value 3.
///
/// \param[in] pos_x coordinate of the tested tile on axis x.
/// \param[in] pos_y coordinate of the tested tile on axis y.
/// \return true if the tile is inside the grid and its value is not 3,
///         false otherwise.
bool GridWorld::positionAvailable(int pos_x, int pos_y){

    // Position unavailable on axis x.
    // Range comparisons are used instead of equality with -1 and size[0], so
    // that a position lying more than one tile outside of the grid is also
    // rejected rather than used to index the grid out of bounds.
    if(pos_x < 0 || pos_x >= size[0]){
        return false;
    }

    // Position unavailable on axis y.
    if(pos_y < 0 || pos_y >= size[1]){
        return false;
    }

    // Inside the grid: the position is available unless the tile value is 3.
    return grid[pos_y][pos_x] != 3;
}

/// Apply an action of the agent to the environment.
///
/// Actions 0, 1, 2 and 3 try to move the agent by one tile to the left, down,
/// right and up, respectively. The move only happens when positionAvailable()
/// accepts the destination. Any other action value leaves the agent in place.
///
/// After the move attempt, the tile under the agent decides the reward added
/// to the score: 100 on a tile of value 1, -100 on a tile of value 2 (both
/// mark the environment as terminated), and -1 on any other tile. The agent
/// coordinates are then written to currentState.
///
/// \param[in] action identifier of the action to execute.
void GridWorld::doAction(uint64_t action){

    // Translate the action identifier into a displacement on the grid.
    int stepX = 0;
    int stepY = 0;
    switch (action){
    case 0: // Left
        stepX = -1;
        break;
    case 1: // Down
        stepY = 1;
        break;
    case 2: // Right
        stepX = 1;
        break;
    case 3: // Up
        stepY = -1;
        break;
    default: // Unknown action: no displacement
        break;
    }

    // Move only if an actual displacement was requested and the destination
    // tile can be entered.
    const bool moveRequested = (stepX != 0) || (stepY != 0);
    if (moveRequested && positionAvailable(agentCoord[0] + stepX, agentCoord[1] + stepY)){
        agentCoord[0] += stepX;
        agentCoord[1] += stepY;
    }

    // The reward depends on the tile on which the agent now stands.
    const auto tile = grid[agentCoord[1]][agentCoord[0]];
    double reward;
    if (tile == 1){
        // Good output reached
        terminated = true;
        reward = 100;
    } else if (tile == 2){
        // Bad output reached
        terminated = true;
        reward = -100;
    } else {
        // No output reached
        reward = -1;
    }

    // Update score
    score += reward;

    // Publish the agent coordinates: x at address 0, y at address 1.
    for (size_t axis = 0; axis < 2; ++axis) {
        currentState.setDataAt(typeid(double), axis, agentCoord[axis]);
    }
}

bool GridWorld::isTerminal() const{
return terminated;
}

double GridWorld::getScore() const {
return score;
}

/// Get the data sources exposed by the environment.
///
/// \return a vector holding a single reference, to currentState.
std::vector<std::reference_wrapper<const Data::DataHandler>> GridWorld::getDataSources()
{
    std::vector<std::reference_wrapper<const Data::DataHandler>> sources;
    sources.emplace_back(this->currentState);
    return sources;
}

/// Create a copy of this environment.
///
/// NOTE(review): the copy is allocated with new; the caller is presumably
/// responsible for deleting it - confirm against the LearningEnvironment
/// contract.
///
/// \return a pointer to a GridWorld copy-constructed from this object.
Learn::LearningEnvironment* GridWorld::clone() const
{
    Learn::LearningEnvironment* duplicate = new GridWorld(*this);
    return duplicate;
}

/// Indicate whether this environment can be copied.
///
/// \return always true.
bool GridWorld::isCopyable() const
{
    const bool copySupported = true;
    return copySupported;
}
Loading
Loading