Skip to content

Commit

Permalink
adding benchmark target; updating ui; bumping rlt
Browse files Browse the repository at this point in the history
  • Loading branch information
jonas-eschmann committed Oct 1, 2024
1 parent 9a19e5b commit 04baa57
Show file tree
Hide file tree
Showing 4 changed files with 40 additions and 23 deletions.
10 changes: 6 additions & 4 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,14 @@
#set(RL_TOOLS_BACKEND_ENABLE_ACCELERATE ON) # if you are on macOS (fastest on Apple Silicon)
add_subdirectory(external/rl_tools)

add_executable(my_pendulum
src/main.cpp
)
#target_compile_definitions(my_pendulum PRIVATE BENCHMARK)
add_executable(my_pendulum src/main.cpp)
target_link_libraries(my_pendulum PRIVATE RLtools::RLtools)

# The following target disables evaluations and checkpointing during training to assess the training time
add_executable(my_pendulum_benchmark src/main.cpp)
target_compile_definitions(my_pendulum_benchmark PRIVATE BENCHMARK)
target_link_libraries(my_pendulum_benchmark PRIVATE RLtools::RLtools)



if(NOT MSVC AND CMAKE_BUILD_TYPE STREQUAL "Release")
Expand Down
2 changes: 1 addition & 1 deletion external/rl_tools
Submodule rl_tools updated 286 files
26 changes: 18 additions & 8 deletions include/my_pendulum/operations_cpu.h
Original file line number Diff line number Diff line change
@@ -1,22 +1,32 @@
#include <string>

namespace rl_tools{
template<typename DEVICE, typename SPEC>
std::string json(DEVICE& device, const MyPendulum<SPEC>& env, const typename MyPendulum<SPEC>::Parameters& parameters){
/// Serializes the environment parameters to a JSON string.
/// None of the arguments are read: the result is always the empty object `{}`.
template <typename DEVICE, typename SPEC>
std::string json(DEVICE&, MyPendulum<SPEC>& env, typename MyPendulum<SPEC>::Parameters& parameters){
    return std::string("{}");
}

template<typename DEVICE, typename SPEC>
std::string json(DEVICE& device, const MyPendulum<SPEC>& env, const typename MyPendulum<SPEC>::Parameters& parameters, const typename MyPendulum<SPEC>::State& state){
/// Serializes the environment state to a JSON object string of the form
/// `{"theta":<value>,"theta_dot":<value>}`.
/// Both values are formatted with `std::to_string`; `env` and `parameters` are not read.
template <typename DEVICE, typename SPEC>
std::string json(DEVICE&, MyPendulum<SPEC>& env, typename MyPendulum<SPEC>::Parameters& parameters, typename MyPendulum<SPEC>::State& state){
    // Build each `"key":value` pair separately, then join them inside the braces.
    const std::string theta_field = "\"theta\":" + std::to_string(state.theta);
    const std::string theta_dot_field = "\"theta_dot\":" + std::to_string(state.theta_dot);
    return "{" + theta_field + "," + theta_dot_field + "}";
}

template <typename DEVICE, typename SPEC>
std::string get_ui(DEVICE& device, MyPendulum<SPEC>& env){
// just the body of `function render(ctx, state, action) {` (so that it can be easily processed by `new Function("ctx", "state", "action", body)`
// Implement the functions `export async function render(ui_state, parameters, state, action)` and `export async function init(canvas, options)` and `export` them so that they are available as ES6 imports
// Please have a look at https://studio.rl.tools which helps you create render functions interactively
std::string ui = R"RL_TOOLS_LITERAL(
export async function init(canvas, options){
// Simply saving the context for 2D environments
return {
ctx: canvas.getContext('2d')
}
}
export async function render(ui_state, parameters, state, action) {
const ctx = ui_state.ctx
ctx.clearRect(0, 0, ctx.canvas.width, ctx.canvas.height);
const centerX = ctx.canvas.width / 2;
Expand Down Expand Up @@ -88,8 +98,8 @@ namespace rl_tools{
ctx.lineTo(arrowX, arrowY);
ctx.fillStyle = 'black';
ctx.fill();
}
)RL_TOOLS_LITERAL";
return ui;
}

}
}
25 changes: 15 additions & 10 deletions src/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,27 +22,32 @@ using TI = typename DEVICE::index_t;
using PENDULUM_SPEC = MyPendulumSpecification<T, TI, MyPendulumParameters<T>>;
using ENVIRONMENT = MyPendulum<PENDULUM_SPEC>;
struct LOOP_CORE_PARAMETERS: rlt::rl::algorithms::ppo::loop::core::DefaultParameters<T, TI, ENVIRONMENT>{
struct PPO_PARAMETERS: rlt::rl::algorithms::ppo::DefaultParameters<T, TI>{
static constexpr T ACTION_ENTROPY_COEFFICIENT = 0.0;
static constexpr TI N_EPOCHS = 2;
};

static constexpr TI BATCH_SIZE = 256;
static constexpr TI ACTOR_HIDDEN_DIM = 64;
static constexpr TI CRITIC_HIDDEN_DIM = 64;
static constexpr TI ON_POLICY_RUNNER_STEPS_PER_ENV = 1024;
static constexpr TI N_ENVIRONMENTS = 4;
static constexpr TI ON_POLICY_RUNNER_STEPS_PER_ENV = 256;
static constexpr TI BATCH_SIZE = 64;
static constexpr TI TOTAL_STEP_LIMIT = 300000;
static constexpr TI STEP_LIMIT = TOTAL_STEP_LIMIT/(ON_POLICY_RUNNER_STEPS_PER_ENV * N_ENVIRONMENTS) + 1;
static constexpr TI EPISODE_STEP_LIMIT = 200;
using OPTIMIZER_PARAMETERS = rlt::nn::optimizers::adam::DEFAULT_PARAMETERS_PYTORCH<T>;
struct PPO_PARAMETERS: rlt::rl::algorithms::ppo::DefaultParameters<T, TI, BATCH_SIZE>{
static constexpr T ACTION_ENTROPY_COEFFICIENT = 0.0;
static constexpr TI N_EPOCHS = 2;
static constexpr T GAMMA = 0.9;
static constexpr T INITIAL_ACTION_STD = 2.0;
static constexpr bool NORMALIZE_OBSERVATIONS = true;
};
};
using LOOP_CORE_CONFIG = rlt::rl::algorithms::ppo::loop::core::Config<T, TI, RNG, ENVIRONMENT, LOOP_CORE_PARAMETERS>;
#ifndef BENCHMARK
using LOOP_EXTRACK_CONFIG = rlt::rl::loop::steps::extrack::Config<LOOP_CORE_CONFIG>; // Sets up the experiment tracking structure (https://docs.rl.tools/10-Experiment%20Tracking.html)
// Settings for the evaluation loop step; everything not overridden here is inherited from
// rlt::rl::loop::steps::evaluation::Parameters<T, TI, NEXT>.
template <typename NEXT>
struct LOOP_EVAL_PARAMETERS: rlt::rl::loop::steps::evaluation::Parameters<T, TI, NEXT>{
    // Number of episodes per evaluation (presumably averaged by the evaluation step -- confirm in the RLtools docs).
    static constexpr TI NUM_EVALUATION_EPISODES = 10;
    // Spacing between evaluations (presumably counted in loop steps -- confirm in the RLtools docs).
    static constexpr TI EVALUATION_INTERVAL = 4;
    // Core step limit divided by the interval. Must stay below EVALUATION_INTERVAL, which it reads.
    static constexpr TI N_EVALUATIONS = NEXT::CORE_PARAMETERS::STEP_LIMIT / EVALUATION_INTERVAL;
};
#ifndef BENCHMARK
using LOOP_EXTRACK_CONFIG = rlt::rl::loop::steps::extrack::Config<LOOP_CORE_CONFIG>; // Sets up the experiment tracking structure (https://docs.rl.tools/10-Experiment%20Tracking.html)
using LOOP_EVALUATION_CONFIG = rlt::rl::loop::steps::evaluation::Config<LOOP_EXTRACK_CONFIG, LOOP_EVAL_PARAMETERS<LOOP_EXTRACK_CONFIG>>; // Evaluates the policy in a fixed interval and logs the return
struct LOOP_SAVE_TRAJECTORIES_PARAMETERS: rlt::rl::loop::steps::save_trajectories::Parameters<T, TI, LOOP_EVALUATION_CONFIG>{
static constexpr TI INTERVAL_TEMP = LOOP_CORE_CONFIG::CORE_PARAMETERS::STEP_LIMIT / 10;
Expand Down Expand Up @@ -84,5 +89,5 @@ int main(){
}
auto end_time = std::chrono::high_resolution_clock::now();
std::chrono::duration<double> diff = end_time-start_time;
std::cout << "Training time: " << diff.count() << std::endl;
std::cout << "Training time: " << diff.count() << " s" << std::endl;
}

0 comments on commit 04baa57

Please sign in to comment.