Skip to content

Commit

Permalink
adding extrack and ui
Browse files Browse the repository at this point in the history
  • Loading branch information
jonas-eschmann committed Jun 28, 2024
1 parent c4793ae commit 39acaa5
Show file tree
Hide file tree
Showing 7 changed files with 132 additions and 4 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@
/cmake-build-release
/cmake-build-release-visual-studio
/.idea
/experiments
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ target_link_libraries(my_pendulum PRIVATE RLtools::RLtools)



if(NOT MSVC)
if(NOT MSVC AND CMAKE_BUILD_TYPE STREQUAL "Release")
target_compile_options(my_pendulum PRIVATE -Ofast)
if(NOT APPLE)
target_compile_options(my_pendulum PRIVATE -march=native)
Expand Down
94 changes: 94 additions & 0 deletions include/my_pendulum/operations_cpu.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
namespace rl_tools{
// Serializes the environment parameters for the experiment-tracking output.
// None of the arguments are read: the result is always the empty JSON
// object "{}".
template<typename DEVICE, typename SPEC>
std::string json(DEVICE& device, const MyPendulum<SPEC>& env, const typename MyPendulum<SPEC>::Parameters& parameters){
    return std::string("{}");
}

// Serializes a pendulum state as a flat JSON object of the form
// {"theta":<value>,"theta_dot":<value>}.
// Both values are formatted with std::to_string; `device`, `env` and
// `parameters` are not read.
template<typename DEVICE, typename SPEC>
std::string json(DEVICE& device, const MyPendulum<SPEC>& env, const typename MyPendulum<SPEC>::Parameters& parameters, const typename MyPendulum<SPEC>::State& state){
    const std::string theta_field = "\"theta\":" + std::to_string(state.theta);
    const std::string theta_dot_field = "\"theta_dot\":" + std::to_string(state.theta_dot);
    return "{" + theta_field + "," + theta_dot_field + "}";
}
// Returns the JavaScript used to draw the pendulum on a 2D canvas context.
// The script reads `state.theta` and `action[0]` and draws the rod, the bob,
// the pivot, and an arc with an arrowhead for the torque (clamped to [-1, 1]).
// All sizes are derived from the canvas width. `device` and `env` are not read.
template <typename DEVICE, typename SPEC>
std::string get_ui(DEVICE& device, MyPendulum<SPEC>& env){
    // The literal holds only the body of `function render(ctx, state, action) {...}`,
    // so that it can be passed directly to `new Function("ctx", "state", "action", body)`.
    return R"RL_TOOLS_LITERAL(
ctx.clearRect(0, 0, ctx.canvas.width, ctx.canvas.height);
const centerX = ctx.canvas.width / 2;
const centerY = ctx.canvas.height / 2;
const canvasWidth = ctx.canvas.width;
const pendulumLength = canvasWidth * 0.2;
const bobRadius = canvasWidth * 0.02;
const pivotRadius = canvasWidth * 0.01;
// Draw the Pendulum
const adjustedTheta = state.theta - Math.PI;
const pendulumX = centerX + pendulumLength * Math.sin(adjustedTheta);
const pendulumY = centerY + pendulumLength * Math.cos(adjustedTheta);
ctx.beginPath();
ctx.moveTo(centerX, centerY);
ctx.lineTo(pendulumX, pendulumY);
ctx.lineWidth = canvasWidth * 0.008;
ctx.strokeStyle = 'black';
ctx.stroke();
ctx.beginPath();
ctx.arc(pendulumX, pendulumY, bobRadius, 0, 2 * Math.PI);
ctx.fillStyle = '#7DB9B6';
ctx.fill();
ctx.stroke();
ctx.beginPath();
ctx.arc(centerX, centerY, pivotRadius, 0, 2 * Math.PI);
ctx.fillStyle = 'black';
ctx.fill();
ctx.stroke();
// Draw torque arc
const torqueMagnitude = -Math.max(-1, Math.min(action[0], 1));
const arrowRadius = canvasWidth * 0.08;
const magnitudeRadians = (Math.PI * 2 / 3 * torqueMagnitude);
const startAngle = Math.PI / 2 + (torqueMagnitude > 0 ? 0 : magnitudeRadians);
const endAngle = Math.PI / 2 + (torqueMagnitude < 0 ? 0 : magnitudeRadians);
ctx.beginPath();
ctx.arc(centerX, centerY, arrowRadius, startAngle, endAngle);
ctx.strokeStyle = 'black';
ctx.lineWidth = canvasWidth * 0.008;
ctx.stroke();
// Draw arrowhead
const arrowAngle = torqueMagnitude > 0 ? endAngle : startAngle;
const arrowHeadAngularOffset = torqueMagnitude * Math.PI/180*20
const arrowX = centerX + arrowRadius * Math.cos(arrowAngle + arrowHeadAngularOffset);
const arrowY = centerY + arrowRadius * Math.sin(arrowAngle + arrowHeadAngularOffset);
const headlen = canvasWidth * 0.04 * Math.min(Math.abs(torqueMagnitude)*2, 1);
const angleOffset = Math.PI / 6;
const rotationAngle = Math.PI / 2 + (torqueMagnitude > 0 ? 0 : Math.PI);
ctx.beginPath();
ctx.moveTo(arrowX, arrowY);
ctx.lineTo(
arrowX - headlen * Math.cos(arrowAngle + arrowHeadAngularOffset/2 - angleOffset + rotationAngle),
arrowY - headlen * Math.sin(arrowAngle + arrowHeadAngularOffset/2 - angleOffset + rotationAngle)
);
ctx.lineTo(
arrowX - headlen * Math.cos(arrowAngle + arrowHeadAngularOffset/2 + angleOffset + rotationAngle),
arrowY - headlen * Math.sin(arrowAngle + arrowHeadAngularOffset/2 + angleOffset + rotationAngle)
);
ctx.lineTo(arrowX, arrowY);
ctx.fillStyle = 'black';
ctx.fill();
)RL_TOOLS_LITERAL";
}

}
14 changes: 14 additions & 0 deletions index.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="utf-8">
    <title>RLtools</title>
</head>
<body>
    <!-- Visible heading: headings are flow content and must live in <body>, not <head> -->
    <h1>RLtools</h1>
    <!-- Entry point to the experiment tracking UI, pointed at the local /experiments/ directory -->
    <a href="./external/rl_tools/static/extrack_ui?experiments=/experiments/"><b>EX</b>periment <b>TRACK</b>ing UI</a>
    <script>
        // Placeholder load hook (currently a no-op)
        window.addEventListener('load', () => {

        })
    </script>
</body>
</html>
3 changes: 3 additions & 0 deletions serve.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
#!/bin/sh
# Serves the repository over HTTP and periodically re-indexes the
# `experiments` directory (every 10 seconds).
# All command-line arguments are forwarded to `python3 -m http.server`
# (e.g. a port number or `--bind <address>`).
set -e

# Periodically refresh the static experiment index in the background
watch -n10 ./external/rl_tools/tools/index_experiments_static.sh experiments &
indexer_pid=$!

# Stop the background indexer whenever this script terminates, so that it is
# not left running if the server exits or fails to start (e.g. port in use)
cleanup() {
    kill "$indexer_pid" 2>/dev/null || true
}
trap cleanup EXIT
# Turn termination signals into a regular exit so that the EXIT trap runs
trap 'exit 130' INT
trap 'exit 143' TERM

# Quote "$@" so that arguments containing whitespace are passed through intact
python3 -m http.server "$@"
20 changes: 18 additions & 2 deletions src/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,13 @@

#include "../include/my_pendulum/my_pendulum.h"
#include "../include/my_pendulum/operations_generic.h"
#include "../include/my_pendulum/operations_cpu.h" // JSON conversion functions for the rl::loop::steps::save_trajectories step (stored according to the experiment tracking specification: https://docs.rl.tools/10-Experiment%20Tracking.html)

#include <rl_tools/rl/algorithms/ppo/loop/core/config.h>
#include <rl_tools/rl/algorithms/ppo/loop/core/operations_generic.h>
#include <rl_tools/rl/loop/steps/evaluation/config.h>
#include <rl_tools/rl/loop/steps/extrack/operations_cpu.h>
#include <rl_tools/rl/loop/steps/evaluation/operations_generic.h>
#include <rl_tools/rl/loop/steps/save_trajectories/operations_cpu.h>

namespace rlt = rl_tools;

Expand Down Expand Up @@ -40,7 +42,17 @@ struct LOOP_EVAL_PARAMETERS: rlt::rl::loop::steps::evaluation::Parameters<T, TI,
static constexpr TI N_EVALUATIONS = NEXT::CORE_PARAMETERS::STEP_LIMIT / EVALUATION_INTERVAL;
};
#ifndef BENCHMARK
using LOOP_CONFIG = rlt::rl::loop::steps::evaluation::Config<LOOP_CORE_CONFIG, LOOP_EVAL_PARAMETERS<LOOP_CORE_CONFIG>>;
// The loop configuration is built by wrapping one step config in the next: each alias takes the previous config as its first template argument, starting from LOOP_CORE_CONFIG
using LOOP_EXTRACK_CONFIG = rlt::rl::loop::steps::extrack::Config<LOOP_CORE_CONFIG>; // Sets up the experiment tracking structure (https://docs.rl.tools/10-Experiment%20Tracking.html)
using LOOP_EVALUATION_CONFIG = rlt::rl::loop::steps::evaluation::Config<LOOP_EXTRACK_CONFIG, LOOP_EVAL_PARAMETERS<LOOP_EXTRACK_CONFIG>>; // Evaluates the policy in a fixed interval and logs the return
// Parameters for the save_trajectories loop step; overrides the defaults inherited from save_trajectories::Parameters
struct LOOP_SAVE_TRAJECTORIES_PARAMETERS: rlt::rl::loop::steps::save_trajectories::Parameters<T, TI, LOOP_EVALUATION_CONFIG>{
    // One tenth of the core loop's step limit (presumably an integer division, so it can evaluate to 0 for small step limits)
    static constexpr TI INTERVAL_TEMP = LOOP_CORE_CONFIG::CORE_PARAMETERS::STEP_LIMIT / 10;
    // Fall back to 1 so that the interval is never zero
    static constexpr TI INTERVAL = INTERVAL_TEMP != 0 ? INTERVAL_TEMP : 1;
    // NOTE(review): presumably the number of episodes recorded each time trajectories are saved -- confirm against the save_trajectories step
    static constexpr TI NUM_EPISODES = 10;
};
using LOOP_SAVE_TRAJECTORIES_CONFIG = rlt::rl::loop::steps::save_trajectories::Config<LOOP_EVALUATION_CONFIG, LOOP_SAVE_TRAJECTORIES_PARAMETERS>; // Saves trajectories for replay with the extrack UI
// Outermost wrapper: the timing step config around the save_trajectories config (NOTE(review): presumably reports timing information -- confirm against the timing step)
using LOOP_TIMING_CONFIG = rlt::rl::loop::steps::timing::Config<LOOP_SAVE_TRAJECTORIES_CONFIG>;
// Final configuration used for the loop in non-BENCHMARK builds
using LOOP_CONFIG = LOOP_TIMING_CONFIG;

#else
using LOOP_CONFIG = LOOP_CORE_CONFIG;
#endif
Expand All @@ -54,6 +66,10 @@ int main(){
DEVICE device;
TI seed = 1337;
LOOP_STATE ls;
#ifndef BENCHMARK
// Set experiment tracking info
ls.extrack_name = "example";
#endif
rlt::malloc(device, ls);
rlt::init(device, ls, seed);
ls.actor_optimizer.parameters.alpha = 1e-2;
Expand Down

0 comments on commit 39acaa5

Please sign in to comment.