diff --git a/.gitignore b/.gitignore index 73a8dc4..01afe47 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,4 @@ /cmake-build-release /cmake-build-release-visual-studio /.idea +/experiments diff --git a/CMakeLists.txt b/CMakeLists.txt index 85c25ba..8629059 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -11,7 +11,7 @@ target_link_libraries(my_pendulum PRIVATE RLtools::RLtools) -if(NOT MSVC) +if(NOT MSVC AND CMAKE_BUILD_TYPE STREQUAL "Release") target_compile_options(my_pendulum PRIVATE -Ofast) if(NOT APPLE) target_compile_options(my_pendulum PRIVATE -march=native) diff --git a/external/rl_tools b/external/rl_tools index 3310fd1..623cfe4 160000 --- a/external/rl_tools +++ b/external/rl_tools @@ -1 +1 @@ -Subproject commit 3310fd1228e6c024613bc773161b46844b87b3df +Subproject commit 623cfe4dcb8b908f98d70a85ccf926944060b09b diff --git a/include/my_pendulum/operations_cpu.h b/include/my_pendulum/operations_cpu.h new file mode 100644 index 0000000..353e356 --- /dev/null +++ b/include/my_pendulum/operations_cpu.h @@ -0,0 +1,97 @@ +namespace rl_tools{ + /** Serializes the environment parameters as JSON; nothing is exported, so the result is always the empty object "{}" and all three arguments are unused. */ + template + std::string json(DEVICE& device, const MyPendulum& env, const typename MyPendulum::Parameters& parameters){ + return "{}"; + } + + /** Serializes a state as the JSON object {"theta": ..., "theta_dot": ...}, formatting both fields with std::to_string. */ + template + std::string json(DEVICE& device, const MyPendulum& env, const typename MyPendulum::Parameters& parameters, const typename MyPendulum::State& state){ + std::string json = "{"; + json += "\"theta\":" + std::to_string(state.theta) + ","; + json += "\"theta_dot\":" + std::to_string(state.theta_dot); + json += "}"; + return json; + } + /** Returns the JavaScript body of a render function taking (ctx, state, action): it clears the canvas, draws the pendulum from state.theta and draws action[0], clamped to [-1, 1], as a torque arc with an arrowhead. */ + template + std::string get_ui(DEVICE& device, MyPendulum& env){ + // just the body of `function render(ctx, state, action) {` (so that it can be easily processed by `new Function("ctx", "state", "action", body)` + std::string ui = R"RL_TOOLS_LITERAL( + ctx.clearRect(0, 0, ctx.canvas.width, ctx.canvas.height); + + const centerX = ctx.canvas.width / 2; + const centerY = ctx.canvas.height / 2; + const canvasWidth = ctx.canvas.width; + 
const pendulumLength = canvasWidth * 0.2; + const bobRadius = canvasWidth * 0.02; + const pivotRadius = canvasWidth * 0.01; + + // Draw the Pendulum + const adjustedTheta = state.theta - Math.PI; + + const pendulumX = centerX + pendulumLength * Math.sin(adjustedTheta); + const pendulumY = centerY + pendulumLength * Math.cos(adjustedTheta); + + ctx.beginPath(); + ctx.moveTo(centerX, centerY); + ctx.lineTo(pendulumX, pendulumY); + ctx.lineWidth = canvasWidth * 0.008; + ctx.strokeStyle = 'black'; + ctx.stroke(); + + ctx.beginPath(); + ctx.arc(pendulumX, pendulumY, bobRadius, 0, 2 * Math.PI); + ctx.fillStyle = '#7DB9B6'; + ctx.fill(); + ctx.stroke(); + + ctx.beginPath(); + ctx.arc(centerX, centerY, pivotRadius, 0, 2 * Math.PI); + ctx.fillStyle = 'black'; + ctx.fill(); + ctx.stroke(); + + // Draw torque arc + const torqueMagnitude = -Math.max(-1, Math.min(action[0], 1)); + const arrowRadius = canvasWidth * 0.08; + const magnitudeRadians = (Math.PI * 2 / 3 * torqueMagnitude); + const startAngle = Math.PI / 2 + (torqueMagnitude > 0 ? 0 : magnitudeRadians); + const endAngle = Math.PI / 2 + (torqueMagnitude < 0 ? 0 : magnitudeRadians); + + ctx.beginPath(); + ctx.arc(centerX, centerY, arrowRadius, startAngle, endAngle); + ctx.strokeStyle = 'black'; + ctx.lineWidth = canvasWidth * 0.008; + ctx.stroke(); + + // Draw arrowhead + const arrowAngle = torqueMagnitude > 0 ? endAngle : startAngle; + const arrowHeadAngularOffset = torqueMagnitude * Math.PI/180*20; + const arrowX = centerX + arrowRadius * Math.cos(arrowAngle + arrowHeadAngularOffset); + const arrowY = centerY + arrowRadius * Math.sin(arrowAngle + arrowHeadAngularOffset); + + const headlen = canvasWidth * 0.04 * Math.min(Math.abs(torqueMagnitude)*2, 1); + const angleOffset = Math.PI / 6; + const rotationAngle = Math.PI / 2 + (torqueMagnitude > 0 ? 
0 : Math.PI); + + ctx.beginPath(); + ctx.moveTo(arrowX, arrowY); + ctx.lineTo( + arrowX - headlen * Math.cos(arrowAngle + arrowHeadAngularOffset/2 - angleOffset + rotationAngle), + arrowY - headlen * Math.sin(arrowAngle + arrowHeadAngularOffset/2 - angleOffset + rotationAngle) + ); + ctx.lineTo( + arrowX - headlen * Math.cos(arrowAngle + arrowHeadAngularOffset/2 + angleOffset + rotationAngle), + arrowY - headlen * Math.sin(arrowAngle + arrowHeadAngularOffset/2 + angleOffset + rotationAngle) + ); + ctx.lineTo(arrowX, arrowY); + ctx.fillStyle = 'black'; + ctx.fill(); + )RL_TOOLS_LITERAL"; + return ui; + } + +} \ No newline at end of file diff --git a/index.html b/index.html new file mode 100644 index 0000000..ad623b8 --- /dev/null +++ b/index.html @@ -0,0 +1,14 @@ + + + +

RLtools

+ + + EXperiment TRACKing UI + + + diff --git a/serve.sh b/serve.sh new file mode 100755 index 0000000..83a5c9f --- /dev/null +++ b/serve.sh @@ -0,0 +1,3 @@ +set -e +watch -n10 ./external/rl_tools/tools/index_experiments_static.sh experiments & # re-runs the indexing script every 10 s in the background +python3 -m http.server "$@" diff --git a/src/main.cpp b/src/main.cpp index 1fffed8..d2c5867 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -4,11 +4,13 @@ #include "../include/my_pendulum/my_pendulum.h" #include "../include/my_pendulum/operations_generic.h" +#include "../include/my_pendulum/operations_cpu.h" // JSON conversion functions for the rl::loop::steps::save_trajectories step (stored according to the experiment tracking specification: https://docs.rl.tools/10-Experiment%20Tracking.html) #include #include -#include +#include #include +#include namespace rlt = rl_tools; @@ -40,7 +42,17 @@ struct LOOP_EVAL_PARAMETERS: rlt::rl::loop::steps::evaluation::Parameters>; +using LOOP_EXTRACK_CONFIG = rlt::rl::loop::steps::extrack::Config; // Sets up the experiment tracking structure (https://docs.rl.tools/10-Experiment%20Tracking.html) +using LOOP_EVALUATION_CONFIG = rlt::rl::loop::steps::evaluation::Config>; // Evaluates the policy in a fixed interval and logs the return +struct LOOP_SAVE_TRAJECTORIES_PARAMETERS: rlt::rl::loop::steps::save_trajectories::Parameters{ + static constexpr TI INTERVAL_TEMP = LOOP_CORE_CONFIG::CORE_PARAMETERS::STEP_LIMIT / 10; // a tenth of the core loop's step limit + static constexpr TI INTERVAL = INTERVAL_TEMP == 0 ? 
1 : INTERVAL_TEMP; // falls back to 1 so the interval is never 0 + static constexpr TI NUM_EPISODES = 10; +}; +using LOOP_SAVE_TRAJECTORIES_CONFIG = rlt::rl::loop::steps::save_trajectories::Config; // Saves trajectories for replay with the extrack UI +using LOOP_TIMING_CONFIG = rlt::rl::loop::steps::timing::Config; +using LOOP_CONFIG = LOOP_TIMING_CONFIG; + #else using LOOP_CONFIG = LOOP_CORE_CONFIG; #endif @@ -54,6 +66,10 @@ int main(){ DEVICE device; TI seed = 1337; LOOP_STATE ls; +#ifndef BENCHMARK + // Set experiment tracking info + ls.extrack_name = "example"; +#endif rlt::malloc(device, ls); rlt::init(device, ls, seed); ls.actor_optimizer.parameters.alpha = 1e-2;