diff --git a/rl_exercises/christmas_challenge/evaluate.py b/rl_exercises/christmas_challenge/evaluate.py deleted file mode 100644 index ebbc571..0000000 --- a/rl_exercises/christmas_challenge/evaluate.py +++ /dev/null @@ -1,65 +0,0 @@ -from __future__ import annotations - -from typing import Callable, List - -import os - -import compiler_gym -import gymnasium as gym -import numpy as np -from compiler_gym.envs.llvm import make_benchmark -from gymnasium.wrappers import TimeLimit -from policy import create_policy -from tqdm import tqdm - - -def evaluate(env: gym.Env, policy: Callable[[np.ndarray], int], episodes: int = 100) -> float: - """ - Evaluate a given Policy on an Environment - - Parameters - ---------- - env: gym.Env - Environment to evaluate on - policy: Callable[[np.ndarray], int] - Policy to evaluate - episodes: int - Evaluation episodes - - Returns - ------- - mean_rewards - Mean evaluation rewards - """ - episode_rewards: List[float] = [] - pbar = tqdm(total=episodes) - for _ in range(episodes): - obs, _ = env.reset() - episode_rewards.append(0) - done = False - episode_steps = 0 - while not done: - action = policy(obs) - obs, reward, terminated, truncated, _ = env.step(action) - episode_rewards[-1] += reward - episode_steps += 1 - if terminated or truncated: - pbar.set_postfix({"episode reward": episode_rewards[-1], "episode step": episode_steps}) - pbar.update(1) - env.close() - return np.mean(episode_rewards) - - -if __name__ == "__main__": - print(compiler_gym.COMPILER_GYM_ENVS) - custom_benchmark = make_benchmark( - os.path.join(os.path.abspath(os.path.dirname(__file__)), "custom_benchmarks", "rot13.cpp") - ) - benchmark = "cbench-v1/dijkstra" - env = gym.make( - "llvm-autophase-ic-v0", benchmark=benchmark, reward_space="IrInstructionCountNorm", apply_api_compatibility=True - ) - env = TimeLimit(env, max_episode_steps=100) - policy = create_policy(env) - return_mean = evaluate(env, policy) - print(return_mean) diff --git a/rl_exercises/christmas_challenge/policy.py b/rl_exercises/christmas_challenge/policy.py deleted file mode 100644 index 2b57fca..0000000 --- a/rl_exercises/christmas_challenge/policy.py +++ /dev/null @@ -1,46 +0,0 @@ -from __future__ import annotations - -import gymnasium as gym -import numpy as np -from typing import Callable - - -def create_policy(env: gym.Env) -> Callable: - """ - Create Policy - - Parameters - ---------- - env: gym.Env - Training Environment - - Returns - ------- - policy - Initialiyed Policy - """ - print( - f"""Creating policy for environment {env} - with observation space {env.observation_space} - and action space {env.action_space}""" - ) - - def policy(obs: np.ndarray) -> int: - """ - Receives an observation `obs` and returns the action for the given environment. - - Parameters - ---------- - obs : np.ndarray - Observation of the current state of the environment. - - Returns - ------- - int - Action to take in the given state. - """ - assert env.action_space is not None - # TODO: replace this random policy - return env.action_space.sample() - - return policy