diff --git a/pureples/shared/gym_runner.py b/pureples/shared/gym_runner.py index 2afde15..2840d31 100644 --- a/pureples/shared/gym_runner.py +++ b/pureples/shared/gym_runner.py @@ -35,7 +35,7 @@ def eval_fitness(genomes, config): fitnesses = [] for _ in range(trials): - ob = env.reset() + ob = env.reset()[0] net.reset() total_reward = 0 @@ -45,7 +45,8 @@ def eval_fitness(genomes, config): o = net.activate(ob) action = np.argmax(o) - ob, reward, done, _ = env.step(action) + ob, reward, terminated, truncated, _ = env.step(action) + done = terminated or truncated total_reward += reward if done: break @@ -91,7 +92,7 @@ def eval_fitness(genomes, config): fitnesses = [] for _ in range(trials): - ob = env.reset() + ob = env.reset()[0] net.reset() total_reward = 0 @@ -100,7 +101,8 @@ def eval_fitness(genomes, config): for _ in range(activations): o = net.activate(ob) action = np.argmax(o) - ob, reward, done, _ = env.step(action) + ob, reward, terminated, truncated, _ = env.step(action) + done = terminated or truncated total_reward += reward if done: break @@ -143,14 +145,15 @@ def eval_fitness(genomes, config): fitnesses = [] for _ in range(trials): - ob = env.reset() + ob = env.reset()[0] total_reward = 0 for _ in range(max_steps): o = net.activate(ob) action = np.argmax(o) - ob, reward, done, _ = env.step(action) + ob, reward, terminated, truncated, _ = env.step(action) + done = terminated or truncated total_reward += reward if done: break diff --git a/setup.py b/setup.py index d5a9c3e..66222b8 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ setup( name='pureples', - version='0.0', + version='0.1', author='adrian, simon', author_email='mail@adrianwesth.dk', maintainer='simon, adrian',