[Algorithm] CrossQ #2033

Merged Jul 10, 2024

Changes from 19 commits (of 49 total)

Commits
0a23ae8  add crossQ examples (BY571, Mar 20, 2024)
9bdee71  add loss (BY571, Mar 20, 2024)
570a20e  Update naming experiment (BY571, Mar 21, 2024)
5086249  update (BY571, Mar 21, 2024)
c3a927f  update add tests (BY571, Mar 21, 2024)
d1c9c34  detach (BY571, Mar 21, 2024)
e879b7c  update tests (BY571, Mar 21, 2024)
75255e7  update run_test.sh (BY571, Mar 21, 2024)
a7b79c3  move crossq to sota-implementations (BY571, Mar 21, 2024)
be84f3f  update loss (BY571, Mar 26, 2024)
2170ad8  update cat prediction (BY571, Mar 26, 2024)
75d4cee  Merge branch 'main' into crossQ (vmoens, Jun 12, 2024)
7711a4e  Merge branch 'main' into crossQ (BY571, Jun 26, 2024)
f0ac167  add batchrenorm to crossq (BY571, Jun 26, 2024)
37abb14  Merge branch 'crossQ' of github.com:BY571/rl into crossQ (BY571, Jun 26, 2024)
bc7675a  small fixes (BY571, Jun 26, 2024)
9543f2e  update docs and sota checks (BY571, Jun 26, 2024)
53e35f7  hyperparam fix (BY571, Jun 26, 2024)
172e1c0  test (BY571, Jun 27, 2024)
fdb7e8b  update batch norm tests (BY571, Jun 27, 2024)
5501d43  tests (BY571, Jul 3, 2024)
c47ac84  cleanup (BY571, Jul 5, 2024)
e718c3f  Merge branch 'main' into crossQ (BY571, Jul 5, 2024)
f94165e  update (BY571, Jul 7, 2024)
02c94ff  update lr param (BY571, Jul 8, 2024)
93b6a7b  Merge branch 'crossQ' of https://github.com/BY571/rl into crossQ (BY571, Jul 8, 2024)
4b914e6  Apply suggestions from code review (vmoens, Jul 8, 2024)
af8c64a  Merge remote-tracking branch 'origin/main' into crossQ (vmoens, Jul 8, 2024)
845c8a9  Merge branch 'crossQ' of https://github.com/BY571/rl into crossQ (vmoens, Jul 8, 2024)
7b4a69d  set qnet eval in actor loss (BY571, Jul 8, 2024)
77de044  Merge branch 'crossQ' of https://github.com/BY571/rl into crossQ (BY571, Jul 8, 2024)
35c7a98  take off comment (BY571, Jul 8, 2024)
68a1a9f  amend (vmoens, Jul 8, 2024)
c04eb3b  Merge branch 'crossQ' of https://github.com/BY571/rl into crossQ (vmoens, Jul 8, 2024)
12672ee  Merge remote-tracking branch 'origin/main' into crossQ (vmoens, Jul 8, 2024)
7fbb27d  amend (vmoens, Jul 8, 2024)
ff80481  amend (vmoens, Jul 8, 2024)
caf702e  amend (vmoens, Jul 8, 2024)
70e2882  amend (vmoens, Jul 8, 2024)
ccd1b7f  amend (vmoens, Jul 8, 2024)
d3c8b0e  Merge remote-tracking branch 'origin/main' into crossQ (vmoens, Jul 9, 2024)
d3e0bb1  Apply suggestions from code review (vmoens, Jul 9, 2024)
349cb28  amend (vmoens, Jul 9, 2024)
75a43e7  amend (vmoens, Jul 9, 2024)
abada6c  fix device error (BY571, Jul 9, 2024)
c878b81  Update objective delay actor (BY571, Jul 9, 2024)
f222b11  Update tests not expecting target update (BY571, Jul 9, 2024)
067b560  update example utils (BY571, Jul 9, 2024)
c010e39  amend (vmoens, Jul 9, 2024)
12 changes: 12 additions & 0 deletions .github/unittest/linux_examples/scripts/run_test.sh
@@ -149,6 +149,18 @@ python .github/unittest/helpers/coverage_run_parallel.py sota-implementations/di
replay_buffer.size=120 \
env.name=CartPole-v1 \
logger.backend=
python .github/unittest/helpers/coverage_run_parallel.py sota-implementations/crossq/crossq.py \
collector.total_frames=48 \
collector.init_random_frames=10 \
collector.frames_per_batch=16 \
collector.env_per_collector=2 \
collector.device= \
optim.batch_size=10 \
optim.utd_ratio=1 \
replay_buffer.size=120 \
env.name=Pendulum-v1 \
network.device= \
logger.backend=
python .github/unittest/helpers/coverage_run_parallel.py sota-implementations/dreamer/dreamer.py \
collector.total_frames=200 \
collector.init_random_frames=10 \
9 changes: 9 additions & 0 deletions docs/source/reference/objectives.rst
@@ -121,6 +121,15 @@ REDQ

REDQLoss

CrossQ
------

.. autosummary::
:toctree: generated/
:template: rl_template_noinherit.rst

CrossQ

IQL
----

26 changes: 26 additions & 0 deletions sota-check/run_crossq.sh
@@ -0,0 +1,26 @@
#!/bin/bash

#SBATCH --job-name=crossq
#SBATCH --ntasks=32
#SBATCH --cpus-per-task=1
#SBATCH --gres=gpu:1
#SBATCH --output=slurm_logs/crossq_%j.txt
#SBATCH --error=slurm_errors/crossq_%j.txt

current_commit=$(git rev-parse --short HEAD)
project_name="torchrl-example-check-$current_commit"
group_name="crossq"
export PYTHONPATH=$(dirname $(dirname $PWD))
python $PYTHONPATH/sota-implementations/crossq/crossq.py \
logger.backend=wandb \
logger.project_name="$project_name" \
logger.group_name="$group_name"

# Capture the exit status of the Python command
exit_status=$?
# Write the exit status to a file
if [ $exit_status -eq 0 ]; then
echo "${group_name}_${SLURM_JOB_ID}=success" >> report.log
else
echo "${group_name}_${SLURM_JOB_ID}=error" >> report.log
fi
96 changes: 96 additions & 0 deletions sota-implementations/crossq/batchrenorm.py
@@ -0,0 +1,96 @@
import torch
vmoens marked this conversation as resolved.
import torch.nn as nn


class BatchRenorm(nn.Module):
Reviewer comment: Let's put this in the modules, no?

Reviewer comment: And add it to the doc. Happy to write a couple of tests. Is it a copy-paste? If so, can we check the license?

"""
BatchRenorm Module (https://arxiv.org/abs/1702.03275).

BatchRenorm is an enhanced version of the standard BatchNorm. Unlike BatchNorm,
BatchRenorm utilizes running statistics to normalize batches after an initial warmup phase.
This approach reduces the impact of "outlier" batches that may occur during extended training periods,
making BatchRenorm more robust for long training runs.

During the warmup phase, BatchRenorm functions identically to a BatchNorm layer.

Args:
num_features (int): Number of features in the input tensor.
eps (float, optional): Small value added to the variance to avoid division by zero. Default is 0.01.
momentum (float, optional): Momentum factor for computing the running mean and variance. Default is 0.99.
r_max (float, optional): Maximum value for the scaling factor r. Default is 3.0.
d_max (float, optional): Maximum value for the bias factor d. Default is 5.0.
warmup_steps (int, optional): Number of warm-up steps for the running mean and variance. Default is 100000.
BY571 marked this conversation as resolved.
"""

def __init__(
self,
num_features,
eps=0.01,
momentum=0.99,
r_max=3.0,
d_max=5.0,
warmup_steps=100000,
):

super(BatchRenorm, self).__init__()
vmoens marked this conversation as resolved.
self.num_features = num_features
self.eps = eps
self.momentum = momentum
self.r_max = r_max
self.d_max = d_max
self.warmup_steps = warmup_steps
self.step_count = 0

self.gamma = nn.Parameter(torch.ones(num_features))
self.beta = nn.Parameter(torch.zeros(num_features))

self.register_buffer("running_mean", torch.zeros(num_features))
self.register_buffer("running_var", torch.ones(num_features))

def forward(self, x):
self.step_count += 1
Reviewer comment: Make this a buffer so that loading it from a checkpoint restores its value.


# Compute the dimensions for mean and variance calculation
dims = [i for i in range(x.dim()) if i != 1]
expand_dims = [1 if i != 1 else -1 for i in range(x.dim())]

# Compute batch statistics
batch_mean = x.mean(dims, keepdim=True)
batch_var = x.var(dims, unbiased=False, keepdim=True)

if self.training:
if self.step_count <= self.warmup_steps:
# Use classical BatchNorm during warmup
x_hat = (x - batch_mean) / torch.sqrt(batch_var + self.eps)
Reviewer comment: What about torch.nn.functional.batch_norm?

else:
# Use Batch Renormalization
with torch.no_grad():
r = torch.clamp(
batch_var / self.running_var.view(*expand_dims),
1.0 / self.r_max,
self.r_max,
)
d = torch.clamp(
(batch_mean - self.running_mean.view(*expand_dims))
/ torch.sqrt(self.running_var.view(*expand_dims) + self.eps),
-self.d_max,
self.d_max,
)

x_hat = (x - batch_mean) / torch.sqrt(batch_var + self.eps)
Reviewer comment: I would use torch.nn.functional.batch_norm, and move this out of the block (since it's the same line as 64).

x_hat = x_hat * r + d

# Update running statistics
self.running_mean.mul_(1 - self.momentum).add_(
batch_mean.squeeze().detach() * self.momentum
)
self.running_var.mul_(1 - self.momentum).add_(
batch_var.squeeze().detach() * self.momentum
)
else:
# Use running statistics during inference
x_hat = (x - self.running_mean.view(*expand_dims)) / torch.sqrt(
Reviewer comment: torch.nn.functional.batch_norm has a training param.

self.running_var.view(*expand_dims) + self.eps
)

return self.gamma.view(*expand_dims) * x_hat + self.beta.view(*expand_dims)
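
To make the review suggestions above concrete, here is a minimal sketch (not the code merged in this PR) of a variant that stores step_count as a buffer and delegates the warmup and eval paths to torch.nn.functional.batch_norm. The renormalization branch mirrors the diff above, including its use of a variance ratio for r, where the paper (arXiv:1702.03275) defines r via standard deviations.

import torch
import torch.nn as nn
import torch.nn.functional as F

class BatchRenormSketch(nn.Module):
    """Hypothetical variant folding in the review suggestions; not the merged code."""

    def __init__(self, num_features, eps=0.01, momentum=0.99, r_max=3.0, d_max=5.0, warmup_steps=100000):
        super().__init__()
        self.eps, self.momentum = eps, momentum
        self.r_max, self.d_max, self.warmup_steps = r_max, d_max, warmup_steps
        self.gamma = nn.Parameter(torch.ones(num_features))
        self.beta = nn.Parameter(torch.zeros(num_features))
        self.register_buffer("running_mean", torch.zeros(num_features))
        self.register_buffer("running_var", torch.ones(num_features))
        # A buffer survives state_dict()/load_state_dict() round-trips;
        # a plain int attribute does not.
        self.register_buffer("step_count", torch.zeros((), dtype=torch.long))

    def forward(self, x):
        if self.training:
            self.step_count += 1
        warmup = self.training and bool(self.step_count <= self.warmup_steps)
        if warmup or not self.training:
            # F.batch_norm covers both cases: training=True normalizes with batch
            # statistics and updates the running buffers in place with the same
            # running = (1 - momentum) * running + momentum * batch rule as the
            # diff; training=False normalizes with the running statistics.
            return F.batch_norm(
                x, self.running_mean, self.running_var,
                weight=self.gamma, bias=self.beta,
                training=warmup, momentum=self.momentum, eps=self.eps,
            )
        # Renormalization branch, mirroring the diff above.
        dims = [i for i in range(x.dim()) if i != 1]
        shape = [1 if i != 1 else -1 for i in range(x.dim())]
        batch_mean = x.mean(dims, keepdim=True)
        batch_var = x.var(dims, unbiased=False, keepdim=True)
        with torch.no_grad():
            r = torch.clamp(batch_var / self.running_var.view(shape), 1.0 / self.r_max, self.r_max)
            d = torch.clamp(
                (batch_mean - self.running_mean.view(shape))
                / torch.sqrt(self.running_var.view(shape) + self.eps),
                -self.d_max, self.d_max,
            )
        x_hat = (x - batch_mean) / torch.sqrt(batch_var + self.eps) * r + d
        self.running_mean.mul_(1 - self.momentum).add_(self.momentum * batch_mean.squeeze().detach())
        self.running_var.mul_(1 - self.momentum).add_(self.momentum * batch_var.squeeze().detach())
        return self.gamma.view(shape) * x_hat + self.beta.view(shape)

Usage matches nn.BatchNorm1d for (N, C) or (N, C, L) inputs; calling eval() routes the forward pass through the running statistics.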
58 changes: 58 additions & 0 deletions sota-implementations/crossq/config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
# environment and task
env:
name: HalfCheetah-v4
task: ""
library: gym
max_episode_steps: 1000
seed: 42

# collector
collector:
total_frames: 1_000_000
init_random_frames: 25000
frames_per_batch: 1000
init_env_steps: 1000
device: cpu
env_per_collector: 1
reset_at_each_iter: False

# replay buffer
replay_buffer:
size: 1000000
prb: 0 # use prioritized experience replay
scratch_dir: null

# optim
optim:
utd_ratio: 1.0
policy_update_delay: 3
gamma: 0.99
loss_function: l2
lr: 1.0e-3
weight_decay: 0.0
batch_size: 256
alpha_init: 1.0
adam_eps: 1.0e-8
beta1: 0.5
beta2: 0.999

# network
network:
batch_norm_momentum: 0.99
warmup_steps: 100000
critic_hidden_sizes: [2048, 2048]
actor_hidden_sizes: [256, 256]
critic_activation: relu
actor_activation: relu
default_policy_scale: 1.0
scale_lb: 0.1
device: "cuda:0"

# logging
logger:
backend: wandb
project_name: torchrl_example_crossQ
group_name: null
exp_name: ${env.name}_CrossQ
mode: online
eval_iter: 25000
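
For context, a minimal sketch of how a Hydra-managed script typically consumes a config like this; the exact decorator arguments used by crossq.py are an assumption here, not taken from this PR.

# Sketch only: config_path/config_name/version_base are assumed values.
import hydra
from omegaconf import DictConfig

@hydra.main(config_path="", config_name="config", version_base="1.1")
def main(cfg: DictConfig) -> None:
    device = cfg.network.device        # "cuda:0"
    batch_size = cfg.optim.batch_size  # 256
    exp_name = cfg.logger.exp_name     # interpolates to "HalfCheetah-v4_CrossQ"
    ...

if __name__ == "__main__":
    main()

Any field can be overridden on the command line, which is how run_test.sh above shrinks the run for the smoke test (e.g. optim.batch_size=10, logger.backend=).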