diff --git a/CHANGELOG.md b/CHANGELOG.md
index 745e2b14..13e167ed 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,16 @@
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
+## [1.1.0] - 2024-02-12
+### Added
+- MultiCategorical mixin to operate on MultiDiscrete action spaces
+
+### Changed (breaking changes)
+- Rename the `ManualTrainer` to `StepTrainer`
+- Output training/evaluation progress messages to the system's stdout
+- Get single observation/action spaces for vectorized environments
+- Update Isaac Orbit environment wrapper
+
## [1.0.0] - 2023-08-16
Transition from pre-release versions (`1.0.0-rc.1` and `1.0.0-rc.2`) to a stable version.
diff --git a/docs/source/_static/imgs/model_categorical_cnn-dark.svg b/docs/source/_static/imgs/model_categorical_cnn-dark.svg
index 1c312ccc..9ae9bf44 100755
--- a/docs/source/_static/imgs/model_categorical_cnn-dark.svg
+++ b/docs/source/_static/imgs/model_categorical_cnn-dark.svg
@@ -1 +1 @@
-
+
diff --git a/docs/source/_static/imgs/model_categorical_cnn-light.svg b/docs/source/_static/imgs/model_categorical_cnn-light.svg
index da4893c4..030210e4 100755
--- a/docs/source/_static/imgs/model_categorical_cnn-light.svg
+++ b/docs/source/_static/imgs/model_categorical_cnn-light.svg
@@ -1 +1 @@
-
+
diff --git a/docs/source/_static/imgs/model_deterministic_cnn-dark.svg b/docs/source/_static/imgs/model_deterministic_cnn-dark.svg
index df29d7a3..f901f94d 100755
--- a/docs/source/_static/imgs/model_deterministic_cnn-dark.svg
+++ b/docs/source/_static/imgs/model_deterministic_cnn-dark.svg
@@ -1 +1 @@
-
+
diff --git a/docs/source/_static/imgs/model_deterministic_cnn-light.svg b/docs/source/_static/imgs/model_deterministic_cnn-light.svg
index 7ddb76f7..d12de8de 100755
--- a/docs/source/_static/imgs/model_deterministic_cnn-light.svg
+++ b/docs/source/_static/imgs/model_deterministic_cnn-light.svg
@@ -1 +1 @@
-
+
diff --git a/docs/source/_static/imgs/model_gaussian_cnn-dark.svg b/docs/source/_static/imgs/model_gaussian_cnn-dark.svg
index 411fa4e2..a48fa5c8 100755
--- a/docs/source/_static/imgs/model_gaussian_cnn-dark.svg
+++ b/docs/source/_static/imgs/model_gaussian_cnn-dark.svg
@@ -1 +1 @@
-
+
diff --git a/docs/source/_static/imgs/model_gaussian_cnn-light.svg b/docs/source/_static/imgs/model_gaussian_cnn-light.svg
index 426753a5..276e8b87 100755
--- a/docs/source/_static/imgs/model_gaussian_cnn-light.svg
+++ b/docs/source/_static/imgs/model_gaussian_cnn-light.svg
@@ -1 +1 @@
-
+
diff --git a/docs/source/_static/imgs/model_multicategorical-dark.svg b/docs/source/_static/imgs/model_multicategorical-dark.svg
new file mode 100755
index 00000000..dc246c43
--- /dev/null
+++ b/docs/source/_static/imgs/model_multicategorical-dark.svg
@@ -0,0 +1 @@
+
diff --git a/docs/source/_static/imgs/model_multicategorical-light.svg b/docs/source/_static/imgs/model_multicategorical-light.svg
new file mode 100755
index 00000000..e5e21a81
--- /dev/null
+++ b/docs/source/_static/imgs/model_multicategorical-light.svg
@@ -0,0 +1 @@
+
diff --git a/docs/source/api/agents/a2c.rst b/docs/source/api/agents/a2c.rst
index 23721bc0..c98f6448 100644
--- a/docs/source/api/agents/a2c.rst
+++ b/docs/source/api/agents/a2c.rst
@@ -164,6 +164,9 @@ The implementation supports the following `Gym spaces ` / :ref:`Gaussian ` / :ref:`MultivariateGaussian `
+ - :ref:`Categorical <models_categorical>` /
+ |br| :ref:`Multi-Categorical <models_multicategorical>` /
+ |br| :ref:`Gaussian <models_gaussian>` /
+ |br| :ref:`MultivariateGaussian <models_multivariate_gaussian>`
* - :math:`V_\phi(s)`
- Value
- :literal:`"value"`
diff --git a/docs/source/api/agents/amp.rst b/docs/source/api/agents/amp.rst
index 57cc9180..c993b67a 100644
--- a/docs/source/api/agents/amp.rst
+++ b/docs/source/api/agents/amp.rst
@@ -162,6 +162,10 @@ The implementation supports the following `Gym spaces ` / :ref:`MultivariateGaussian `
+ - :ref:`Gaussian <models_gaussian>` /
+ |br| :ref:`MultivariateGaussian <models_multivariate_gaussian>`
* - :math:`V_\phi(s)`
- Value
- :literal:`"value"`
diff --git a/docs/source/api/agents/cem.rst b/docs/source/api/agents/cem.rst
index 0c1ac587..68245818 100644
--- a/docs/source/api/agents/cem.rst
+++ b/docs/source/api/agents/cem.rst
@@ -119,6 +119,9 @@ The implementation supports the following `Gym spaces `
+ - :ref:`Categorical <models_categorical>` /
+ |br| :ref:`Multi-Categorical <models_multicategorical>`
.. raw:: html
diff --git a/docs/source/api/agents/ddpg.rst b/docs/source/api/agents/ddpg.rst
index 86eb2fd0..00972f3f 100644
--- a/docs/source/api/agents/ddpg.rst
+++ b/docs/source/api/agents/ddpg.rst
@@ -159,6 +159,9 @@ The implementation supports the following `Gym spaces ` / :ref:`Gaussian ` / :ref:`MultivariateGaussian `
+ - :ref:`Categorical <models_categorical>` /
+ |br| :ref:`Multi-Categorical <models_multicategorical>` /
+ |br| :ref:`Gaussian <models_gaussian>` /
+ |br| :ref:`MultivariateGaussian <models_multivariate_gaussian>`
* - :math:`V_\phi(s)`
- Value
- :literal:`"value"`
diff --git a/docs/source/api/agents/q_learning.rst b/docs/source/api/agents/q_learning.rst
index 89452096..29c1b29b 100644
--- a/docs/source/api/agents/q_learning.rst
+++ b/docs/source/api/agents/q_learning.rst
@@ -99,6 +99,9 @@ The implementation supports the following `Gym spaces ` / :ref:`MultivariateGaussian `
+ - :ref:`Gaussian <models_gaussian>` /
+ |br| :ref:`MultivariateGaussian <models_multivariate_gaussian>`
* - :math:`V_\phi(s)`
- Value
- :literal:`"value"`
diff --git a/docs/source/api/agents/sac.rst b/docs/source/api/agents/sac.rst
index 457b0542..c55720cc 100644
--- a/docs/source/api/agents/sac.rst
+++ b/docs/source/api/agents/sac.rst
@@ -160,6 +160,9 @@ The implementation supports the following `Gym spaces ` / :ref:`MultivariateGaussian `
+ - :ref:`Gaussian <models_gaussian>` /
+ |br| :ref:`MultivariateGaussian <models_multivariate_gaussian>`
* - :math:`Q_{\phi 1}(s, a)`
- Q1-network (critic 1)
- :literal:`"critic_1"`
diff --git a/docs/source/api/agents/sarsa.rst b/docs/source/api/agents/sarsa.rst
index 87a6d8bf..e7759b54 100644
--- a/docs/source/api/agents/sarsa.rst
+++ b/docs/source/api/agents/sarsa.rst
@@ -99,6 +99,9 @@ The implementation supports the following `Gym spaces ` / :ref:`MultivariateGaussian `
+ - :ref:`Gaussian <models_gaussian>` /
+ |br| :ref:`MultivariateGaussian <models_multivariate_gaussian>`
* - :math:`V_\phi(s)`
- Value
- :literal:`"value"`
diff --git a/docs/source/api/models.rst b/docs/source/api/models.rst
index 48083b81..522d5eb6 100644
--- a/docs/source/api/models.rst
+++ b/docs/source/api/models.rst
@@ -6,6 +6,7 @@ Models
Tabular
Categorical
+ Multi-Categorical <models/multicategorical>
Gaussian
Multivariate Gaussian
Deterministic
@@ -29,6 +30,9 @@ Models (or agent models) refer to a representation of the agent's policy, value
* - :doc:`Categorical model ` (discrete domain)
- .. centered:: :math:`\blacksquare`
- .. centered:: :math:`\blacksquare`
+ * - :doc:`Multi-Categorical model <models/multicategorical>` (discrete domain)
+ - .. centered:: :math:`\blacksquare`
+ - .. centered:: :math:`\square`
* - :doc:`Gaussian model ` (continuous domain)
- .. centered:: :math:`\blacksquare`
- .. centered:: :math:`\blacksquare`
diff --git a/docs/source/api/models/categorical.rst b/docs/source/api/models/categorical.rst
index 19543ab4..c285896d 100644
--- a/docs/source/api/models/categorical.rst
+++ b/docs/source/api/models/categorical.rst
@@ -163,6 +163,24 @@ Usage
:start-after: [start-cnn-functional-torch]
:end-before: [end-cnn-functional-torch]
+ .. group-tab:: |_4| |jax| |_4|
+
+ .. tabs::
+
+ .. group-tab:: setup-style
+
+ .. literalinclude:: ../../snippets/categorical_model.py
+ :language: python
+ :start-after: [start-cnn-setup-jax]
+ :end-before: [end-cnn-setup-jax]
+
+ .. group-tab:: compact-style
+
+ .. literalinclude:: ../../snippets/categorical_model.py
+ :language: python
+ :start-after: [start-cnn-compact-jax]
+ :end-before: [end-cnn-compact-jax]
+
.. tab:: RNN
.. image:: ../../_static/imgs/model_categorical_rnn-light.svg
diff --git a/docs/source/api/models/deterministic.rst b/docs/source/api/models/deterministic.rst
index e5c24b87..30e5ef91 100644
--- a/docs/source/api/models/deterministic.rst
+++ b/docs/source/api/models/deterministic.rst
@@ -163,6 +163,24 @@ Usage
:start-after: [start-cnn-functional-torch]
:end-before: [end-cnn-functional-torch]
+ .. group-tab:: |_4| |jax| |_4|
+
+ .. tabs::
+
+ .. group-tab:: setup-style
+
+ .. literalinclude:: ../../snippets/deterministic_model.py
+ :language: python
+ :start-after: [start-cnn-setup-jax]
+ :end-before: [end-cnn-setup-jax]
+
+ .. group-tab:: compact-style
+
+ .. literalinclude:: ../../snippets/deterministic_model.py
+ :language: python
+ :start-after: [start-cnn-compact-jax]
+ :end-before: [end-cnn-compact-jax]
+
.. tab:: RNN
.. image:: ../../_static/imgs/model_deterministic_rnn-light.svg
diff --git a/docs/source/api/models/gaussian.rst b/docs/source/api/models/gaussian.rst
index 721d79bd..1aa50c05 100644
--- a/docs/source/api/models/gaussian.rst
+++ b/docs/source/api/models/gaussian.rst
@@ -163,6 +163,24 @@ Usage
:start-after: [start-cnn-functional-torch]
:end-before: [end-cnn-functional-torch]
+ .. group-tab:: |_4| |jax| |_4|
+
+ .. tabs::
+
+ .. group-tab:: setup-style
+
+ .. literalinclude:: ../../snippets/gaussian_model.py
+ :language: python
+ :start-after: [start-cnn-setup-jax]
+ :end-before: [end-cnn-setup-jax]
+
+ .. group-tab:: compact-style
+
+ .. literalinclude:: ../../snippets/gaussian_model.py
+ :language: python
+ :start-after: [start-cnn-compact-jax]
+ :end-before: [end-cnn-compact-jax]
+
.. tab:: RNN
.. image:: ../../_static/imgs/model_gaussian_rnn-light.svg
diff --git a/docs/source/api/models/multicategorical.rst b/docs/source/api/models/multicategorical.rst
new file mode 100644
index 00000000..a8c89065
--- /dev/null
+++ b/docs/source/api/models/multicategorical.rst
@@ -0,0 +1,401 @@
+.. _models_multicategorical:
+
+Multi-Categorical model
+=======================
+
+Multi-Categorical models run **discrete-domain stochastic** policies.
+
+.. raw:: html
+
+
+
+skrl provides a Python mixin (:literal:`MultiCategoricalMixin`) to assist in the creation of these types of models, allowing users to have full control over the function approximator definitions and architectures. Note that the use of this mixin must comply with the following rules:
+
+* The definition of multiple inheritance must always include the :ref:`Model ` base class at the end.
+
+* The :ref:`Model ` base class constructor must be invoked before the mixin's constructor.
+
+.. warning::
+
+ For models in JAX/Flax it is imperative to define all parameters (except ``observation_space``, ``action_space`` and ``device``) with default values to avoid errors (``TypeError: __init__() missing N required positional argument``) during initialization.
+
+ In addition, it is necessary to initialize the model's ``state_dict`` (via the ``init_state_dict`` method) after its instantiation to avoid errors (``AttributeError: object has no attribute "state_dict". If "state_dict" is defined in '.setup()', remember these fields are only accessible from inside 'init' or 'apply'``) during its use.
+
+.. tabs::
+
+ .. group-tab:: |_4| |pytorch| |_4|
+
+ .. literalinclude:: ../../snippets/multicategorical_model.py
+ :language: python
+ :emphasize-lines: 1, 3-4
+ :start-after: [start-definition-torch]
+ :end-before: [end-definition-torch]
+
+ .. group-tab:: |_4| |jax| |_4|
+
+ .. literalinclude:: ../../snippets/multicategorical_model.py
+ :language: python
+ :emphasize-lines: 1, 3-4
+ :start-after: [start-definition-jax]
+ :end-before: [end-definition-jax]
+
+.. raw:: html
+
+
+
+Concept
+-------
+
+.. image:: ../../_static/imgs/model_multicategorical-light.svg
+ :width: 100%
+ :align: center
+ :class: only-light
+ :alt: Multi-Categorical model
+
+.. image:: ../../_static/imgs/model_multicategorical-dark.svg
+ :width: 100%
+ :align: center
+ :class: only-dark
+ :alt: Multi-Categorical model
+
+.. raw:: html
+
+
+
+Usage
+-----
+
+* Multi-Layer Perceptron (**MLP**)
+* Convolutional Neural Network (**CNN**)
+* Recurrent Neural Network (**RNN**)
+* Gated Recurrent Unit RNN (**GRU**)
+* Long Short-Term Memory RNN (**LSTM**)
+
+.. tabs::
+
+ .. tab:: MLP
+
+ .. image:: ../../_static/imgs/model_categorical_mlp-light.svg
+ :width: 40%
+ :align: center
+ :class: only-light
+
+ .. image:: ../../_static/imgs/model_categorical_mlp-dark.svg
+ :width: 40%
+ :align: center
+ :class: only-dark
+
+ .. raw:: html
+
+
+
+ .. tabs::
+
+ .. group-tab:: |_4| |pytorch| |_4|
+
+ .. tabs::
+
+ .. group-tab:: nn.Sequential
+
+ .. literalinclude:: ../../snippets/multicategorical_model.py
+ :language: python
+ :start-after: [start-mlp-sequential-torch]
+ :end-before: [end-mlp-sequential-torch]
+
+ .. group-tab:: nn.functional
+
+ .. literalinclude:: ../../snippets/multicategorical_model.py
+ :language: python
+ :start-after: [start-mlp-functional-torch]
+ :end-before: [end-mlp-functional-torch]
+
+ .. group-tab:: |_4| |jax| |_4|
+
+ .. tabs::
+
+ .. group-tab:: setup-style
+
+ .. literalinclude:: ../../snippets/multicategorical_model.py
+ :language: python
+ :start-after: [start-mlp-setup-jax]
+ :end-before: [end-mlp-setup-jax]
+
+ .. group-tab:: compact-style
+
+ .. literalinclude:: ../../snippets/multicategorical_model.py
+ :language: python
+ :start-after: [start-mlp-compact-jax]
+ :end-before: [end-mlp-compact-jax]
+
+ .. tab:: CNN
+
+ .. image:: ../../_static/imgs/model_categorical_cnn-light.svg
+ :width: 100%
+ :align: center
+ :class: only-light
+
+ .. image:: ../../_static/imgs/model_categorical_cnn-dark.svg
+ :width: 100%
+ :align: center
+ :class: only-dark
+
+ .. raw:: html
+
+
+
+ .. tabs::
+
+ .. group-tab:: |_4| |pytorch| |_4|
+
+ .. tabs::
+
+ .. group-tab:: nn.Sequential
+
+ .. literalinclude:: ../../snippets/multicategorical_model.py
+ :language: python
+ :start-after: [start-cnn-sequential-torch]
+ :end-before: [end-cnn-sequential-torch]
+
+ .. group-tab:: nn.functional
+
+ .. literalinclude:: ../../snippets/multicategorical_model.py
+ :language: python
+ :start-after: [start-cnn-functional-torch]
+ :end-before: [end-cnn-functional-torch]
+
+ .. group-tab:: |_4| |jax| |_4|
+
+ .. tabs::
+
+ .. group-tab:: setup-style
+
+ .. literalinclude:: ../../snippets/multicategorical_model.py
+ :language: python
+ :start-after: [start-cnn-setup-jax]
+ :end-before: [end-cnn-setup-jax]
+
+ .. group-tab:: compact-style
+
+ .. literalinclude:: ../../snippets/multicategorical_model.py
+ :language: python
+ :start-after: [start-cnn-compact-jax]
+ :end-before: [end-cnn-compact-jax]
+
+ .. tab:: RNN
+
+ .. image:: ../../_static/imgs/model_categorical_rnn-light.svg
+ :width: 90%
+ :align: center
+ :class: only-light
+
+ .. image:: ../../_static/imgs/model_categorical_rnn-dark.svg
+ :width: 90%
+ :align: center
+ :class: only-dark
+
+ where:
+
+ .. math::
+ \begin{aligned}
+ N ={} & \text{batch size} \\
+ L ={} & \text{sequence length} \\
+ D ={} & 2 \text{ if bidirectional=True otherwise } 1 \\
+ H_{in} ={} & \text{input_size} \\
+ H_{out} ={} & \text{hidden_size}
+ \end{aligned}
+
+ .. raw:: html
+
+
+
+ The following points are relevant in the definition of recurrent models (a minimal sketch of the specification is shown after this list):
+
+ * The ``.get_specification()`` method must be overwritten to return, under a dictionary key ``"rnn"``, a sub-dictionary that includes the sequence length (under key ``"sequence_length"``) as a number and a list of the dimensions (under key ``"sizes"``) of each initial hidden state
+
+ * The ``.compute()`` method's ``inputs`` parameter will have, at least, the following items in the dictionary:
+
+ * ``"states"``: state of the environment used to make the decision
+ * ``"taken_actions"``: actions taken by the policy for the given states, if applicable
+ * ``"terminated"``: episode termination status for sampled environment transitions. This key is only defined during the training process
+ * ``"rnn"``: list of initial hidden states ordered according to the model specification
+
+ * The ``.compute()`` method must include, under the ``"rnn"`` key of the returned dictionary, a list of each final hidden state
+
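+ For reference, a minimal sketch of the specification described in the first point is shown below (``self.num_envs``, ``self.num_layers``, ``self.hidden_size`` and ``self.sequence_length`` are assumed to be defined in the model's constructor, as in the complete snippets that follow):
+
+ .. code-block:: python
+
+     def get_specification(self):
+         # one entry in "sizes" per initial hidden state,
+         # each of shape (D * num_layers, N, Hout)
+         return {"rnn": {"sequence_length": self.sequence_length,
+                         "sizes": [(self.num_layers, self.num_envs, self.hidden_size)]}}
+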
+ .. raw:: html
+
+
+
+ .. tabs::
+
+ .. group-tab:: |_4| |pytorch| |_4|
+
+ .. tabs::
+
+ .. group-tab:: nn.Sequential
+
+ .. literalinclude:: ../../snippets/multicategorical_model.py
+ :language: python
+ :start-after: [start-rnn-sequential-torch]
+ :end-before: [end-rnn-sequential-torch]
+
+ .. group-tab:: nn.functional
+
+ .. literalinclude:: ../../snippets/multicategorical_model.py
+ :language: python
+ :start-after: [start-rnn-functional-torch]
+ :end-before: [end-rnn-functional-torch]
+
+ .. tab:: GRU
+
+ .. image:: ../../_static/imgs/model_categorical_rnn-light.svg
+ :width: 90%
+ :align: center
+ :class: only-light
+
+ .. image:: ../../_static/imgs/model_categorical_rnn-dark.svg
+ :width: 90%
+ :align: center
+ :class: only-dark
+
+ where:
+
+ .. math::
+ \begin{aligned}
+ N ={} & \text{batch size} \\
+ L ={} & \text{sequence length} \\
+ D ={} & 2 \text{ if bidirectional=True otherwise } 1 \\
+ H_{in} ={} & \text{input_size} \\
+ H_{out} ={} & \text{hidden_size}
+ \end{aligned}
+
+ .. raw:: html
+
+
+
+ The following points are relevant in the definition of recurrent models:
+
+ * The ``.get_specification()`` method must be overwritten to return, under a dictionary key ``"rnn"``, a sub-dictionary that includes the sequence length (under key ``"sequence_length"``) as a number and a list of the dimensions (under key ``"sizes"``) of each initial hidden state
+
+ * The ``.compute()`` method's ``inputs`` parameter will have, at least, the following items in the dictionary:
+
+ * ``"states"``: state of the environment used to make the decision
+ * ``"taken_actions"``: actions taken by the policy for the given states, if applicable
+ * ``"terminated"``: episode termination status for sampled environment transitions. This key is only defined during the training process
+ * ``"rnn"``: list of initial hidden states ordered according to the model specification
+
+ * The ``.compute()`` method must include, under the ``"rnn"`` key of the returned dictionary, a list of each final hidden state
+
+ .. raw:: html
+
+
+
+ .. tabs::
+
+ .. group-tab:: |_4| |pytorch| |_4|
+
+ .. tabs::
+
+ .. group-tab:: nn.Sequential
+
+ .. literalinclude:: ../../snippets/multicategorical_model.py
+ :language: python
+ :start-after: [start-gru-sequential-torch]
+ :end-before: [end-gru-sequential-torch]
+
+ .. group-tab:: nn.functional
+
+ .. literalinclude:: ../../snippets/multicategorical_model.py
+ :language: python
+ :start-after: [start-gru-functional-torch]
+ :end-before: [end-gru-functional-torch]
+
+ .. tab:: LSTM
+
+ .. image:: ../../_static/imgs/model_categorical_rnn-light.svg
+ :width: 90%
+ :align: center
+ :class: only-light
+
+ .. image:: ../../_static/imgs/model_categorical_rnn-dark.svg
+ :width: 90%
+ :align: center
+ :class: only-dark
+
+ where:
+
+ .. math::
+ \begin{aligned}
+ N ={} & \text{batch size} \\
+ L ={} & \text{sequence length} \\
+ D ={} & 2 \text{ if bidirectional=True otherwise } 1 \\
+ H_{in} ={} & \text{input_size} \\
+ H_{cell} ={} & \text{hidden_size} \\
+ H_{out} ={} & \text{proj_size if } \text{proj_size}>0 \text{ otherwise hidden_size} \\
+ \end{aligned}
+
+ .. raw:: html
+
+
+
+ The following points are relevant in the definition of recurrent models:
+
+ * The ``.get_specification()`` method must be overwritten to return, under a dictionary key ``"rnn"``, a sub-dictionary that includes the sequence length (under key ``"sequence_length"``) as a number and a list of the dimensions (under key ``"sizes"``) of each initial hidden/cell states
+
+ * The ``.compute()`` method's ``inputs`` parameter will have, at least, the following items in the dictionary:
+
+ * ``"states"``: state of the environment used to make the decision
+ * ``"taken_actions"``: actions taken by the policy for the given states, if applicable
+ * ``"terminated"``: episode termination status for sampled environment transitions. This key is only defined during the training process
+ * ``"rnn"``: list of initial hidden/cell states ordered according to the model specification
+
+ * The ``.compute()`` method must include, under the ``"rnn"`` key of the returned dictionary, a list of each final hidden/cell states
+
+ .. raw:: html
+
+
+
+ .. tabs::
+
+ .. group-tab:: |_4| |pytorch| |_4|
+
+ .. tabs::
+
+ .. group-tab:: nn.Sequential
+
+ .. literalinclude:: ../../snippets/multicategorical_model.py
+ :language: python
+ :start-after: [start-lstm-sequential-torch]
+ :end-before: [end-lstm-sequential-torch]
+
+ .. group-tab:: nn.functional
+
+ .. literalinclude:: ../../snippets/multicategorical_model.py
+ :language: python
+ :start-after: [start-lstm-functional-torch]
+ :end-before: [end-lstm-functional-torch]
+
+.. raw:: html
+
+
+
+API (PyTorch)
+-------------
+
+.. autoclass:: skrl.models.torch.multicategorical.MultiCategoricalMixin
+ :show-inheritance:
+ :members:
+
+ .. automethod:: __init__
+
+.. raw:: html
+
+
+
+API (JAX)
+---------
+
+.. autoclass:: skrl.models.jax.multicategorical.MultiCategoricalMixin
+ :show-inheritance:
+ :members:
+
+ .. automethod:: __init__
diff --git a/docs/source/api/multi_agents/ippo.rst b/docs/source/api/multi_agents/ippo.rst
index 8557a028..9a259326 100644
--- a/docs/source/api/multi_agents/ippo.rst
+++ b/docs/source/api/multi_agents/ippo.rst
@@ -171,6 +171,9 @@ The implementation supports the following `Gym spaces ` / :ref:`Gaussian ` / :ref:`MultivariateGaussian `
+ - :ref:`Categorical <models_categorical>` /
+ |br| :ref:`Multi-Categorical <models_multicategorical>` /
+ |br| :ref:`Gaussian <models_gaussian>` /
+ |br| :ref:`MultivariateGaussian <models_multivariate_gaussian>`
* - :math:`V_\phi(s)`
- Value
- :literal:`"value"`
diff --git a/docs/source/api/multi_agents/mappo.rst b/docs/source/api/multi_agents/mappo.rst
index ea8b82ad..c875ac6a 100644
--- a/docs/source/api/multi_agents/mappo.rst
+++ b/docs/source/api/multi_agents/mappo.rst
@@ -172,6 +172,9 @@ The implementation supports the following `Gym spaces ` / :ref:`Gaussian ` / :ref:`MultivariateGaussian `
+ - :ref:`Categorical <models_categorical>` /
+ |br| :ref:`Multi-Categorical <models_multicategorical>` /
+ |br| :ref:`Gaussian <models_gaussian>` /
+ |br| :ref:`MultivariateGaussian <models_multivariate_gaussian>`
* - :math:`V_\phi(s)`
- Value
- :literal:`"value"`
diff --git a/docs/source/api/trainers.rst b/docs/source/api/trainers.rst
index 45d4e5f1..83055038 100644
--- a/docs/source/api/trainers.rst
+++ b/docs/source/api/trainers.rst
@@ -6,7 +6,8 @@ Trainers
Sequential
Parallel
- Manual
+ Step <trainers/step>
+ Manual training <trainers/manual>
Trainers are responsible for orchestrating and managing the training/evaluation of agents and their interactions with the environment.
@@ -26,7 +27,10 @@ Trainers are responsible for orchestrating and managing the training/evaluation
* - :doc:`Parallel trainer `
- .. centered:: :math:`\blacksquare`
- .. centered:: :math:`\square`
- * - :doc:`Manual trainer `
+ * - :doc:`Step trainer <trainers/step>`
+ - .. centered:: :math:`\blacksquare`
+ - .. centered:: :math:`\blacksquare`
+ * - :doc:`Manual training <trainers/manual>`
- .. centered:: :math:`\blacksquare`
- .. centered:: :math:`\blacksquare`
diff --git a/docs/source/api/trainers/manual.rst b/docs/source/api/trainers/manual.rst
index 439755e0..61047610 100644
--- a/docs/source/api/trainers/manual.rst
+++ b/docs/source/api/trainers/manual.rst
@@ -1,5 +1,5 @@
-Manual trainer
-==============
+Manual training
+===============
Train agents by manually controlling the training/evaluation loop.
@@ -33,60 +33,40 @@ Usage
.. group-tab:: |_4| |pytorch| |_4|
- .. literalinclude:: ../../snippets/trainer.py
- :language: python
- :start-after: [pytorch-start-manual]
- :end-before: [pytorch-end-manual]
+ .. tabs::
- .. group-tab:: |_4| |jax| |_4|
-
- .. literalinclude:: ../../snippets/trainer.py
- :language: python
- :start-after: [jax-start-manual]
- :end-before: [jax-end-manual]
-
-.. raw:: html
+ .. group-tab:: Training
-
+ .. literalinclude:: ../../snippets/trainer.py
+ :language: python
+ :start-after: [pytorch-start-manual-training]
+ :end-before: [pytorch-end-manual-training]
-Configuration
--------------
+ .. group-tab:: Evaluation
-.. literalinclude:: ../../../../skrl/trainers/torch/manual.py
- :language: python
- :lines: 14-19
- :linenos:
+ .. literalinclude:: ../../snippets/trainer.py
+ :language: python
+ :start-after: [pytorch-start-manual-evaluation]
+ :end-before: [pytorch-end-manual-evaluation]
-.. raw:: html
+ .. group-tab:: |_4| |jax| |_4|
-
+ .. tabs::
-API (PyTorch)
--------------
+ .. group-tab:: Training
-.. autoclass:: skrl.trainers.torch.manual.MANUAL_TRAINER_DEFAULT_CONFIG
+ .. literalinclude:: ../../snippets/trainer.py
+ :language: python
+ :start-after: [jax-start-manual-training]
+ :end-before: [jax-end-manual-training]
-.. autoclass:: skrl.trainers.torch.manual.ManualTrainer
- :undoc-members:
- :show-inheritance:
- :inherited-members:
- :members:
+ .. group-tab:: Evaluation
- .. automethod:: __init__
+ .. literalinclude:: ../../snippets/trainer.py
+ :language: python
+ :start-after: [jax-start-manual-evaluation]
+ :end-before: [jax-end-manual-evaluation]
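+
+The training/evaluation snippets above follow the same general pattern. As a quick reference, a minimal sketch of the training portion (PyTorch) is shown below; it assumes a wrapped environment ``env``, an already instantiated and configured ``agent`` and a number of ``timesteps``, and omits details such as rendering (see the snippets above for the exact, complete code):
+
+.. code-block:: python
+
+    import torch
+
+    # initialize the agent before interacting with the environment
+    agent.init()
+
+    states, infos = env.reset()
+
+    for timestep in range(timesteps):
+        # pre-interaction stage (e.g. learning rate scheduling)
+        agent.pre_interaction(timestep=timestep, timesteps=timesteps)
+
+        with torch.no_grad():
+            # compute the actions and interact with the environment
+            actions = agent.act(states, timestep=timestep, timesteps=timesteps)[0]
+            next_states, rewards, terminated, truncated, infos = env.step(actions)
+
+            # record the transition in the agent's memory
+            agent.record_transition(states=states,
+                                    actions=actions,
+                                    rewards=rewards,
+                                    next_states=next_states,
+                                    terminated=terminated,
+                                    truncated=truncated,
+                                    infos=infos,
+                                    timestep=timestep,
+                                    timesteps=timesteps)
+
+        # post-interaction stage (e.g. algorithm update, logging, checkpointing)
+        agent.post_interaction(timestep=timestep, timesteps=timesteps)
+
+        # reset the environments if any episode has finished
+        if terminated.any() or truncated.any():
+            with torch.no_grad():
+                states, infos = env.reset()
+        else:
+            states = next_states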
.. raw:: html
-
-API (JAX)
----------
-
-.. autoclass:: skrl.trainers.jax.manual.MANUAL_TRAINER_DEFAULT_CONFIG
-
-.. autoclass:: skrl.trainers.jax.manual.ManualTrainer
- :undoc-members:
- :show-inheritance:
- :inherited-members:
- :members:
-
- .. automethod:: __init__
diff --git a/docs/source/api/trainers/parallel.rst b/docs/source/api/trainers/parallel.rst
index beff9be4..241b92d9 100644
--- a/docs/source/api/trainers/parallel.rst
+++ b/docs/source/api/trainers/parallel.rst
@@ -55,8 +55,8 @@ Configuration
.. literalinclude:: ../../../../skrl/trainers/torch/parallel.py
:language: python
- :lines: 15-20
- :linenos:
+ :start-after: [start-config-dict-torch]
+ :end-before: [end-config-dict-torch]
.. raw:: html
diff --git a/docs/source/api/trainers/sequential.rst b/docs/source/api/trainers/sequential.rst
index a4ee9095..6728dfb4 100644
--- a/docs/source/api/trainers/sequential.rst
+++ b/docs/source/api/trainers/sequential.rst
@@ -54,8 +54,8 @@ Configuration
.. literalinclude:: ../../../../skrl/trainers/torch/sequential.py
:language: python
- :lines: 14-19
- :linenos:
+ :start-after: [start-config-dict-torch]
+ :end-before: [end-config-dict-torch]
.. raw:: html
diff --git a/docs/source/api/trainers/step.rst b/docs/source/api/trainers/step.rst
new file mode 100644
index 00000000..ebb40e2c
--- /dev/null
+++ b/docs/source/api/trainers/step.rst
@@ -0,0 +1,92 @@
+Step trainer
+============
+
+Train agents by controlling the training/evaluation loop step by step.
+
+.. raw:: html
+
+
+
+Concept
+-------
+
+.. image:: ../../_static/imgs/manual_trainer-light.svg
+ :width: 100%
+ :align: center
+ :class: only-light
+ :alt: Step-by-step trainer
+
+.. image:: ../../_static/imgs/manual_trainer-dark.svg
+ :width: 100%
+ :align: center
+ :class: only-dark
+ :alt: Step-by-step trainer
+
+.. raw:: html
+
+
+
+Usage
+-----
+
+.. tabs::
+
+ .. group-tab:: |_4| |pytorch| |_4|
+
+ .. literalinclude:: ../../snippets/trainer.py
+ :language: python
+ :start-after: [pytorch-start-step]
+ :end-before: [pytorch-end-step]
+
+ .. group-tab:: |_4| |jax| |_4|
+
+ .. literalinclude:: ../../snippets/trainer.py
+ :language: python
+ :start-after: [jax-start-step]
+ :end-before: [jax-end-step]
+
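+As a quick reference, a minimal sketch of how the step-based loop might be driven is shown below; it assumes a wrapped environment ``env`` and an already configured ``agent``, and that the trainer's ``train()`` / ``eval()`` methods are called once per timestep as in the snippets above:
+
+.. code-block:: python
+
+    from skrl.trainers.torch import StepTrainer
+
+    # assumes a wrapped environment `env` and an agent `agent` already exist
+    cfg = {"timesteps": 50000, "headless": False}
+    trainer = StepTrainer(cfg=cfg, env=env, agents=agent)
+
+    # advance the interaction one timestep per call
+    for timestep in range(cfg["timesteps"]):
+        trainer.train(timestep=timestep)
+        # or, for evaluation:
+        # trainer.eval(timestep=timestep)
+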
+.. raw:: html
+
+
+
+Configuration
+-------------
+
+.. literalinclude:: ../../../../skrl/trainers/torch/step.py
+ :language: python
+ :start-after: [start-config-dict-torch]
+ :end-before: [end-config-dict-torch]
+
+.. raw:: html
+
+
+
+API (PyTorch)
+-------------
+
+.. autoclass:: skrl.trainers.torch.step.STEP_TRAINER_DEFAULT_CONFIG
+
+.. autoclass:: skrl.trainers.torch.step.StepTrainer
+ :undoc-members:
+ :show-inheritance:
+ :inherited-members:
+ :members:
+
+ .. automethod:: __init__
+
+.. raw:: html
+
+
+
+API (JAX)
+---------
+
+.. autoclass:: skrl.trainers.jax.step.STEP_TRAINER_DEFAULT_CONFIG
+
+.. autoclass:: skrl.trainers.jax.step.StepTrainer
+ :undoc-members:
+ :show-inheritance:
+ :inherited-members:
+ :members:
+
+ .. automethod:: __init__
diff --git a/docs/source/api/utils/isaacgym_utils.rst b/docs/source/api/utils/isaacgym_utils.rst
index 88d5a188..882fbd81 100644
--- a/docs/source/api/utils/isaacgym_utils.rst
+++ b/docs/source/api/utils/isaacgym_utils.rst
@@ -107,7 +107,6 @@ Usage
.. literalinclude:: ../../snippets/isaacgym_utils.py
:language: python
- :linenos:
:emphasize-lines: 4, 8, 56, 65-68
.. raw:: html
diff --git a/docs/source/api/utils/postprocessing.rst b/docs/source/api/utils/postprocessing.rst
index bc575f85..5eeb4ec5 100644
--- a/docs/source/api/utils/postprocessing.rst
+++ b/docs/source/api/utils/postprocessing.rst
@@ -25,7 +25,6 @@ Usage
.. literalinclude:: ../../snippets/utils_postprocessing.py
:language: python
- :linenos:
:emphasize-lines: 1, 5-6
:start-after: [start-memory_file_iterator-torch]
:end-before: [end-memory_file_iterator-torch]
@@ -34,7 +33,6 @@ Usage
.. literalinclude:: ../../snippets/utils_postprocessing.py
:language: python
- :linenos:
:emphasize-lines: 1, 5-6
:start-after: [start-memory_file_iterator-numpy]
:end-before: [end-memory_file_iterator-numpy]
@@ -43,7 +41,6 @@ Usage
.. literalinclude:: ../../snippets/utils_postprocessing.py
:language: python
- :linenos:
:emphasize-lines: 1, 5-6
:start-after: [start-memory_file_iterator-csv]
:end-before: [end-memory_file_iterator-csv]
@@ -101,7 +98,6 @@ Usage
.. literalinclude:: ../../snippets/utils_postprocessing.py
:language: python
- :linenos:
:emphasize-lines: 1, 5-7
:start-after: [start-tensorboard_file_iterator-list]
:end-before: [end-tensorboard_file_iterator-list]
diff --git a/docs/source/conf.py b/docs/source/conf.py
index 42f37f8a..f04c7874 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -16,7 +16,7 @@
if skrl.__version__ != "unknown":
release = version = skrl.__version__
else:
- release = version = "1.0.0"
+ release = version = "1.1.0"
master_doc = "index"
diff --git a/docs/source/examples/real_world/franka_emika_panda/reaching_franka_omniverse_isaacgym_env.py b/docs/source/examples/real_world/franka_emika_panda/reaching_franka_omniverse_isaacgym_env.py
index 9e7f07bd..248f342b 100644
--- a/docs/source/examples/real_world/franka_emika_panda/reaching_franka_omniverse_isaacgym_env.py
+++ b/docs/source/examples/real_world/franka_emika_panda/reaching_franka_omniverse_isaacgym_env.py
@@ -1,9 +1,6 @@
import torch
import numpy as np
-from omni.isaac.core.utils.extensions import enable_extension
-enable_extension("omni.replicator.isaac") # required by OIGE
-
from omniisaacgymenvs.tasks.base.rl_task import RLTask
from omniisaacgymenvs.robots.articulations.franka import Franka as Robot
@@ -27,6 +24,8 @@
"headless": True,
"sim_device": "gpu",
"enable_livestream": False,
+ "warp": False,
+ "seed": 42,
"task": {"name": "ReachingFranka",
"physics_engine": "physx",
"env": {"numEnvs": 1024,
@@ -86,6 +85,7 @@
"rest_offset": 0.0},
"target": {"override_usd_defaults": False,
"fixed_base": True,
+ "make_kinematic": True,
"enable_self_collisions": False,
"enable_gyroscopic_forces": True,
"solver_position_iteration_count": 4,
diff --git a/docs/source/examples/real_world/franka_emika_panda/reaching_franka_omniverse_isaacgym_skrl_eval.py b/docs/source/examples/real_world/franka_emika_panda/reaching_franka_omniverse_isaacgym_skrl_eval.py
index ed4416b4..204a5584 100644
--- a/docs/source/examples/real_world/franka_emika_panda/reaching_franka_omniverse_isaacgym_skrl_eval.py
+++ b/docs/source/examples/real_world/franka_emika_panda/reaching_franka_omniverse_isaacgym_skrl_eval.py
@@ -8,6 +8,11 @@
from skrl.trainers.torch import SequentialTrainer
from skrl.utils.omniverse_isaacgym_utils import get_env_instance
from skrl.envs.torch import wrap_env
+from skrl.utils import set_seed
+
+
+# Seed for reproducibility
+seed = set_seed() # e.g. `set_seed(42)` for fixed seed
# Define only the policy for evaluation
@@ -37,6 +42,7 @@ def compute(self, inputs, role):
from omniisaacgymenvs.utils.config_utils.sim_config import SimConfig
from reaching_franka_omniverse_isaacgym_env import ReachingFrankaTask, TASK_CFG
+TASK_CFG["seed"] = seed
TASK_CFG["headless"] = headless
TASK_CFG["task"]["env"]["numEnvs"] = 64
TASK_CFG["task"]["env"]["controlSpace"] = "joint" # "joint" or "cartesian"
diff --git a/docs/source/examples/real_world/franka_emika_panda/reaching_franka_omniverse_isaacgym_skrl_train.py b/docs/source/examples/real_world/franka_emika_panda/reaching_franka_omniverse_isaacgym_skrl_train.py
index 899df201..75d28c3e 100644
--- a/docs/source/examples/real_world/franka_emika_panda/reaching_franka_omniverse_isaacgym_skrl_train.py
+++ b/docs/source/examples/real_world/franka_emika_panda/reaching_franka_omniverse_isaacgym_skrl_train.py
@@ -13,8 +13,8 @@
from skrl.utils import set_seed
-# set the seed for reproducibility
-set_seed(42)
+# Seed for reproducibility
+seed = set_seed() # e.g. `set_seed(42)` for fixed seed
# Define the models (stochastic and deterministic models) for the agent using helper mixin.
@@ -62,6 +62,7 @@ def compute(self, inputs, role):
from omniisaacgymenvs.utils.config_utils.sim_config import SimConfig
from reaching_franka_omniverse_isaacgym_env import ReachingFrankaTask, TASK_CFG
+TASK_CFG["seed"] = seed
TASK_CFG["headless"] = headless
TASK_CFG["task"]["env"]["numEnvs"] = 1024
TASK_CFG["task"]["env"]["controlSpace"] = "joint" # "joint" or "cartesian"
diff --git a/docs/source/examples/real_world/kuka_lbr_iiwa/reaching_iiwa_omniverse_isaacgym_env.py b/docs/source/examples/real_world/kuka_lbr_iiwa/reaching_iiwa_omniverse_isaacgym_env.py
index f3a5a458..495892cd 100644
--- a/docs/source/examples/real_world/kuka_lbr_iiwa/reaching_iiwa_omniverse_isaacgym_env.py
+++ b/docs/source/examples/real_world/kuka_lbr_iiwa/reaching_iiwa_omniverse_isaacgym_env.py
@@ -1,9 +1,6 @@
import torch
import numpy as np
-from omni.isaac.core.utils.extensions import enable_extension
-enable_extension("omni.replicator.isaac") # required by OIGE
-
from omniisaacgymenvs.tasks.base.rl_task import RLTask
from omni.isaac.core.prims import RigidPrimView
@@ -28,6 +25,8 @@
"headless": True,
"sim_device": "gpu",
"enable_livestream": False,
+ "warp": False,
+ "seed": 42,
"task": {"name": "ReachingIiwa",
"physics_engine": "physx",
"env": {"numEnvs": 1024,
@@ -87,6 +86,7 @@
"rest_offset": 0.0},
"target": {"override_usd_defaults": False,
"fixed_base": True,
+ "make_kinematic": True,
"enable_self_collisions": False,
"enable_gyroscopic_forces": True,
"solver_position_iteration_count": 4,
diff --git a/docs/source/examples/real_world/kuka_lbr_iiwa/reaching_iiwa_omniverse_isaacgym_skrl_eval.py b/docs/source/examples/real_world/kuka_lbr_iiwa/reaching_iiwa_omniverse_isaacgym_skrl_eval.py
index c9494217..c6cb93b8 100644
--- a/docs/source/examples/real_world/kuka_lbr_iiwa/reaching_iiwa_omniverse_isaacgym_skrl_eval.py
+++ b/docs/source/examples/real_world/kuka_lbr_iiwa/reaching_iiwa_omniverse_isaacgym_skrl_eval.py
@@ -8,6 +8,11 @@
from skrl.trainers.torch import SequentialTrainer
from skrl.utils.omniverse_isaacgym_utils import get_env_instance
from skrl.envs.torch import wrap_env
+from skrl.utils import set_seed
+
+
+# Seed for reproducibility
+seed = set_seed() # e.g. `set_seed(42)` for fixed seed
# Define only the policy for evaluation
@@ -37,6 +42,7 @@ def compute(self, inputs, role):
from omniisaacgymenvs.utils.config_utils.sim_config import SimConfig
from reaching_iiwa_omniverse_isaacgym_env import ReachingIiwaTask, TASK_CFG
+TASK_CFG["seed"] = seed
TASK_CFG["headless"] = headless
TASK_CFG["task"]["env"]["numEnvs"] = 64
TASK_CFG["task"]["env"]["controlSpace"] = "joint" # "joint" or "cartesian"
diff --git a/docs/source/examples/real_world/kuka_lbr_iiwa/reaching_iiwa_omniverse_isaacgym_skrl_train.py b/docs/source/examples/real_world/kuka_lbr_iiwa/reaching_iiwa_omniverse_isaacgym_skrl_train.py
index d109085a..861cd77e 100644
--- a/docs/source/examples/real_world/kuka_lbr_iiwa/reaching_iiwa_omniverse_isaacgym_skrl_train.py
+++ b/docs/source/examples/real_world/kuka_lbr_iiwa/reaching_iiwa_omniverse_isaacgym_skrl_train.py
@@ -13,8 +13,8 @@
from skrl.utils import set_seed
-# set the seed for reproducibility
-set_seed(42)
+# Seed for reproducibility
+seed = set_seed() # e.g. `set_seed(42)` for fixed seed
# Define the models (stochastic and deterministic models) for the agent using helper mixin.
@@ -62,6 +62,7 @@ def compute(self, inputs, role):
from omniisaacgymenvs.utils.config_utils.sim_config import SimConfig
from reaching_iiwa_omniverse_isaacgym_env import ReachingIiwaTask, TASK_CFG
+TASK_CFG["seed"] = seed
TASK_CFG["headless"] = headless
TASK_CFG["task"]["env"]["numEnvs"] = 1024
TASK_CFG["task"]["env"]["controlSpace"] = "joint" # "joint" or "cartesian"
diff --git a/docs/source/index.rst b/docs/source/index.rst
index 5ab4e515..d587c727 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -154,6 +154,7 @@ Models
* :doc:`Tabular model ` (discrete domain)
* :doc:`Categorical model ` (discrete domain)
+ * :doc:`Multi-Categorical model <api/models/multicategorical>` (discrete domain)
* :doc:`Gaussian model ` (continuous domain)
* :doc:`Multivariate Gaussian model ` (continuous domain)
* :doc:`Deterministic model ` (continuous domain)
@@ -165,7 +166,7 @@ Trainers
* :doc:`Sequential trainer `
* :doc:`Parallel trainer `
- * :doc:`Manual trainer `
+ * :doc:`Step trainer <api/trainers/step>`
Resources
^^^^^^^^^
diff --git a/docs/source/intro/getting_started.rst b/docs/source/intro/getting_started.rst
index db64b244..75320efe 100644
--- a/docs/source/intro/getting_started.rst
+++ b/docs/source/intro/getting_started.rst
@@ -762,7 +762,7 @@ The following code snippets show how to train/evaluate RL systems using the avai
:start-after: [pytorch-start-parallel]
:end-before: [pytorch-end-parallel]
- .. tab:: Manual trainer
+ .. tab:: Step trainer
.. tabs::
@@ -770,15 +770,15 @@ The following code snippets show how to train/evaluate RL systems using the avai
.. literalinclude:: ../snippets/trainer.py
:language: python
- :start-after: [pytorch-start-manual]
- :end-before: [pytorch-end-manual]
+ :start-after: [pytorch-start-step]
+ :end-before: [pytorch-end-step]
.. group-tab:: |_4| |jax| |_4|
.. literalinclude:: ../snippets/trainer.py
:language: python
- :start-after: [jax-start-manual]
- :end-before: [jax-end-manual]
+ :start-after: [jax-start-step]
+ :end-before: [jax-end-step]
.. raw:: html
diff --git a/docs/source/snippets/categorical_model.py b/docs/source/snippets/categorical_model.py
index 40800ab2..7afababc 100644
--- a/docs/source/snippets/categorical_model.py
+++ b/docs/source/snippets/categorical_model.py
@@ -242,6 +242,103 @@ def compute(self, inputs, role):
unnormalized_log_prob=True)
# [end-cnn-functional-torch]
+# [start-cnn-setup-jax]
+import flax.linen as nn
+
+from skrl.models.jax import Model, CategoricalMixin
+
+
+# define the model
+class CNN(CategoricalMixin, Model):
+ def __init__(self, observation_space, action_space, device=None, unnormalized_log_prob=True, **kwargs):
+ Model.__init__(self, observation_space, action_space, device, **kwargs)
+ CategoricalMixin.__init__(self, unnormalized_log_prob)
+
+ def setup(self):
+ self.conv1 = nn.Conv(32, kernel_size=(8, 8), strides=(4, 4), padding="VALID")
+ self.conv2 = nn.Conv(64, kernel_size=(4, 4), strides=(2, 2), padding="VALID")
+ self.conv3 = nn.Conv(64, kernel_size=(3, 3), strides=(1, 1), padding="VALID")
+ self.fc1 = nn.Dense(512)
+ self.fc2 = nn.Dense(16)
+ self.fc3 = nn.Dense(64)
+ self.fc4 = nn.Dense(32)
+ self.fc5 = nn.Dense(self.num_actions)
+
+ def __call__(self, inputs, role):
+ x = inputs["states"].reshape((-1, *self.observation_space.shape))
+ x = self.conv1(x)
+ x = nn.relu(x)
+ x = self.conv2(x)
+ x = nn.relu(x)
+ x = self.conv3(x)
+ x = nn.relu(x)
+ x = x.reshape((x.shape[0], -1))
+ x = self.fc1(x)
+ x = nn.relu(x)
+ x = self.fc2(x)
+ x = nn.tanh(x)
+ x = self.fc3(x)
+ x = nn.tanh(x)
+ x = self.fc4(x)
+ x = nn.tanh(x)
+ x = self.fc5(x)
+ return x, {}
+
+
+# instantiate the model (assumes there is a wrapped environment: env)
+policy = CNN(observation_space=env.observation_space,
+ action_space=env.action_space,
+ device=env.device,
+ unnormalized_log_prob=True)
+
+# initialize model's state dict
+policy.init_state_dict("policy")
+# [end-cnn-setup-jax]
+
+# [start-cnn-compact-jax]
+import flax.linen as nn
+
+from skrl.models.jax import Model, CategoricalMixin
+
+
+# define the model
+class CNN(CategoricalMixin, Model):
+ def __init__(self, observation_space, action_space, device=None, unnormalized_log_prob=True, **kwargs):
+ Model.__init__(self, observation_space, action_space, device, **kwargs)
+ CategoricalMixin.__init__(self, unnormalized_log_prob)
+
+ @nn.compact # marks the given module method allowing inlined submodules
+ def __call__(self, inputs, role):
+ x = inputs["states"].reshape((-1, *self.observation_space.shape))
+ x = nn.Conv(32, kernel_size=(8, 8), strides=(4, 4), padding="VALID")(x)
+ x = nn.relu(x)
+ x = nn.Conv(64, kernel_size=(4, 4), strides=(2, 2), padding="VALID")(x)
+ x = nn.relu(x)
+ x = nn.Conv(64, kernel_size=(3, 3), strides=(1, 1), padding="VALID")(x)
+ x = nn.relu(x)
+ x = x.reshape((x.shape[0], -1))
+ x = nn.Dense(512)(x)
+ x = nn.relu(x)
+ x = nn.Dense(16)(x)
+ x = nn.tanh(x)
+ x = nn.Dense(64)(x)
+ x = nn.tanh(x)
+ x = nn.Dense(32)(x)
+ x = nn.tanh(x)
+ x = nn.Dense(self.num_actions)(x)
+ return x, {}
+
+
+# instantiate the model (assumes there is a wrapped environment: env)
+policy = CNN(observation_space=env.observation_space,
+ action_space=env.action_space,
+ device=env.device,
+ unnormalized_log_prob=True)
+
+# initialize model's state dict
+policy.init_state_dict("policy")
+# [end-cnn-compact-jax]
+
# =============================================================================
# [start-rnn-sequential-torch]
diff --git a/docs/source/snippets/deterministic_model.py b/docs/source/snippets/deterministic_model.py
index 38daa3e9..2022ae71 100644
--- a/docs/source/snippets/deterministic_model.py
+++ b/docs/source/snippets/deterministic_model.py
@@ -246,6 +246,107 @@ def compute(self, inputs, role):
clip_actions=False)
# [end-cnn-functional-torch]
+# [start-cnn-setup-jax]
+import jax.numpy as jnp
+import flax.linen as nn
+
+from skrl.models.jax import Model, DeterministicMixin
+
+
+# define the model
+class CNN(DeterministicMixin, Model):
+ def __init__(self, observation_space, action_space, device=None, clip_actions=False, **kwargs):
+ Model.__init__(self, observation_space, action_space, device, **kwargs)
+ DeterministicMixin.__init__(self, clip_actions)
+
+ def setup(self):
+ self.conv1 = nn.Conv(32, kernel_size=(8, 8), strides=(4, 4), padding="VALID")
+ self.conv2 = nn.Conv(64, kernel_size=(4, 4), strides=(2, 2), padding="VALID")
+ self.conv3 = nn.Conv(64, kernel_size=(3, 3), strides=(1, 1), padding="VALID")
+ self.fc1 = nn.Dense(512)
+ self.fc2 = nn.Dense(16)
+ self.fc3 = nn.Dense(64)
+ self.fc4 = nn.Dense(32)
+ self.fc5 = nn.Dense(1)
+
+ def __call__(self, inputs, role):
+ x = inputs["states"].reshape((-1, *self.observation_space.shape))
+ x = self.conv1(x)
+ x = nn.relu(x)
+ x = self.conv2(x)
+ x = nn.relu(x)
+ x = self.conv3(x)
+ x = nn.relu(x)
+ x = x.reshape((x.shape[0], -1))
+ x = self.fc1(x)
+ x = nn.relu(x)
+ x = self.fc2(x)
+ x = nn.tanh(x)
+ x = jnp.concatenate([x, inputs["taken_actions"]], axis=-1)
+ x = self.fc3(x)
+ x = nn.tanh(x)
+ x = self.fc4(x)
+ x = nn.tanh(x)
+ x = self.fc5(x)
+ return x, {}
+
+
+# instantiate the model (assumes there is a wrapped environment: env)
+critic = CNN(observation_space=env.observation_space,
+ action_space=env.action_space,
+ device=env.device,
+ clip_actions=False)
+
+# initialize model's state dict
+critic.init_state_dict("critic")
+# [end-cnn-setup-jax]
+
+# [start-cnn-compact-jax]
+import jax.numpy as jnp
+import flax.linen as nn
+
+from skrl.models.jax import Model, DeterministicMixin
+
+
+# define the model
+class CNN(DeterministicMixin, Model):
+ def __init__(self, observation_space, action_space, device=None, clip_actions=False, **kwargs):
+ Model.__init__(self, observation_space, action_space, device, **kwargs)
+ DeterministicMixin.__init__(self, clip_actions)
+
+ @nn.compact # marks the given module method allowing inlined submodules
+ def __call__(self, inputs, role):
+ x = inputs["states"].reshape((-1, *self.observation_space.shape))
+ x = nn.Conv(32, kernel_size=(8, 8), strides=(4, 4), padding="VALID")(x)
+ x = nn.relu(x)
+ x = nn.Conv(64, kernel_size=(4, 4), strides=(2, 2), padding="VALID")(x)
+ x = nn.relu(x)
+ x = nn.Conv(64, kernel_size=(3, 3), strides=(1, 1), padding="VALID")(x)
+ x = nn.relu(x)
+ x = x.reshape((x.shape[0], -1))
+ x = nn.Dense(512)(x)
+ x = nn.relu(x)
+ x = nn.Dense(16)(x)
+ x = nn.tanh(x)
+ x = jnp.concatenate([x, inputs["taken_actions"]], axis=-1)
+ x = nn.Dense(64)(x)
+ x = nn.tanh(x)
+ x = nn.Dense(32)(x)
+ x = nn.tanh(x)
+ x = nn.Dense(1)(x)
+ return x, {}
+
+
+# instantiate the model (assumes there is a wrapped environment: env)
+critic = CNN(observation_space=env.observation_space,
+ action_space=env.action_space,
+ device=env.device,
+ clip_actions=False)
+
+# initialize model's state dict
+critic.init_state_dict("critic")
+# [end-cnn-compact-jax]
+
# =============================================================================
# [start-rnn-sequential-torch]
diff --git a/docs/source/snippets/gaussian_model.py b/docs/source/snippets/gaussian_model.py
index 952c994e..3f48aa58 100644
--- a/docs/source/snippets/gaussian_model.py
+++ b/docs/source/snippets/gaussian_model.py
@@ -289,6 +289,118 @@ def compute(self, inputs, role):
reduction="sum")
# [end-cnn-functional-torch]
+# [start-cnn-setup-jax]
+import jax.numpy as jnp
+import flax.linen as nn
+
+from skrl.models.jax import Model, GaussianMixin
+
+
+# define the model
+class CNN(GaussianMixin, Model):
+ def __init__(self, observation_space, action_space, device=None,
+ clip_actions=False, clip_log_std=True, min_log_std=-20, max_log_std=2, reduction="sum", **kwargs):
+ Model.__init__(self, observation_space, action_space, device, **kwargs)
+ GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std, reduction)
+
+ def setup(self):
+ self.conv1 = nn.Conv(32, kernel_size=(8, 8), strides=(4, 4), padding="VALID")
+ self.conv2 = nn.Conv(64, kernel_size=(4, 4), strides=(2, 2), padding="VALID")
+ self.conv3 = nn.Conv(64, kernel_size=(3, 3), strides=(1, 1), padding="VALID")
+ self.fc1 = nn.Dense(512)
+ self.fc2 = nn.Dense(16)
+ self.fc3 = nn.Dense(64)
+ self.fc4 = nn.Dense(32)
+ self.fc5 = nn.Dense(self.num_actions)
+
+ self.log_std_parameter = self.param("log_std_parameter", lambda _: jnp.zeros(self.num_actions))
+
+ def __call__(self, inputs, role):
+ x = inputs["states"].reshape((-1, *self.observation_space.shape))
+ x = self.conv1(x)
+ x = nn.relu(x)
+ x = self.conv2(x)
+ x = nn.relu(x)
+ x = self.conv3(x)
+ x = nn.relu(x)
+ x = x.reshape((x.shape[0], -1))
+ x = self.fc1(x)
+ x = nn.relu(x)
+ x = self.fc2(x)
+ x = nn.tanh(x)
+ x = self.fc3(x)
+ x = nn.tanh(x)
+ x = self.fc4(x)
+ x = nn.tanh(x)
+ x = self.fc5(x)
+ return nn.tanh(x), self.log_std_parameter, {}
+
+
+# instantiate the model (assumes there is a wrapped environment: env)
+policy = CNN(observation_space=env.observation_space,
+ action_space=env.action_space,
+ device=env.device,
+ clip_actions=True,
+ clip_log_std=True,
+ min_log_std=-20,
+ max_log_std=2,
+ reduction="sum")
+
+# initialize model's state dict
+policy.init_state_dict("policy")
+# [end-cnn-setup-jax]
+
+# [start-cnn-compact-jax]
+import jax.numpy as jnp
+import flax.linen as nn
+
+from skrl.models.jax import Model, GaussianMixin
+
+
+# define the model
+class CNN(GaussianMixin, Model):
+ def __init__(self, observation_space, action_space, device=None,
+ clip_actions=False, clip_log_std=True, min_log_std=-20, max_log_std=2, reduction="sum", **kwargs):
+ Model.__init__(self, observation_space, action_space, device, **kwargs)
+ GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std, reduction)
+
+ @nn.compact # marks the given module method allowing inlined submodules
+ def __call__(self, inputs, role):
+ x = inputs["states"].reshape((-1, *self.observation_space.shape))
+ x = nn.Conv(32, kernel_size=(8, 8), strides=(4, 4), padding="VALID")(x)
+ x = nn.relu(x)
+ x = nn.Conv(64, kernel_size=(4, 4), strides=(2, 2), padding="VALID")(x)
+ x = nn.relu(x)
+ x = nn.Conv(64, kernel_size=(3, 3), strides=(1, 1), padding="VALID")(x)
+ x = nn.relu(x)
+ x = x.reshape((x.shape[0], -1))
+ x = nn.Dense(512)(x)
+ x = nn.relu(x)
+ x = nn.Dense(16)(x)
+ x = nn.tanh(x)
+ x = nn.Dense(64)(x)
+ x = nn.tanh(x)
+ x = nn.Dense(32)(x)
+ x = nn.tanh(x)
+ x = nn.Dense(self.num_actions)(x)
+ log_std_parameter = self.param("log_std_parameter", lambda _: jnp.zeros(self.num_actions))
+ return nn.tanh(x), log_std_parameter, {}
+
+
+# instantiate the model (assumes there is a wrapped environment: env)
+policy = CNN(observation_space=env.observation_space,
+ action_space=env.action_space,
+ device=env.device,
+ clip_actions=True,
+ clip_log_std=True,
+ min_log_std=-20,
+ max_log_std=2,
+ reduction="sum")
+
+# initialize model's state dict
+policy.init_state_dict("policy")
+# [end-cnn-compact-jax]
+
# =============================================================================
# [start-rnn-sequential-torch]
diff --git a/docs/source/snippets/multicategorical_model.py b/docs/source/snippets/multicategorical_model.py
new file mode 100644
index 00000000..7dbcc422
--- /dev/null
+++ b/docs/source/snippets/multicategorical_model.py
@@ -0,0 +1,892 @@
+# [start-definition-torch]
+class MultiCategoricalModel(MultiCategoricalMixin, Model):
+ def __init__(self, observation_space, action_space, device=None, unnormalized_log_prob=True, reduction="sum"):
+ Model.__init__(self, observation_space, action_space, device)
+ MultiCategoricalMixin.__init__(self, unnormalized_log_prob, reduction)
+# [end-definition-torch]
+
+
+# [start-definition-jax]
+class MultiCategoricalModel(MultiCategoricalMixin, Model):
+ def __init__(self, observation_space, action_space, device=None, unnormalized_log_prob=True, reduction="sum", **kwargs):
+ Model.__init__(self, observation_space, action_space, device, **kwargs)
+ MultiCategoricalMixin.__init__(self, unnormalized_log_prob, reduction)
+# [end-definition-jax]
+
+# =============================================================================
+
+# [start-mlp-sequential-torch]
+import torch
+import torch.nn as nn
+
+from skrl.models.torch import Model, MultiCategoricalMixin
+
+
+# define the model
+class MLP(MultiCategoricalMixin, Model):
+ def __init__(self, observation_space, action_space, device, unnormalized_log_prob=True, reduction="sum"):
+ Model.__init__(self, observation_space, action_space, device)
+ MultiCategoricalMixin.__init__(self, unnormalized_log_prob, reduction)
+
+ self.net = nn.Sequential(nn.Linear(self.num_observations, 64),
+ nn.ReLU(),
+ nn.Linear(64, 32),
+ nn.ReLU(),
+ nn.Linear(32, self.num_actions))
+
+ def compute(self, inputs, role):
+ return self.net(inputs["states"]), {}
+
+
+# instantiate the model (assumes there is a wrapped environment: env)
+policy = MLP(observation_space=env.observation_space,
+ action_space=env.action_space,
+ device=env.device,
+ unnormalized_log_prob=True,
+ reduction="sum")
+# [end-mlp-sequential-torch]
+
+# [start-mlp-functional-torch]
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+from skrl.models.torch import Model, MultiCategoricalMixin
+
+
+# define the model
+class MLP(MultiCategoricalMixin, Model):
+ def __init__(self, observation_space, action_space, device, unnormalized_log_prob=True, reduction="sum"):
+ Model.__init__(self, observation_space, action_space, device)
+ MultiCategoricalMixin.__init__(self, unnormalized_log_prob, reduction)
+
+ self.fc1 = nn.Linear(self.num_observations, 64)
+ self.fc2 = nn.Linear(64, 32)
+ self.logits = nn.Linear(32, self.num_actions)
+
+ def compute(self, inputs, role):
+ x = self.fc1(inputs["states"])
+ x = F.relu(x)
+ x = self.fc2(x)
+ x = F.relu(x)
+ return self.logits(x), {}
+
+
+# instantiate the model (assumes there is a wrapped environment: env)
+policy = MLP(observation_space=env.observation_space,
+ action_space=env.action_space,
+ device=env.device,
+ unnormalized_log_prob=True,
+ reduction="sum")
+# [end-mlp-functional-torch]
+
+# [start-mlp-setup-jax]
+import flax.linen as nn
+
+from skrl.models.jax import Model, MultiCategoricalMixin
+
+
+# define the model
+class MLP(MultiCategoricalMixin, Model):
+ def __init__(self, observation_space, action_space, device=None, unnormalized_log_prob=True, reduction="sum", **kwargs):
+ Model.__init__(self, observation_space, action_space, device, **kwargs)
+ MultiCategoricalMixin.__init__(self, unnormalized_log_prob, reduction)
+
+ def setup(self):
+ self.fc1 = nn.Dense(64)
+ self.fc2 = nn.Dense(32)
+ self.fc3 = nn.Dense(self.num_actions)
+
+ def __call__(self, inputs, role):
+ x = self.fc1(inputs["states"])
+ x = nn.relu(x)
+ x = self.fc2(x)
+ x = nn.relu(x)
+ x = self.fc3(x)
+ return x, {}
+
+
+# instantiate the model (assumes there is a wrapped environment: env)
+policy = MLP(observation_space=env.observation_space,
+ action_space=env.action_space,
+ device=env.device,
+ unnormalized_log_prob=True,
+ reduction="sum")
+
+# initialize model's state dict
+policy.init_state_dict("policy")
+# [end-mlp-setup-jax]
+
+# [start-mlp-compact-jax]
+import flax.linen as nn
+
+from skrl.models.jax import Model, MultiCategoricalMixin
+
+
+# define the model
+class MLP(MultiCategoricalMixin, Model):
+ def __init__(self, observation_space, action_space, device=None, unnormalized_log_prob=True, reduction="sum", **kwargs):
+ Model.__init__(self, observation_space, action_space, device, **kwargs)
+ MultiCategoricalMixin.__init__(self, unnormalized_log_prob, reduction)
+
+ @nn.compact # marks the given module method allowing inlined submodules
+ def __call__(self, inputs, role):
+ x = nn.Dense(64)(inputs["states"])
+ x = nn.relu(x)
+ x = nn.Dense(32)(x)
+ x = nn.relu(x)
+ x = nn.Dense(self.num_actions)(x)
+ return x, {}
+
+
+# instantiate the model (assumes there is a wrapped environment: env)
+policy = MLP(observation_space=env.observation_space,
+ action_space=env.action_space,
+ device=env.device,
+ unnormalized_log_prob=True,
+ reduction="sum")
+
+# initialize model's state dict
+policy.init_state_dict("policy")
+# [end-mlp-compact-jax]
+
+# =============================================================================
+
+# [start-cnn-sequential-torch]
+import torch
+import torch.nn as nn
+
+from skrl.models.torch import Model, MultiCategoricalMixin
+
+
+# define the model
+class CNN(MultiCategoricalMixin, Model):
+ def __init__(self, observation_space, action_space, device, unnormalized_log_prob=True, reduction="sum"):
+ Model.__init__(self, observation_space, action_space, device)
+ MultiCategoricalMixin.__init__(self, unnormalized_log_prob, reduction)
+
+ self.net = nn.Sequential(nn.Conv2d(3, 32, kernel_size=8, stride=4),
+ nn.ReLU(),
+ nn.Conv2d(32, 64, kernel_size=4, stride=2),
+ nn.ReLU(),
+ nn.Conv2d(64, 64, kernel_size=3, stride=1),
+ nn.ReLU(),
+ nn.Flatten(),
+ nn.Linear(1024, 512),
+ nn.ReLU(),
+ nn.Linear(512, 16),
+ nn.Tanh(),
+ nn.Linear(16, 64),
+ nn.Tanh(),
+ nn.Linear(64, 32),
+ nn.Tanh(),
+ nn.Linear(32, self.num_actions))
+
+ def compute(self, inputs, role):
+ # permute (samples, width * height * channels) -> (samples, channels, width, height)
+ return self.net(inputs["states"].view(-1, *self.observation_space.shape).permute(0, 3, 1, 2)), {}
+
+
+# instantiate the model (assumes there is a wrapped environment: env)
+policy = CNN(observation_space=env.observation_space,
+ action_space=env.action_space,
+ device=env.device,
+ unnormalized_log_prob=True,
+ reduction="sum")
+# [end-cnn-sequential-torch]
+
+# [start-cnn-functional-torch]
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+from skrl.models.torch import Model, MultiCategoricalMixin
+
+
+# define the model
+class CNN(MultiCategoricalMixin, Model):
+ def __init__(self, observation_space, action_space, device, unnormalized_log_prob=True, reduction="sum"):
+ Model.__init__(self, observation_space, action_space, device)
+ MultiCategoricalMixin.__init__(self, unnormalized_log_prob, reduction)
+
+ self.conv1 = nn.Conv2d(3, 32, kernel_size=8, stride=4)
+ self.conv2 = nn.Conv2d(32, 64, kernel_size=4, stride=2)
+ self.conv3 = nn.Conv2d(64, 64, kernel_size=3, stride=1)
+ self.fc1 = nn.Linear(1024, 512)
+ self.fc2 = nn.Linear(512, 16)
+ self.fc3 = nn.Linear(16, 64)
+ self.fc4 = nn.Linear(64, 32)
+ self.fc5 = nn.Linear(32, self.num_actions)
+
+ def compute(self, inputs, role):
+ # permute (samples, width * height * channels) -> (samples, channels, width, height)
+ x = inputs["states"].view(-1, *self.observation_space.shape).permute(0, 3, 1, 2)
+ x = self.conv1(x)
+ x = F.relu(x)
+ x = self.conv2(x)
+ x = F.relu(x)
+ x = self.conv3(x)
+ x = F.relu(x)
+ x = torch.flatten(x, start_dim=1)
+ x = self.fc1(x)
+ x = F.relu(x)
+ x = self.fc2(x)
+ x = torch.tanh(x)
+ x = self.fc3(x)
+ x = torch.tanh(x)
+ x = self.fc4(x)
+ x = torch.tanh(x)
+ x = self.fc5(x)
+ return x, {}
+
+
+# instantiate the model (assumes there is a wrapped environment: env)
+policy = CNN(observation_space=env.observation_space,
+ action_space=env.action_space,
+ device=env.device,
+ unnormalized_log_prob=True,
+ reduction="sum")
+# [end-cnn-functional-torch]
+
+# [start-cnn-setup-jax]
+import flax.linen as nn
+
+from skrl.models.jax import Model, MultiCategoricalMixin
+
+
+# define the model
+class CNN(MultiCategoricalMixin, Model):
+ def __init__(self, observation_space, action_space, device=None, unnormalized_log_prob=True, reduction="sum", **kwargs):
+ Model.__init__(self, observation_space, action_space, device, **kwargs)
+ MultiCategoricalMixin.__init__(self, unnormalized_log_prob, reduction)
+
+ def setup(self):
+ self.conv1 = nn.Conv(32, kernel_size=(8, 8), strides=(4, 4), padding="VALID")
+ self.conv2 = nn.Conv(64, kernel_size=(4, 4), strides=(2, 2), padding="VALID")
+ self.conv3 = nn.Conv(64, kernel_size=(3, 3), strides=(1, 1), padding="VALID")
+ self.fc1 = nn.Dense(512)
+ self.fc2 = nn.Dense(16)
+ self.fc3 = nn.Dense(64)
+ self.fc4 = nn.Dense(32)
+ self.fc5 = nn.Dense(self.num_actions)
+
+ def __call__(self, inputs, role):
+ x = inputs["states"].reshape((-1, *self.observation_space.shape))
+ x = self.conv1(x)
+ x = nn.relu(x)
+ x = self.conv2(x)
+ x = nn.relu(x)
+ x = self.conv3(x)
+ x = nn.relu(x)
+ x = x.reshape((x.shape[0], -1))
+ x = self.fc1(x)
+ x = nn.relu(x)
+ x = self.fc2(x)
+ x = nn.tanh(x)
+ x = self.fc3(x)
+ x = nn.tanh(x)
+ x = self.fc4(x)
+ x = nn.tanh(x)
+ x = self.fc5(x)
+ return x, {}
+
+
+# instantiate the model (assumes there is a wrapped environment: env)
+policy = CNN(observation_space=env.observation_space,
+ action_space=env.action_space,
+ device=env.device,
+ unnormalized_log_prob=True,
+ reduction="sum")
+
+# initialize model's state dict
+policy.init_state_dict("policy")
+# [end-cnn-setup-jax]
+
+# [start-cnn-compact-jax]
+import flax.linen as nn
+
+from skrl.models.jax import Model, MultiCategoricalMixin
+
+
+# define the model
+class CNN(MultiCategoricalMixin, Model):
+ def __init__(self, observation_space, action_space, device=None, unnormalized_log_prob=True, reduction="sum", **kwargs):
+ Model.__init__(self, observation_space, action_space, device, **kwargs)
+ MultiCategoricalMixin.__init__(self, unnormalized_log_prob, reduction)
+
+ @nn.compact # marks the given module method allowing inlined submodules
+ def __call__(self, inputs, role):
+ x = inputs["states"].reshape((-1, *self.observation_space.shape))
+ x = nn.Conv(32, kernel_size=(8, 8), strides=(4, 4), padding="VALID")(x)
+ x = nn.relu(x)
+ x = nn.Conv(64, kernel_size=(4, 4), strides=(2, 2), padding="VALID")(x)
+ x = nn.relu(x)
+ x = nn.Conv(64, kernel_size=(3, 3), strides=(1, 1), padding="VALID")(x)
+ x = nn.relu(x)
+ x = x.reshape((x.shape[0], -1))
+ x = nn.Dense(512)(x)
+ x = nn.relu(x)
+ x = nn.Dense(16)(x)
+ x = nn.tanh(x)
+ x = nn.Dense(64)(x)
+ x = nn.tanh(x)
+ x = nn.Dense(32)(x)
+ x = nn.tanh(x)
+ x = nn.Dense(self.num_actions)(x)
+ return x, {}
+
+
+# instantiate the model (assumes there is a wrapped environment: env)
+policy = CNN(observation_space=env.observation_space,
+ action_space=env.action_space,
+ device=env.device,
+ unnormalized_log_prob=True,
+ reduction="sum")
+
+# initialize model's state dict
+policy.init_state_dict("policy")
+# [end-cnn-compact-jax]
+
+# =============================================================================
+
+# [start-rnn-sequential-torch]
+import torch
+import torch.nn as nn
+
+from skrl.models.torch import Model, MultiCategoricalMixin
+
+
+# define the model
+class RNN(MultiCategoricalMixin, Model):
+ def __init__(self, observation_space, action_space, device, unnormalized_log_prob=True, reduction="sum",
+ num_envs=1, num_layers=1, hidden_size=64, sequence_length=10):
+ Model.__init__(self, observation_space, action_space, device)
+ MultiCategoricalMixin.__init__(self, unnormalized_log_prob, reduction)
+
+ self.num_envs = num_envs
+ self.num_layers = num_layers
+ self.hidden_size = hidden_size # Hout
+ self.sequence_length = sequence_length
+
+ self.rnn = nn.RNN(input_size=self.num_observations,
+ hidden_size=self.hidden_size,
+ num_layers=self.num_layers,
+ batch_first=True) # batch_first -> (batch, sequence, features)
+
+ self.net = nn.Sequential(nn.Linear(self.hidden_size, 64),
+ nn.ReLU(),
+ nn.Linear(64, 32),
+ nn.ReLU(),
+ nn.Linear(32, self.num_actions))
+
+ def get_specification(self):
+ # batch size (N) is the number of envs during rollout
+ return {"rnn": {"sequence_length": self.sequence_length,
+ "sizes": [(self.num_layers, self.num_envs, self.hidden_size)]}} # hidden states (D ∗ num_layers, N, Hout)
+
+ def compute(self, inputs, role):
+ states = inputs["states"]
+ terminated = inputs.get("terminated", None)
+ hidden_states = inputs["rnn"][0]
+
+ # training
+ if self.training:
+ rnn_input = states.view(-1, self.sequence_length, states.shape[-1]) # (N, L, Hin): N=batch_size, L=sequence_length
+ hidden_states = hidden_states.view(self.num_layers, -1, self.sequence_length, hidden_states.shape[-1]) # (D * num_layers, N, L, Hout)
+ # get the hidden states corresponding to the initial sequence
+ hidden_states = hidden_states[:,:,0,:].contiguous() # (D * num_layers, N, Hout)
+
+ # reset the RNN state in the middle of a sequence
+ if terminated is not None and torch.any(terminated):
+ rnn_outputs = []
+ terminated = terminated.view(-1, self.sequence_length)
+ indexes = [0] + (terminated[:,:-1].any(dim=0).nonzero(as_tuple=True)[0] + 1).tolist() + [self.sequence_length]
+
+ for i in range(len(indexes) - 1):
+ i0, i1 = indexes[i], indexes[i + 1]
+ rnn_output, hidden_states = self.rnn(rnn_input[:,i0:i1,:], hidden_states)
+ hidden_states[:, (terminated[:,i1-1]), :] = 0
+ rnn_outputs.append(rnn_output)
+
+ rnn_output = torch.cat(rnn_outputs, dim=1)
+ # no need to reset the RNN state in the sequence
+ else:
+ rnn_output, hidden_states = self.rnn(rnn_input, hidden_states)
+ # rollout
+ else:
+ rnn_input = states.view(-1, 1, states.shape[-1]) # (N, L, Hin): N=num_envs, L=1
+ rnn_output, hidden_states = self.rnn(rnn_input, hidden_states)
+
+ # flatten the RNN output
+ rnn_output = torch.flatten(rnn_output, start_dim=0, end_dim=1) # (N, L, D ∗ Hout) -> (N * L, D ∗ Hout)
+
+ return self.net(rnn_output), {"rnn": [hidden_states]}
+
+
+# instantiate the model (assumes there is a wrapped environment: env)
+policy = RNN(observation_space=env.observation_space,
+ action_space=env.action_space,
+ device=env.device,
+ unnormalized_log_prob=True,
+ reduction="sum",
+ num_envs=env.num_envs,
+ num_layers=1,
+ hidden_size=64,
+ sequence_length=10)
+# [end-rnn-sequential-torch]
+
+# [start-rnn-functional-torch]
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+from skrl.models.torch import Model, MultiCategoricalMixin
+
+
+# define the model
+class RNN(MultiCategoricalMixin, Model):
+ def __init__(self, observation_space, action_space, device, unnormalized_log_prob=True, reduction="sum",
+ num_envs=1, num_layers=1, hidden_size=64, sequence_length=10):
+ Model.__init__(self, observation_space, action_space, device)
+ MultiCategoricalMixin.__init__(self, unnormalized_log_prob, reduction)
+
+ self.num_envs = num_envs
+ self.num_layers = num_layers
+ self.hidden_size = hidden_size # Hout
+ self.sequence_length = sequence_length
+
+ self.rnn = nn.RNN(input_size=self.num_observations,
+ hidden_size=self.hidden_size,
+ num_layers=self.num_layers,
+ batch_first=True) # batch_first -> (batch, sequence, features)
+
+ self.fc1 = nn.Linear(self.hidden_size, 64)
+ self.fc2 = nn.Linear(64, 32)
+ self.logits = nn.Linear(32, self.num_actions)
+
+ def get_specification(self):
+ # batch size (N) is the number of envs during rollout
+ return {"rnn": {"sequence_length": self.sequence_length,
+ "sizes": [(self.num_layers, self.num_envs, self.hidden_size)]}} # hidden states (D ∗ num_layers, N, Hout)
+
+ def compute(self, inputs, role):
+ states = inputs["states"]
+ terminated = inputs.get("terminated", None)
+ hidden_states = inputs["rnn"][0]
+
+ # training
+ if self.training:
+ rnn_input = states.view(-1, self.sequence_length, states.shape[-1]) # (N, L, Hin): N=batch_size, L=sequence_length
+ hidden_states = hidden_states.view(self.num_layers, -1, self.sequence_length, hidden_states.shape[-1]) # (D * num_layers, N, L, Hout)
+ # get the hidden states corresponding to the initial sequence
+ hidden_states = hidden_states[:,:,0,:].contiguous() # (D * num_layers, N, Hout)
+
+ # reset the RNN state in the middle of a sequence
+ if terminated is not None and torch.any(terminated):
+ rnn_outputs = []
+ terminated = terminated.view(-1, self.sequence_length)
+ indexes = [0] + (terminated[:,:-1].any(dim=0).nonzero(as_tuple=True)[0] + 1).tolist() + [self.sequence_length]
+
+ for i in range(len(indexes) - 1):
+ i0, i1 = indexes[i], indexes[i + 1]
+ rnn_output, hidden_states = self.rnn(rnn_input[:,i0:i1,:], hidden_states)
+ hidden_states[:, (terminated[:,i1-1]), :] = 0
+ rnn_outputs.append(rnn_output)
+
+ rnn_output = torch.cat(rnn_outputs, dim=1)
+ # no need to reset the RNN state in the sequence
+ else:
+ rnn_output, hidden_states = self.rnn(rnn_input, hidden_states)
+ # rollout
+ else:
+ rnn_input = states.view(-1, 1, states.shape[-1]) # (N, L, Hin): N=num_envs, L=1
+ rnn_output, hidden_states = self.rnn(rnn_input, hidden_states)
+
+ # flatten the RNN output
+ rnn_output = torch.flatten(rnn_output, start_dim=0, end_dim=1) # (N, L, D ∗ Hout) -> (N * L, D ∗ Hout)
+
+ x = self.fc1(rnn_output)
+ x = F.relu(x)
+ x = self.fc2(x)
+ x = F.relu(x)
+
+ return self.logits(x), {"rnn": [hidden_states]}
+
+
+# instantiate the model (assumes there is a wrapped environment: env)
+policy = RNN(observation_space=env.observation_space,
+ action_space=env.action_space,
+ device=env.device,
+ unnormalized_log_prob=True,
+ reduction="sum",
+ num_envs=env.num_envs,
+ num_layers=1,
+ hidden_size=64,
+ sequence_length=10)
+# [end-rnn-functional-torch]
+
+# =============================================================================
+
+# [start-gru-sequential-torch]
+import torch
+import torch.nn as nn
+
+from skrl.models.torch import Model, MultiCategoricalMixin
+
+
+# define the model
+class GRU(MultiCategoricalMixin, Model):
+ def __init__(self, observation_space, action_space, device, unnormalized_log_prob=True, reduction="sum",
+ num_envs=1, num_layers=1, hidden_size=64, sequence_length=10):
+ Model.__init__(self, observation_space, action_space, device)
+ MultiCategoricalMixin.__init__(self, unnormalized_log_prob, reduction)
+
+ self.num_envs = num_envs
+ self.num_layers = num_layers
+ self.hidden_size = hidden_size # Hout
+ self.sequence_length = sequence_length
+
+ self.gru = nn.GRU(input_size=self.num_observations,
+ hidden_size=self.hidden_size,
+ num_layers=self.num_layers,
+ batch_first=True) # batch_first -> (batch, sequence, features)
+
+ self.net = nn.Sequential(nn.Linear(self.hidden_size, 64),
+ nn.ReLU(),
+ nn.Linear(64, 32),
+ nn.ReLU(),
+ nn.Linear(32, self.num_actions))
+
+ def get_specification(self):
+ # batch size (N) is the number of envs during rollout
+ return {"rnn": {"sequence_length": self.sequence_length,
+ "sizes": [(self.num_layers, self.num_envs, self.hidden_size)]}} # hidden states (D ∗ num_layers, N, Hout)
+
+ def compute(self, inputs, role):
+ states = inputs["states"]
+ terminated = inputs.get("terminated", None)
+ hidden_states = inputs["rnn"][0]
+
+ # training
+ if self.training:
+ rnn_input = states.view(-1, self.sequence_length, states.shape[-1]) # (N, L, Hin): N=batch_size, L=sequence_length
+ hidden_states = hidden_states.view(self.num_layers, -1, self.sequence_length, hidden_states.shape[-1]) # (D * num_layers, N, L, Hout)
+ # get the hidden states corresponding to the initial sequence
+ hidden_states = hidden_states[:,:,0,:].contiguous() # (D * num_layers, N, Hout)
+
+ # reset the RNN state in the middle of a sequence
+ if terminated is not None and torch.any(terminated):
+ rnn_outputs = []
+ terminated = terminated.view(-1, self.sequence_length)
+ indexes = [0] + (terminated[:,:-1].any(dim=0).nonzero(as_tuple=True)[0] + 1).tolist() + [self.sequence_length]
+
+ for i in range(len(indexes) - 1):
+ i0, i1 = indexes[i], indexes[i + 1]
+ rnn_output, hidden_states = self.gru(rnn_input[:,i0:i1,:], hidden_states)
+ hidden_states[:, (terminated[:,i1-1]), :] = 0
+ rnn_outputs.append(rnn_output)
+
+ rnn_output = torch.cat(rnn_outputs, dim=1)
+ # no need to reset the RNN state in the sequence
+ else:
+ rnn_output, hidden_states = self.gru(rnn_input, hidden_states)
+ # rollout
+ else:
+ rnn_input = states.view(-1, 1, states.shape[-1]) # (N, L, Hin): N=num_envs, L=1
+ rnn_output, hidden_states = self.gru(rnn_input, hidden_states)
+
+ # flatten the RNN output
+ rnn_output = torch.flatten(rnn_output, start_dim=0, end_dim=1) # (N, L, D ∗ Hout) -> (N * L, D ∗ Hout)
+
+ return self.net(rnn_output), {"rnn": [hidden_states]}
+
+
+# instantiate the model (assumes there is a wrapped environment: env)
+policy = GRU(observation_space=env.observation_space,
+ action_space=env.action_space,
+ device=env.device,
+ unnormalized_log_prob=True,
+ reduction="sum",
+ num_envs=env.num_envs,
+ num_layers=1,
+ hidden_size=64,
+ sequence_length=10)
+# [end-gru-sequential-torch]
+
+# [start-gru-functional-torch]
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+from skrl.models.torch import Model, MultiCategoricalMixin
+
+
+# define the model
+class GRU(MultiCategoricalMixin, Model):
+ def __init__(self, observation_space, action_space, device, unnormalized_log_prob=True, reduction="sum",
+ num_envs=1, num_layers=1, hidden_size=64, sequence_length=10):
+ Model.__init__(self, observation_space, action_space, device)
+ MultiCategoricalMixin.__init__(self, unnormalized_log_prob, reduction)
+
+ self.num_envs = num_envs
+ self.num_layers = num_layers
+ self.hidden_size = hidden_size # Hout
+ self.sequence_length = sequence_length
+
+ self.gru = nn.GRU(input_size=self.num_observations,
+ hidden_size=self.hidden_size,
+ num_layers=self.num_layers,
+ batch_first=True) # batch_first -> (batch, sequence, features)
+
+ self.fc1 = nn.Linear(self.hidden_size, 64)
+ self.fc2 = nn.Linear(64, 32)
+ self.logits = nn.Linear(32, self.num_actions)
+
+ def get_specification(self):
+ # batch size (N) is the number of envs during rollout
+ return {"rnn": {"sequence_length": self.sequence_length,
+ "sizes": [(self.num_layers, self.num_envs, self.hidden_size)]}} # hidden states (D ∗ num_layers, N, Hout)
+
+ def compute(self, inputs, role):
+ states = inputs["states"]
+ terminated = inputs.get("terminated", None)
+ hidden_states = inputs["rnn"][0]
+
+ # training
+ if self.training:
+ rnn_input = states.view(-1, self.sequence_length, states.shape[-1]) # (N, L, Hin): N=batch_size, L=sequence_length
+ hidden_states = hidden_states.view(self.num_layers, -1, self.sequence_length, hidden_states.shape[-1]) # (D * num_layers, N, L, Hout)
+ # get the hidden states corresponding to the initial sequence
+ hidden_states = hidden_states[:,:,0,:].contiguous() # (D * num_layers, N, Hout)
+
+ # reset the RNN state in the middle of a sequence
+ if terminated is not None and torch.any(terminated):
+ rnn_outputs = []
+ terminated = terminated.view(-1, self.sequence_length)
+ indexes = [0] + (terminated[:,:-1].any(dim=0).nonzero(as_tuple=True)[0] + 1).tolist() + [self.sequence_length]
+
+ for i in range(len(indexes) - 1):
+ i0, i1 = indexes[i], indexes[i + 1]
+ rnn_output, hidden_states = self.gru(rnn_input[:,i0:i1,:], hidden_states)
+ hidden_states[:, (terminated[:,i1-1]), :] = 0
+ rnn_outputs.append(rnn_output)
+
+ rnn_output = torch.cat(rnn_outputs, dim=1)
+ # no need to reset the RNN state in the sequence
+ else:
+ rnn_output, hidden_states = self.gru(rnn_input, hidden_states)
+ # rollout
+ else:
+ rnn_input = states.view(-1, 1, states.shape[-1]) # (N, L, Hin): N=num_envs, L=1
+ rnn_output, hidden_states = self.gru(rnn_input, hidden_states)
+
+ # flatten the RNN output
+ rnn_output = torch.flatten(rnn_output, start_dim=0, end_dim=1) # (N, L, D ∗ Hout) -> (N * L, D ∗ Hout)
+
+ x = self.fc1(rnn_output)
+ x = F.relu(x)
+ x = self.fc2(x)
+ x = F.relu(x)
+
+ return self.logits(x), {"rnn": [hidden_states]}
+
+
+# instantiate the model (assumes there is a wrapped environment: env)
+policy = GRU(observation_space=env.observation_space,
+ action_space=env.action_space,
+ device=env.device,
+ unnormalized_log_prob=True,
+ reduction="sum",
+ num_envs=env.num_envs,
+ num_layers=1,
+ hidden_size=64,
+ sequence_length=10)
+# [end-gru-functional-torch]
+
+# =============================================================================
+
+# [start-lstm-sequential-torch]
+import torch
+import torch.nn as nn
+
+from skrl.models.torch import Model, MultiCategoricalMixin
+
+
+# define the model
+class LSTM(MultiCategoricalMixin, Model):
+ def __init__(self, observation_space, action_space, device, unnormalized_log_prob=True, reduction="sum",
+ num_envs=1, num_layers=1, hidden_size=64, sequence_length=10):
+ Model.__init__(self, observation_space, action_space, device)
+ MultiCategoricalMixin.__init__(self, unnormalized_log_prob, reduction)
+
+ self.num_envs = num_envs
+ self.num_layers = num_layers
+ self.hidden_size = hidden_size # Hcell (Hout is Hcell because proj_size = 0)
+ self.sequence_length = sequence_length
+
+ self.lstm = nn.LSTM(input_size=self.num_observations,
+ hidden_size=self.hidden_size,
+ num_layers=self.num_layers,
+ batch_first=True) # batch_first -> (batch, sequence, features)
+
+ self.net = nn.Sequential(nn.Linear(self.hidden_size, 64),
+ nn.ReLU(),
+ nn.Linear(64, 32),
+ nn.ReLU(),
+ nn.Linear(32, self.num_actions))
+
+ def get_specification(self):
+ # batch size (N) is the number of envs during rollout
+ return {"rnn": {"sequence_length": self.sequence_length,
+ "sizes": [(self.num_layers, self.num_envs, self.hidden_size), # hidden states (D ∗ num_layers, N, Hout)
+ (self.num_layers, self.num_envs, self.hidden_size)]}} # cell states (D ∗ num_layers, N, Hcell)
+
+ def compute(self, inputs, role):
+ states = inputs["states"]
+ terminated = inputs.get("terminated", None)
+ hidden_states, cell_states = inputs["rnn"][0], inputs["rnn"][1]
+
+ # training
+ if self.training:
+ rnn_input = states.view(-1, self.sequence_length, states.shape[-1]) # (N, L, Hin): N=batch_size, L=sequence_length
+ hidden_states = hidden_states.view(self.num_layers, -1, self.sequence_length, hidden_states.shape[-1]) # (D * num_layers, N, L, Hout)
+ cell_states = cell_states.view(self.num_layers, -1, self.sequence_length, cell_states.shape[-1]) # (D * num_layers, N, L, Hcell)
+ # get the hidden/cell states corresponding to the initial sequence
+ hidden_states = hidden_states[:,:,0,:].contiguous() # (D * num_layers, N, Hout)
+ cell_states = cell_states[:,:,0,:].contiguous() # (D * num_layers, N, Hcell)
+
+ # reset the RNN state in the middle of a sequence
+ if terminated is not None and torch.any(terminated):
+ rnn_outputs = []
+ terminated = terminated.view(-1, self.sequence_length)
+ indexes = [0] + (terminated[:,:-1].any(dim=0).nonzero(as_tuple=True)[0] + 1).tolist() + [self.sequence_length]
+
+ for i in range(len(indexes) - 1):
+ i0, i1 = indexes[i], indexes[i + 1]
+ rnn_output, (hidden_states, cell_states) = self.lstm(rnn_input[:,i0:i1,:], (hidden_states, cell_states))
+ hidden_states[:, (terminated[:,i1-1]), :] = 0
+ cell_states[:, (terminated[:,i1-1]), :] = 0
+ rnn_outputs.append(rnn_output)
+
+ rnn_states = (hidden_states, cell_states)
+ rnn_output = torch.cat(rnn_outputs, dim=1)
+ # no need to reset the RNN state in the sequence
+ else:
+ rnn_output, rnn_states = self.lstm(rnn_input, (hidden_states, cell_states))
+ # rollout
+ else:
+ rnn_input = states.view(-1, 1, states.shape[-1]) # (N, L, Hin): N=num_envs, L=1
+ rnn_output, rnn_states = self.lstm(rnn_input, (hidden_states, cell_states))
+
+ # flatten the RNN output
+ rnn_output = torch.flatten(rnn_output, start_dim=0, end_dim=1) # (N, L, D ∗ Hout) -> (N * L, D ∗ Hout)
+
+ return self.net(rnn_output), {"rnn": [rnn_states[0], rnn_states[1]]}
+
+
+# instantiate the model (assumes there is a wrapped environment: env)
+policy = LSTM(observation_space=env.observation_space,
+ action_space=env.action_space,
+ device=env.device,
+ unnormalized_log_prob=True,
+ reduction="sum",
+ num_envs=env.num_envs,
+ num_layers=1,
+ hidden_size=64,
+ sequence_length=10)
+# [end-lstm-sequential-torch]
+
+# [start-lstm-functional-torch]
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+from skrl.models.torch import Model, MultiCategoricalMixin
+
+
+# define the model
+class LSTM(MultiCategoricalMixin, Model):
+ def __init__(self, observation_space, action_space, device, unnormalized_log_prob=True, reduction="sum",
+ num_envs=1, num_layers=1, hidden_size=64, sequence_length=10):
+ Model.__init__(self, observation_space, action_space, device)
+ MultiCategoricalMixin.__init__(self, unnormalized_log_prob, reduction)
+
+ self.num_envs = num_envs
+ self.num_layers = num_layers
+ self.hidden_size = hidden_size # Hcell (Hout is Hcell because proj_size = 0)
+ self.sequence_length = sequence_length
+
+ self.lstm = nn.LSTM(input_size=self.num_observations,
+ hidden_size=self.hidden_size,
+ num_layers=self.num_layers,
+ batch_first=True) # batch_first -> (batch, sequence, features)
+
+ self.fc1 = nn.Linear(self.hidden_size, 64)
+ self.fc2 = nn.Linear(64, 32)
+ self.logits = nn.Linear(32, self.num_actions)
+
+ def get_specification(self):
+ # batch size (N) is the number of envs during rollout
+ return {"rnn": {"sequence_length": self.sequence_length,
+ "sizes": [(self.num_layers, self.num_envs, self.hidden_size), # hidden states (D ∗ num_layers, N, Hout)
+ (self.num_layers, self.num_envs, self.hidden_size)]}} # cell states (D ∗ num_layers, N, Hcell)
+
+ def compute(self, inputs, role):
+ states = inputs["states"]
+ terminated = inputs.get("terminated", None)
+ hidden_states, cell_states = inputs["rnn"][0], inputs["rnn"][1]
+
+ # training
+ if self.training:
+ rnn_input = states.view(-1, self.sequence_length, states.shape[-1]) # (N, L, Hin): N=batch_size, L=sequence_length
+ hidden_states = hidden_states.view(self.num_layers, -1, self.sequence_length, hidden_states.shape[-1]) # (D * num_layers, N, L, Hout)
+ cell_states = cell_states.view(self.num_layers, -1, self.sequence_length, cell_states.shape[-1]) # (D * num_layers, N, L, Hcell)
+ # get the hidden/cell states corresponding to the initial sequence
+ hidden_states = hidden_states[:,:,0,:].contiguous() # (D * num_layers, N, Hout)
+ cell_states = cell_states[:,:,0,:].contiguous() # (D * num_layers, N, Hcell)
+
+ # reset the RNN state in the middle of a sequence
+ if terminated is not None and torch.any(terminated):
+ rnn_outputs = []
+ terminated = terminated.view(-1, self.sequence_length)
+ indexes = [0] + (terminated[:,:-1].any(dim=0).nonzero(as_tuple=True)[0] + 1).tolist() + [self.sequence_length]
+
+ for i in range(len(indexes) - 1):
+ i0, i1 = indexes[i], indexes[i + 1]
+ rnn_output, (hidden_states, cell_states) = self.lstm(rnn_input[:,i0:i1,:], (hidden_states, cell_states))
+ hidden_states[:, (terminated[:,i1-1]), :] = 0
+ cell_states[:, (terminated[:,i1-1]), :] = 0
+ rnn_outputs.append(rnn_output)
+
+ rnn_states = (hidden_states, cell_states)
+ rnn_output = torch.cat(rnn_outputs, dim=1)
+ # no need to reset the RNN state in the sequence
+ else:
+ rnn_output, rnn_states = self.lstm(rnn_input, (hidden_states, cell_states))
+ # rollout
+ else:
+ rnn_input = states.view(-1, 1, states.shape[-1]) # (N, L, Hin): N=num_envs, L=1
+ rnn_output, rnn_states = self.lstm(rnn_input, (hidden_states, cell_states))
+
+ # flatten the RNN output
+ rnn_output = torch.flatten(rnn_output, start_dim=0, end_dim=1) # (N, L, D ∗ Hout) -> (N * L, D ∗ Hout)
+
+ x = self.fc1(rnn_output)
+ x = F.relu(x)
+ x = self.fc2(x)
+ x = F.relu(x)
+
+ return self.logits(x), {"rnn": [rnn_states[0], rnn_states[1]]}
+
+
+# instantiate the model (assumes there is a wrapped environment: env)
+policy = LSTM(observation_space=env.observation_space,
+ action_space=env.action_space,
+ device=env.device,
+ unnormalized_log_prob=True,
+ reduction="sum",
+ num_envs=env.num_envs,
+ num_layers=1,
+ hidden_size=64,
+ sequence_length=10)
+# [end-lstm-functional-torch]
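+
+# =============================================================================
+
+# usage sketch (not rendered in the docs): any of the policies defined above can be handed to an
+# agent that supports multi-discrete action spaces through its `models` mapping, e.g. (assuming a
+# value model `value`, a `memory` and an agent configuration `cfg` created as in the skrl examples):
+#
+# from skrl.agents.torch.ppo import PPO
+#
+# agent = PPO(models={"policy": policy, "value": value},
+#             memory=memory,
+#             cfg=cfg,
+#             observation_space=env.observation_space,
+#             action_space=env.action_space,
+#             device=env.device)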
diff --git a/docs/source/snippets/trainer.py b/docs/source/snippets/trainer.py
index 7e4e7cbb..f2ce690a 100644
--- a/docs/source/snippets/trainer.py
+++ b/docs/source/snippets/trainer.py
@@ -199,15 +199,15 @@ def eval(self) -> None:
# =============================================================================
-# [pytorch-start-manual]
-from skrl.trainers.torch import ManualTrainer
+# [pytorch-start-step]
+from skrl.trainers.torch import StepTrainer
# assuming there is an environment called 'env'
# and an agent or a list of agents called 'agents'
# create a step trainer
cfg = {"timesteps": 50000, "headless": False}
-trainer = ManualTrainer(env=env, agents=agents, cfg=cfg)
+trainer = StepTrainer(env=env, agents=agents, cfg=cfg)
# train the agent(s)
for timestep in range(cfg["timesteps"]):
@@ -216,18 +216,18 @@ def eval(self) -> None:
# evaluate the agent(s)
for timestep in range(cfg["timesteps"]):
trainer.eval(timestep=timestep)
-# [pytorch-end-manual]
+# [pytorch-end-step]
-# [jax-start-manual]
-from skrl.trainers.jax import ManualTrainer
+# [jax-start-step]
+from skrl.trainers.jax import StepTrainer
# assuming there is an environment called 'env'
# and an agent or a list of agents called 'agents'
# create a step trainer
cfg = {"timesteps": 50000, "headless": False}
-trainer = ManualTrainer(env=env, agents=agents, cfg=cfg)
+trainer = StepTrainer(env=env, agents=agents, cfg=cfg)
# train the agent(s)
for timestep in range(cfg["timesteps"]):
@@ -236,4 +236,44 @@ def eval(self) -> None:
# evaluate the agent(s)
for timestep in range(cfg["timesteps"]):
trainer.eval(timestep=timestep)
-# [jax-end-manual]
+# [jax-end-step]
+
+# =============================================================================
+
+# [pytorch-start-manual-training]
+
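+# minimal sketch of a fully manual training loop, mirroring what the trainers do internally
+# (assumes an environment named 'env' and an agent named 'agents' exposing skrl's Agent API:
+# pre_interaction / act / record_transition / post_interaction)
+import torch
+
+timesteps = 50000
+
+states, infos = env.reset()
+
+for timestep in range(timesteps):
+    # pre-interaction
+    agents.pre_interaction(timestep=timestep, timesteps=timesteps)
+
+    with torch.no_grad():
+        # compute actions
+        actions = agents.act(states, timestep=timestep, timesteps=timesteps)[0]
+
+        # step the environment
+        next_states, rewards, terminated, truncated, infos = env.step(actions)
+
+        # render the environment
+        env.render()
+
+        # record the environment transition in memory
+        agents.record_transition(states=states, actions=actions, rewards=rewards,
+                                 next_states=next_states, terminated=terminated,
+                                 truncated=truncated, infos=infos,
+                                 timestep=timestep, timesteps=timesteps)
+
+    # post-interaction
+    agents.post_interaction(timestep=timestep, timesteps=timesteps)
+
+    # reset the environment
+    if terminated.any() or truncated.any():
+        states, infos = env.reset()
+    else:
+        states = next_states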
+# [pytorch-end-manual-training]
+
+# [pytorch-start-manual-evaluation]
+import torch
+
+# assuming there is an environment named 'env'
+# and an agent named 'agents' (or a state-preprocessor and a policy)
+
+states, infos = env.reset()
+
+for i in range(1000):
+ # state-preprocessor + policy
+ with torch.no_grad():
+ states = state_preprocessor(states)
+ actions = policy.act({"states": states})[0]
+
+ # step the environment
+ next_states, rewards, terminated, truncated, infos = env.step(actions)
+
+ # render the environment
+ env.render()
+
+ # check for termination/truncation
+ if terminated.any() or truncated.any():
+ states, infos = env.reset()
+ else:
+ states = next_states
+# [pytorch-end-manual-evaluation]
+
+
+# [jax-start-manual-training]
+
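+# minimal sketch of a fully manual training loop, mirroring the PyTorch example above
+# (assumes an environment named 'env' and an agent named 'agents' exposing skrl's Agent API:
+# pre_interaction / act / record_transition / post_interaction)
+timesteps = 50000
+
+states, infos = env.reset()
+
+for timestep in range(timesteps):
+    # pre-interaction
+    agents.pre_interaction(timestep=timestep, timesteps=timesteps)
+
+    # compute actions
+    actions = agents.act(states, timestep=timestep, timesteps=timesteps)[0]
+
+    # step the environment
+    next_states, rewards, terminated, truncated, infos = env.step(actions)
+
+    # render the environment
+    env.render()
+
+    # record the environment transition in memory
+    agents.record_transition(states=states, actions=actions, rewards=rewards,
+                             next_states=next_states, terminated=terminated,
+                             truncated=truncated, infos=infos,
+                             timestep=timestep, timesteps=timesteps)
+
+    # post-interaction
+    agents.post_interaction(timestep=timestep, timesteps=timesteps)
+
+    # reset the environment
+    if terminated.any() or truncated.any():
+        states, infos = env.reset()
+    else:
+        states = next_states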
+# [jax-end-manual-training]
+
+# [jax-start-manual-evaluation]
+
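+# minimal sketch mirroring the PyTorch evaluation example above
+# (assumes an environment named 'env' and an agent named 'agents',
+# or a state-preprocessor and a policy)
+
+states, infos = env.reset()
+
+for i in range(1000):
+    # state-preprocessor + policy
+    states = state_preprocessor(states)
+    actions = policy.act({"states": states})[0]
+
+    # step the environment
+    next_states, rewards, terminated, truncated, infos = env.step(actions)
+
+    # render the environment
+    env.render()
+
+    # check for termination/truncation
+    if terminated.any() or truncated.any():
+        states, infos = env.reset()
+    else:
+        states = next_states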
+# [jax-end-manual-evaluation]
diff --git a/pyproject.toml b/pyproject.toml
index 2afb84a0..6de499cb 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
[project]
name = "skrl"
-version = "1.0.0"
+version = "1.1.0"
description = "Modular and flexible library for reinforcement learning on PyTorch and JAX"
readme = "README.md"
requires-python = ">=3.6"
diff --git a/skrl/agents/jax/a2c/a2c.py b/skrl/agents/jax/a2c/a2c.py
index 60486358..fa1acb7b 100644
--- a/skrl/agents/jax/a2c/a2c.py
+++ b/skrl/agents/jax/a2c/a2c.py
@@ -1,4 +1,4 @@
-from typing import Any, Dict, Optional, Tuple, Union
+from typing import Any, Mapping, Optional, Tuple, Union
import copy
import functools
@@ -169,7 +169,7 @@ def _value_loss(params):
class A2C(Agent):
def __init__(self,
- models: Dict[str, Model],
+ models: Mapping[str, Model],
memory: Optional[Union[Memory, Tuple[Memory]]] = None,
observation_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None,
action_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None,
@@ -270,7 +270,7 @@ def __init__(self,
else:
self._value_preprocessor = self._empty_preprocessor
- def init(self, trainer_cfg: Optional[Dict[str, Any]] = None) -> None:
+ def init(self, trainer_cfg: Optional[Mapping[str, Any]] = None) -> None:
"""Initialize the agent
"""
super().init(trainer_cfg=trainer_cfg)
diff --git a/skrl/agents/jax/base.py b/skrl/agents/jax/base.py
index e1d82079..71e9d091 100644
--- a/skrl/agents/jax/base.py
+++ b/skrl/agents/jax/base.py
@@ -1,4 +1,4 @@
-from typing import Any, Dict, Mapping, Optional, Tuple, Union
+from typing import Any, Mapping, Optional, Tuple, Union
import collections
import copy
@@ -19,7 +19,7 @@
class Agent:
def __init__(self,
- models: Dict[str, Model],
+ models: Mapping[str, Model],
memory: Optional[Union[Memory, Tuple[Memory]]] = None,
observation_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None,
action_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None,
@@ -132,7 +132,7 @@ def _get_internal_value(self, _module: Any) -> Any:
"""
return _module.state_dict.params if hasattr(_module, "state_dict") else _module
- def init(self, trainer_cfg: Optional[Dict[str, Any]] = None) -> None:
+ def init(self, trainer_cfg: Optional[Mapping[str, Any]] = None) -> None:
"""Initialize the agent
This method should be called before the agent is used.
diff --git a/skrl/agents/jax/cem/cem.py b/skrl/agents/jax/cem/cem.py
index 2b0c4b9a..65c401e9 100644
--- a/skrl/agents/jax/cem/cem.py
+++ b/skrl/agents/jax/cem/cem.py
@@ -1,4 +1,4 @@
-from typing import Any, Dict, Optional, Tuple, Union
+from typing import Any, Mapping, Optional, Tuple, Union
import copy
import gym
@@ -52,7 +52,7 @@
class CEM(Agent):
def __init__(self,
- models: Dict[str, Model],
+ models: Mapping[str, Model],
memory: Optional[Union[Memory, Tuple[Memory]]] = None,
observation_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None,
action_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None,
@@ -130,7 +130,7 @@ def __init__(self,
else:
self._state_preprocessor = self._empty_preprocessor
- def init(self, trainer_cfg: Optional[Dict[str, Any]] = None) -> None:
+ def init(self, trainer_cfg: Optional[Mapping[str, Any]] = None) -> None:
"""Initialize the agent
"""
super().init(trainer_cfg=trainer_cfg)
diff --git a/skrl/agents/jax/ddpg/ddpg.py b/skrl/agents/jax/ddpg/ddpg.py
index 769316ee..feb0e12f 100644
--- a/skrl/agents/jax/ddpg/ddpg.py
+++ b/skrl/agents/jax/ddpg/ddpg.py
@@ -1,4 +1,4 @@
-from typing import Any, Dict, Optional, Tuple, Union
+from typing import Any, Mapping, Optional, Tuple, Union
import copy
import functools
@@ -111,7 +111,7 @@ def _policy_loss(policy_params, critic_params):
class DDPG(Agent):
def __init__(self,
- models: Dict[str, Model],
+ models: Mapping[str, Model],
memory: Optional[Union[Memory, Tuple[Memory]]] = None,
observation_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None,
action_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None,
@@ -214,7 +214,7 @@ def __init__(self,
else:
self._state_preprocessor = self._empty_preprocessor
- def init(self, trainer_cfg: Optional[Dict[str, Any]] = None) -> None:
+ def init(self, trainer_cfg: Optional[Mapping[str, Any]] = None) -> None:
"""Initialize the agent
"""
super().init(trainer_cfg=trainer_cfg)
diff --git a/skrl/agents/jax/dqn/ddqn.py b/skrl/agents/jax/dqn/ddqn.py
index 0d0968c3..47f7f53b 100644
--- a/skrl/agents/jax/dqn/ddqn.py
+++ b/skrl/agents/jax/dqn/ddqn.py
@@ -1,4 +1,4 @@
-from typing import Any, Dict, Optional, Tuple, Union
+from typing import Any, Mapping, Optional, Tuple, Union
import copy
import functools
@@ -89,7 +89,7 @@ def _q_network_loss(params):
class DDQN(Agent):
def __init__(self,
- models: Dict[str, Model],
+ models: Mapping[str, Model],
memory: Optional[Union[Memory, Tuple[Memory]]] = None,
observation_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None,
action_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None,
@@ -182,7 +182,7 @@ def __init__(self,
else:
self._state_preprocessor = self._empty_preprocessor
- def init(self, trainer_cfg: Optional[Dict[str, Any]] = None) -> None:
+ def init(self, trainer_cfg: Optional[Mapping[str, Any]] = None) -> None:
"""Initialize the agent
"""
super().init(trainer_cfg=trainer_cfg)
diff --git a/skrl/agents/jax/dqn/dqn.py b/skrl/agents/jax/dqn/dqn.py
index 49cb6e34..dcec3140 100644
--- a/skrl/agents/jax/dqn/dqn.py
+++ b/skrl/agents/jax/dqn/dqn.py
@@ -1,4 +1,4 @@
-from typing import Any, Dict, Optional, Tuple, Union
+from typing import Any, Mapping, Optional, Tuple, Union
import copy
import functools
@@ -86,7 +86,7 @@ def _q_network_loss(params):
class DQN(Agent):
def __init__(self,
- models: Dict[str, Model],
+ models: Mapping[str, Model],
memory: Optional[Union[Memory, Tuple[Memory]]] = None,
observation_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None,
action_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None,
@@ -179,7 +179,7 @@ def __init__(self,
else:
self._state_preprocessor = self._empty_preprocessor
- def init(self, trainer_cfg: Optional[Dict[str, Any]] = None) -> None:
+ def init(self, trainer_cfg: Optional[Mapping[str, Any]] = None) -> None:
"""Initialize the agent
"""
super().init(trainer_cfg=trainer_cfg)
diff --git a/skrl/agents/jax/ppo/ppo.py b/skrl/agents/jax/ppo/ppo.py
index 01a4942b..3437b049 100644
--- a/skrl/agents/jax/ppo/ppo.py
+++ b/skrl/agents/jax/ppo/ppo.py
@@ -1,4 +1,4 @@
-from typing import Any, Dict, Optional, Tuple, Union
+from typing import Any, Mapping, Optional, Tuple, Union
import copy
import functools
@@ -188,7 +188,7 @@ def _value_loss(params):
class PPO(Agent):
def __init__(self,
- models: Dict[str, Model],
+ models: Mapping[str, Model],
memory: Optional[Union[Memory, Tuple[Memory]]] = None,
observation_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None,
action_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None,
@@ -296,7 +296,7 @@ def __init__(self,
else:
self._value_preprocessor = self._empty_preprocessor
- def init(self, trainer_cfg: Optional[Dict[str, Any]] = None) -> None:
+ def init(self, trainer_cfg: Optional[Mapping[str, Any]] = None) -> None:
"""Initialize the agent
"""
super().init(trainer_cfg=trainer_cfg)
diff --git a/skrl/agents/jax/rpo/rpo.py b/skrl/agents/jax/rpo/rpo.py
index 281a7210..3e90d06d 100644
--- a/skrl/agents/jax/rpo/rpo.py
+++ b/skrl/agents/jax/rpo/rpo.py
@@ -1,4 +1,4 @@
-from typing import Any, Dict, Optional, Tuple, Union
+from typing import Any, Mapping, Optional, Tuple, Union
import copy
import functools
@@ -191,7 +191,7 @@ def _value_loss(params):
class RPO(Agent):
def __init__(self,
- models: Dict[str, Model],
+ models: Mapping[str, Model],
memory: Optional[Union[Memory, Tuple[Memory]]] = None,
observation_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None,
action_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None,
@@ -300,7 +300,7 @@ def __init__(self,
else:
self._value_preprocessor = self._empty_preprocessor
- def init(self, trainer_cfg: Optional[Dict[str, Any]] = None) -> None:
+ def init(self, trainer_cfg: Optional[Mapping[str, Any]] = None) -> None:
"""Initialize the agent
"""
super().init(trainer_cfg=trainer_cfg)
diff --git a/skrl/agents/jax/sac/sac.py b/skrl/agents/jax/sac/sac.py
index 5a7a7c5a..75cecf11 100644
--- a/skrl/agents/jax/sac/sac.py
+++ b/skrl/agents/jax/sac/sac.py
@@ -1,4 +1,4 @@
-from typing import Any, Dict, Optional, Tuple, Union
+from typing import Any, Mapping, Optional, Tuple, Union
import copy
import functools
@@ -122,7 +122,7 @@ def _entropy_loss(params):
class SAC(Agent):
def __init__(self,
- models: Dict[str, Model],
+ models: Mapping[str, Model],
memory: Optional[Union[Memory, Tuple[Memory]]] = None,
observation_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None,
action_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None,
@@ -255,7 +255,7 @@ def value(self):
else:
self._state_preprocessor = self._empty_preprocessor
- def init(self, trainer_cfg: Optional[Dict[str, Any]] = None) -> None:
+ def init(self, trainer_cfg: Optional[Mapping[str, Any]] = None) -> None:
"""Initialize the agent
"""
super().init(trainer_cfg=trainer_cfg)
diff --git a/skrl/agents/jax/td3/td3.py b/skrl/agents/jax/td3/td3.py
index 568452f4..1c2544d8 100644
--- a/skrl/agents/jax/td3/td3.py
+++ b/skrl/agents/jax/td3/td3.py
@@ -1,4 +1,4 @@
-from typing import Any, Dict, Optional, Tuple, Union
+from typing import Any, Mapping, Optional, Tuple, Union
import copy
import functools
@@ -130,7 +130,7 @@ def _policy_loss(policy_params, critic_1_params):
class TD3(Agent):
def __init__(self,
- models: Dict[str, Model],
+ models: Mapping[str, Model],
memory: Optional[Union[Memory, Tuple[Memory]]] = None,
observation_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None,
action_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None,
@@ -250,7 +250,7 @@ def __init__(self,
else:
self._state_preprocessor = self._empty_preprocessor
- def init(self, trainer_cfg: Optional[Dict[str, Any]] = None) -> None:
+ def init(self, trainer_cfg: Optional[Mapping[str, Any]] = None) -> None:
"""Initialize the agent
"""
super().init(trainer_cfg=trainer_cfg)
diff --git a/skrl/agents/torch/a2c/a2c.py b/skrl/agents/torch/a2c/a2c.py
index 8903a180..97af7cb3 100644
--- a/skrl/agents/torch/a2c/a2c.py
+++ b/skrl/agents/torch/a2c/a2c.py
@@ -1,4 +1,4 @@
-from typing import Any, Dict, Optional, Tuple, Union
+from typing import Any, Mapping, Optional, Tuple, Union
import copy
import itertools
@@ -59,7 +59,7 @@
class A2C(Agent):
def __init__(self,
- models: Dict[str, Model],
+ models: Mapping[str, Model],
memory: Optional[Union[Memory, Tuple[Memory]]] = None,
observation_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None,
action_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None,
@@ -153,7 +153,7 @@ def __init__(self,
else:
self._value_preprocessor = self._empty_preprocessor
- def init(self, trainer_cfg: Optional[Dict[str, Any]] = None) -> None:
+ def init(self, trainer_cfg: Optional[Mapping[str, Any]] = None) -> None:
"""Initialize the agent
"""
super().init(trainer_cfg=trainer_cfg)
diff --git a/skrl/agents/torch/a2c/a2c_rnn.py b/skrl/agents/torch/a2c/a2c_rnn.py
index c17e38e2..9b24bc09 100644
--- a/skrl/agents/torch/a2c/a2c_rnn.py
+++ b/skrl/agents/torch/a2c/a2c_rnn.py
@@ -1,4 +1,4 @@
-from typing import Any, Dict, Optional, Tuple, Union
+from typing import Any, Mapping, Optional, Tuple, Union
import copy
import itertools
@@ -59,7 +59,7 @@
class A2C_RNN(Agent):
def __init__(self,
- models: Dict[str, Model],
+ models: Mapping[str, Model],
memory: Optional[Union[Memory, Tuple[Memory]]] = None,
observation_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None,
action_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None,
@@ -153,7 +153,7 @@ def __init__(self,
else:
self._value_preprocessor = self._empty_preprocessor
- def init(self, trainer_cfg: Optional[Dict[str, Any]] = None) -> None:
+ def init(self, trainer_cfg: Optional[Mapping[str, Any]] = None) -> None:
"""Initialize the agent
"""
super().init(trainer_cfg=trainer_cfg)
diff --git a/skrl/agents/torch/amp/amp.py b/skrl/agents/torch/amp/amp.py
index 28889f9e..e9311dae 100644
--- a/skrl/agents/torch/amp/amp.py
+++ b/skrl/agents/torch/amp/amp.py
@@ -1,4 +1,4 @@
-from typing import Any, Callable, Dict, Optional, Tuple, Union
+from typing import Any, Callable, Mapping, Optional, Tuple, Union
import copy
import itertools
@@ -76,7 +76,7 @@
class AMP(Agent):
def __init__(self,
- models: Dict[str, Model],
+ models: Mapping[str, Model],
memory: Optional[Union[Memory, Tuple[Memory]]] = None,
observation_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None,
action_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None,
@@ -218,7 +218,7 @@ def __init__(self,
else:
self._amp_state_preprocessor = self._empty_preprocessor
- def init(self, trainer_cfg: Optional[Dict[str, Any]] = None) -> None:
+ def init(self, trainer_cfg: Optional[Mapping[str, Any]] = None) -> None:
"""Initialize the agent
"""
super().init(trainer_cfg=trainer_cfg)
diff --git a/skrl/agents/torch/base.py b/skrl/agents/torch/base.py
index b6355f52..237a0953 100644
--- a/skrl/agents/torch/base.py
+++ b/skrl/agents/torch/base.py
@@ -1,4 +1,4 @@
-from typing import Any, Dict, Mapping, Optional, Tuple, Union
+from typing import Any, Mapping, Optional, Tuple, Union
import collections
import copy
@@ -18,7 +18,7 @@
class Agent:
def __init__(self,
- models: Dict[str, Model],
+ models: Mapping[str, Model],
memory: Optional[Union[Memory, Tuple[Memory]]] = None,
observation_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None,
action_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None,
@@ -125,7 +125,7 @@ def _get_internal_value(self, _module: Any) -> Any:
"""
return _module.state_dict() if hasattr(_module, "state_dict") else _module
- def init(self, trainer_cfg: Optional[Dict[str, Any]] = None) -> None:
+ def init(self, trainer_cfg: Optional[Mapping[str, Any]] = None) -> None:
"""Initialize the agent
This method should be called before the agent is used.
diff --git a/skrl/agents/torch/cem/cem.py b/skrl/agents/torch/cem/cem.py
index dd5e1ff3..a99eff5a 100644
--- a/skrl/agents/torch/cem/cem.py
+++ b/skrl/agents/torch/cem/cem.py
@@ -1,4 +1,4 @@
-from typing import Any, Dict, Optional, Tuple, Union
+from typing import Any, Mapping, Optional, Tuple, Union
import copy
import gym
@@ -49,7 +49,7 @@
class CEM(Agent):
def __init__(self,
- models: Dict[str, Model],
+ models: Mapping[str, Model],
memory: Optional[Union[Memory, Tuple[Memory]]] = None,
observation_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None,
action_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None,
@@ -126,7 +126,7 @@ def __init__(self,
else:
self._state_preprocessor = self._empty_preprocessor
- def init(self, trainer_cfg: Optional[Dict[str, Any]] = None) -> None:
+ def init(self, trainer_cfg: Optional[Mapping[str, Any]] = None) -> None:
"""Initialize the agent
"""
super().init(trainer_cfg=trainer_cfg)
diff --git a/skrl/agents/torch/ddpg/ddpg.py b/skrl/agents/torch/ddpg/ddpg.py
index ede9b7c4..a5270909 100644
--- a/skrl/agents/torch/ddpg/ddpg.py
+++ b/skrl/agents/torch/ddpg/ddpg.py
@@ -1,4 +1,4 @@
-from typing import Any, Dict, Optional, Tuple, Union
+from typing import Any, Mapping, Optional, Tuple, Union
import copy
import gym
@@ -60,7 +60,7 @@
class DDPG(Agent):
def __init__(self,
- models: Dict[str, Model],
+ models: Mapping[str, Model],
memory: Optional[Union[Memory, Tuple[Memory]]] = None,
observation_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None,
action_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None,
@@ -161,7 +161,7 @@ def __init__(self,
else:
self._state_preprocessor = self._empty_preprocessor
- def init(self, trainer_cfg: Optional[Dict[str, Any]] = None) -> None:
+ def init(self, trainer_cfg: Optional[Mapping[str, Any]] = None) -> None:
"""Initialize the agent
"""
super().init(trainer_cfg=trainer_cfg)
diff --git a/skrl/agents/torch/ddpg/ddpg_rnn.py b/skrl/agents/torch/ddpg/ddpg_rnn.py
index 1d52937a..e1a8142e 100644
--- a/skrl/agents/torch/ddpg/ddpg_rnn.py
+++ b/skrl/agents/torch/ddpg/ddpg_rnn.py
@@ -1,4 +1,4 @@
-from typing import Any, Dict, Optional, Tuple, Union
+from typing import Any, Mapping, Optional, Tuple, Union
import copy
import gym
@@ -60,7 +60,7 @@
class DDPG_RNN(Agent):
def __init__(self,
- models: Dict[str, Model],
+ models: Mapping[str, Model],
memory: Optional[Union[Memory, Tuple[Memory]]] = None,
observation_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None,
action_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None,
@@ -161,7 +161,7 @@ def __init__(self,
else:
self._state_preprocessor = self._empty_preprocessor
- def init(self, trainer_cfg: Optional[Dict[str, Any]] = None) -> None:
+ def init(self, trainer_cfg: Optional[Mapping[str, Any]] = None) -> None:
"""Initialize the agent
"""
super().init(trainer_cfg=trainer_cfg)
diff --git a/skrl/agents/torch/dqn/ddqn.py b/skrl/agents/torch/dqn/ddqn.py
index 479ba934..84352027 100644
--- a/skrl/agents/torch/dqn/ddqn.py
+++ b/skrl/agents/torch/dqn/ddqn.py
@@ -1,4 +1,4 @@
-from typing import Any, Dict, Optional, Tuple, Union
+from typing import Any, Mapping, Optional, Tuple, Union
import copy
import math
@@ -59,7 +59,7 @@
class DDQN(Agent):
def __init__(self,
- models: Dict[str, Model],
+ models: Mapping[str, Model],
memory: Optional[Union[Memory, Tuple[Memory]]] = None,
observation_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None,
action_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None,
@@ -150,7 +150,7 @@ def __init__(self,
else:
self._state_preprocessor = self._empty_preprocessor
- def init(self, trainer_cfg: Optional[Dict[str, Any]] = None) -> None:
+ def init(self, trainer_cfg: Optional[Mapping[str, Any]] = None) -> None:
"""Initialize the agent
"""
super().init(trainer_cfg=trainer_cfg)
diff --git a/skrl/agents/torch/dqn/dqn.py b/skrl/agents/torch/dqn/dqn.py
index 782befaf..4c485524 100644
--- a/skrl/agents/torch/dqn/dqn.py
+++ b/skrl/agents/torch/dqn/dqn.py
@@ -1,4 +1,4 @@
-from typing import Any, Dict, Optional, Tuple, Union
+from typing import Any, Mapping, Optional, Tuple, Union
import copy
import math
@@ -59,7 +59,7 @@
class DQN(Agent):
def __init__(self,
- models: Dict[str, Model],
+ models: Mapping[str, Model],
memory: Optional[Union[Memory, Tuple[Memory]]] = None,
observation_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None,
action_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None,
@@ -150,7 +150,7 @@ def __init__(self,
else:
self._state_preprocessor = self._empty_preprocessor
- def init(self, trainer_cfg: Optional[Dict[str, Any]] = None) -> None:
+ def init(self, trainer_cfg: Optional[Mapping[str, Any]] = None) -> None:
"""Initialize the agent
"""
super().init(trainer_cfg=trainer_cfg)
diff --git a/skrl/agents/torch/ppo/ppo.py b/skrl/agents/torch/ppo/ppo.py
index 2c035a6a..8c2315bd 100644
--- a/skrl/agents/torch/ppo/ppo.py
+++ b/skrl/agents/torch/ppo/ppo.py
@@ -1,4 +1,4 @@
-from typing import Any, Dict, Optional, Tuple, Union
+from typing import Any, Mapping, Optional, Tuple, Union
import copy
import itertools
@@ -66,7 +66,7 @@
class PPO(Agent):
def __init__(self,
- models: Dict[str, Model],
+ models: Mapping[str, Model],
memory: Optional[Union[Memory, Tuple[Memory]]] = None,
observation_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None,
action_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None,
@@ -167,7 +167,7 @@ def __init__(self,
else:
self._value_preprocessor = self._empty_preprocessor
- def init(self, trainer_cfg: Optional[Dict[str, Any]] = None) -> None:
+ def init(self, trainer_cfg: Optional[Mapping[str, Any]] = None) -> None:
"""Initialize the agent
"""
super().init(trainer_cfg=trainer_cfg)
diff --git a/skrl/agents/torch/ppo/ppo_rnn.py b/skrl/agents/torch/ppo/ppo_rnn.py
index 8086995e..ccabafca 100644
--- a/skrl/agents/torch/ppo/ppo_rnn.py
+++ b/skrl/agents/torch/ppo/ppo_rnn.py
@@ -1,4 +1,4 @@
-from typing import Any, Dict, Optional, Tuple, Union
+from typing import Any, Mapping, Optional, Tuple, Union
import copy
import itertools
@@ -66,7 +66,7 @@
class PPO_RNN(Agent):
def __init__(self,
- models: Dict[str, Model],
+ models: Mapping[str, Model],
memory: Optional[Union[Memory, Tuple[Memory]]] = None,
observation_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None,
action_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None,
@@ -167,7 +167,7 @@ def __init__(self,
else:
self._value_preprocessor = self._empty_preprocessor
- def init(self, trainer_cfg: Optional[Dict[str, Any]] = None) -> None:
+ def init(self, trainer_cfg: Optional[Mapping[str, Any]] = None) -> None:
"""Initialize the agent
"""
super().init(trainer_cfg=trainer_cfg)
diff --git a/skrl/agents/torch/q_learning/q_learning.py b/skrl/agents/torch/q_learning/q_learning.py
index 5be6ac97..16212d8f 100644
--- a/skrl/agents/torch/q_learning/q_learning.py
+++ b/skrl/agents/torch/q_learning/q_learning.py
@@ -1,4 +1,4 @@
-from typing import Any, Dict, Optional, Tuple, Union
+from typing import Any, Mapping, Optional, Tuple, Union
import copy
import gym
@@ -39,7 +39,7 @@
class Q_LEARNING(Agent):
def __init__(self,
- models: Dict[str, Model],
+ models: Mapping[str, Model],
memory: Optional[Union[Memory, Tuple[Memory]]] = None,
observation_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None,
action_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None,
@@ -99,7 +99,7 @@ def __init__(self,
self._current_next_states = None
self._current_dones = None
- def init(self, trainer_cfg: Optional[Dict[str, Any]] = None) -> None:
+ def init(self, trainer_cfg: Optional[Mapping[str, Any]] = None) -> None:
"""Initialize the agent
"""
super().init(trainer_cfg=trainer_cfg)
diff --git a/skrl/agents/torch/rpo/rpo.py b/skrl/agents/torch/rpo/rpo.py
index e7528c33..5929f54e 100644
--- a/skrl/agents/torch/rpo/rpo.py
+++ b/skrl/agents/torch/rpo/rpo.py
@@ -1,4 +1,4 @@
-from typing import Any, Dict, Optional, Tuple, Union
+from typing import Any, Mapping, Optional, Tuple, Union
import copy
import itertools
@@ -67,7 +67,7 @@
class RPO(Agent):
def __init__(self,
- models: Dict[str, Model],
+ models: Mapping[str, Model],
memory: Optional[Union[Memory, Tuple[Memory]]] = None,
observation_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None,
action_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None,
@@ -169,7 +169,7 @@ def __init__(self,
else:
self._value_preprocessor = self._empty_preprocessor
- def init(self, trainer_cfg: Optional[Dict[str, Any]] = None) -> None:
+ def init(self, trainer_cfg: Optional[Mapping[str, Any]] = None) -> None:
"""Initialize the agent
"""
super().init(trainer_cfg=trainer_cfg)
diff --git a/skrl/agents/torch/rpo/rpo_rnn.py b/skrl/agents/torch/rpo/rpo_rnn.py
index 1060adb7..382d1efb 100644
--- a/skrl/agents/torch/rpo/rpo_rnn.py
+++ b/skrl/agents/torch/rpo/rpo_rnn.py
@@ -1,4 +1,4 @@
-from typing import Any, Dict, Optional, Tuple, Union
+from typing import Any, Mapping, Optional, Tuple, Union
import copy
import itertools
@@ -67,7 +67,7 @@
class RPO_RNN(Agent):
def __init__(self,
- models: Dict[str, Model],
+ models: Mapping[str, Model],
memory: Optional[Union[Memory, Tuple[Memory]]] = None,
observation_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None,
action_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None,
@@ -169,7 +169,7 @@ def __init__(self,
else:
self._value_preprocessor = self._empty_preprocessor
- def init(self, trainer_cfg: Optional[Dict[str, Any]] = None) -> None:
+ def init(self, trainer_cfg: Optional[Mapping[str, Any]] = None) -> None:
"""Initialize the agent
"""
super().init(trainer_cfg=trainer_cfg)
diff --git a/skrl/agents/torch/sac/sac.py b/skrl/agents/torch/sac/sac.py
index d41b4830..22468d80 100644
--- a/skrl/agents/torch/sac/sac.py
+++ b/skrl/agents/torch/sac/sac.py
@@ -1,4 +1,4 @@
-from typing import Any, Dict, Optional, Tuple, Union
+from typing import Any, Mapping, Optional, Tuple, Union
import copy
import itertools
@@ -60,7 +60,7 @@
class SAC(Agent):
def __init__(self,
- models: Dict[str, Model],
+ models: Mapping[str, Model],
memory: Optional[Union[Memory, Tuple[Memory]]] = None,
observation_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None,
action_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None,
@@ -179,7 +179,7 @@ def __init__(self,
else:
self._state_preprocessor = self._empty_preprocessor
- def init(self, trainer_cfg: Optional[Dict[str, Any]] = None) -> None:
+ def init(self, trainer_cfg: Optional[Mapping[str, Any]] = None) -> None:
"""Initialize the agent
"""
super().init(trainer_cfg=trainer_cfg)
diff --git a/skrl/agents/torch/sac/sac_rnn.py b/skrl/agents/torch/sac/sac_rnn.py
index 501dc122..755cbeab 100644
--- a/skrl/agents/torch/sac/sac_rnn.py
+++ b/skrl/agents/torch/sac/sac_rnn.py
@@ -1,4 +1,4 @@
-from typing import Any, Dict, Optional, Tuple, Union
+from typing import Any, Mapping, Optional, Tuple, Union
import copy
import itertools
@@ -60,7 +60,7 @@
class SAC_RNN(Agent):
def __init__(self,
- models: Dict[str, Model],
+ models: Mapping[str, Model],
memory: Optional[Union[Memory, Tuple[Memory]]] = None,
observation_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None,
action_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None,
@@ -179,7 +179,7 @@ def __init__(self,
else:
self._state_preprocessor = self._empty_preprocessor
- def init(self, trainer_cfg: Optional[Dict[str, Any]] = None) -> None:
+ def init(self, trainer_cfg: Optional[Mapping[str, Any]] = None) -> None:
"""Initialize the agent
"""
super().init(trainer_cfg=trainer_cfg)
diff --git a/skrl/agents/torch/sarsa/sarsa.py b/skrl/agents/torch/sarsa/sarsa.py
index bb717025..4cc14c34 100644
--- a/skrl/agents/torch/sarsa/sarsa.py
+++ b/skrl/agents/torch/sarsa/sarsa.py
@@ -1,4 +1,4 @@
-from typing import Any, Dict, Optional, Tuple, Union
+from typing import Any, Mapping, Optional, Tuple, Union
import copy
import gym
@@ -39,7 +39,7 @@
class SARSA(Agent):
def __init__(self,
- models: Dict[str, Model],
+ models: Mapping[str, Model],
memory: Optional[Union[Memory, Tuple[Memory]]] = None,
observation_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None,
action_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None,
@@ -99,7 +99,7 @@ def __init__(self,
self._current_next_states = None
self._current_dones = None
- def init(self, trainer_cfg: Optional[Dict[str, Any]] = None) -> None:
+ def init(self, trainer_cfg: Optional[Mapping[str, Any]] = None) -> None:
"""Initialize the agent
"""
super().init(trainer_cfg=trainer_cfg)
diff --git a/skrl/agents/torch/td3/td3.py b/skrl/agents/torch/td3/td3.py
index a6eb23c8..86275243 100644
--- a/skrl/agents/torch/td3/td3.py
+++ b/skrl/agents/torch/td3/td3.py
@@ -1,4 +1,4 @@
-from typing import Any, Dict, Optional, Tuple, Union
+from typing import Any, Mapping, Optional, Tuple, Union
import copy
import itertools
@@ -66,7 +66,7 @@
class TD3(Agent):
def __init__(self,
- models: Dict[str, Model],
+ models: Mapping[str, Model],
memory: Optional[Union[Memory, Tuple[Memory]]] = None,
observation_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None,
action_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None,
@@ -182,7 +182,7 @@ def __init__(self,
else:
self._state_preprocessor = self._empty_preprocessor
- def init(self, trainer_cfg: Optional[Dict[str, Any]] = None) -> None:
+ def init(self, trainer_cfg: Optional[Mapping[str, Any]] = None) -> None:
"""Initialize the agent
"""
super().init(trainer_cfg=trainer_cfg)
diff --git a/skrl/agents/torch/td3/td3_rnn.py b/skrl/agents/torch/td3/td3_rnn.py
index ea906b15..fdd619d8 100644
--- a/skrl/agents/torch/td3/td3_rnn.py
+++ b/skrl/agents/torch/td3/td3_rnn.py
@@ -1,4 +1,4 @@
-from typing import Any, Dict, Optional, Tuple, Union
+from typing import Any, Mapping, Optional, Tuple, Union
import copy
import itertools
@@ -66,7 +66,7 @@
class TD3_RNN(Agent):
def __init__(self,
- models: Dict[str, Model],
+ models: Mapping[str, Model],
memory: Optional[Union[Memory, Tuple[Memory]]] = None,
observation_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None,
action_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None,
@@ -182,7 +182,7 @@ def __init__(self,
else:
self._state_preprocessor = self._empty_preprocessor
- def init(self, trainer_cfg: Optional[Dict[str, Any]] = None) -> None:
+ def init(self, trainer_cfg: Optional[Mapping[str, Any]] = None) -> None:
"""Initialize the agent
"""
super().init(trainer_cfg=trainer_cfg)
diff --git a/skrl/agents/torch/trpo/trpo.py b/skrl/agents/torch/trpo/trpo.py
index b7f16036..2c00b69b 100644
--- a/skrl/agents/torch/trpo/trpo.py
+++ b/skrl/agents/torch/trpo/trpo.py
@@ -1,4 +1,4 @@
-from typing import Any, Dict, Optional, Tuple, Union
+from typing import Any, Mapping, Optional, Tuple, Union
import copy
import gym
@@ -65,7 +65,7 @@
class TRPO(Agent):
def __init__(self,
- models: Dict[str, Model],
+ models: Mapping[str, Model],
memory: Optional[Union[Memory, Tuple[Memory]]] = None,
observation_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None,
action_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None,
@@ -164,7 +164,7 @@ def __init__(self,
else:
self._value_preprocessor = self._empty_preprocessor
- def init(self, trainer_cfg: Optional[Dict[str, Any]] = None) -> None:
+ def init(self, trainer_cfg: Optional[Mapping[str, Any]] = None) -> None:
"""Initialize the agent
"""
super().init(trainer_cfg=trainer_cfg)
diff --git a/skrl/agents/torch/trpo/trpo_rnn.py b/skrl/agents/torch/trpo/trpo_rnn.py
index bc8ae463..58b187e5 100644
--- a/skrl/agents/torch/trpo/trpo_rnn.py
+++ b/skrl/agents/torch/trpo/trpo_rnn.py
@@ -1,4 +1,4 @@
-from typing import Any, Dict, Optional, Tuple, Union
+from typing import Any, Mapping, Optional, Tuple, Union
import copy
import gym
@@ -65,7 +65,7 @@
class TRPO_RNN(Agent):
def __init__(self,
- models: Dict[str, Model],
+ models: Mapping[str, Model],
memory: Optional[Union[Memory, Tuple[Memory]]] = None,
observation_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None,
action_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None,
@@ -164,7 +164,7 @@ def __init__(self,
else:
self._value_preprocessor = self._empty_preprocessor
- def init(self, trainer_cfg: Optional[Dict[str, Any]] = None) -> None:
+ def init(self, trainer_cfg: Optional[Mapping[str, Any]] = None) -> None:
"""Initialize the agent
"""
super().init(trainer_cfg=trainer_cfg)
diff --git a/skrl/envs/wrappers/jax/base.py b/skrl/envs/wrappers/jax/base.py
index 09170dcc..0be8b742 100644
--- a/skrl/envs/wrappers/jax/base.py
+++ b/skrl/envs/wrappers/jax/base.py
@@ -26,6 +26,14 @@ def __init__(self, env: Any) -> None:
self.device = jax.devices(self._env.device.split(':')[0] if type(self._env.device) == str else self._env.device.type)[0]
except RuntimeError:
pass
+ # spaces
+ try:
+ self._action_space = self._env.single_action_space
+ self._observation_space = self._env.single_observation_space
+ except AttributeError:
+ self._action_space = self._env.action_space
+ self._observation_space = self._env.observation_space
+ self._state_space = self._env.state_space if hasattr(self._env, "state_space") else self._observation_space
def __getattr__(self, key: str) -> Any:
"""Get an attribute from the wrapped environment
@@ -100,19 +108,19 @@ def state_space(self) -> gym.Space:
If the wrapped environment does not have the ``state_space`` property,
the value of the ``observation_space`` property will be used
"""
- return self._env.state_space if hasattr(self._env, "state_space") else self._env.observation_space
+ return self._state_space
@property
def observation_space(self) -> gym.Space:
"""Observation space
"""
- return self._env.observation_space
+ return self._observation_space
@property
def action_space(self) -> gym.Space:
"""Action space
"""
- return self._env.action_space
+ return self._action_space
class MultiAgentEnvWrapper(object):
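
The space caching above is what lets the wrapper report per-sub-environment spaces when a
vectorized environment is wrapped. A minimal sketch of the same fallback, assuming gymnasium's
``SyncVectorEnv`` as a stand-in for any vectorized environment::

    import gymnasium as gym

    envs = gym.vector.SyncVectorEnv([lambda: gym.make("CartPole-v1")] * 4)
    # prefer the per-sub-environment spaces; fall back to the regular attributes otherwise
    observation_space = getattr(envs, "single_observation_space", envs.observation_space)
    action_space = getattr(envs, "single_action_space", envs.action_space)
    print(observation_space.shape)  # (4,) rather than the batched (4, 4)
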
diff --git a/skrl/envs/wrappers/jax/bidexhands_envs.py b/skrl/envs/wrappers/jax/bidexhands_envs.py
index 2ca535f9..fe63563c 100644
--- a/skrl/envs/wrappers/jax/bidexhands_envs.py
+++ b/skrl/envs/wrappers/jax/bidexhands_envs.py
@@ -5,8 +5,13 @@
import jax
import jax.dlpack
import numpy as np
-import torch
-import torch.utils.dlpack
+
+
+try:
+ import torch
+ import torch.utils.dlpack
+except:
+ pass # TODO: show warning message
from skrl.envs.wrappers.jax.base import MultiAgentEnvWrapper
diff --git a/skrl/envs/wrappers/jax/gym_envs.py b/skrl/envs/wrappers/jax/gym_envs.py
index 685bbbb6..3b073d9e 100644
--- a/skrl/envs/wrappers/jax/gym_envs.py
+++ b/skrl/envs/wrappers/jax/gym_envs.py
@@ -113,6 +113,8 @@ def _tensor_to_action(self, actions: np.ndarray) -> Any:
return actions.astype(space[0].dtype).reshape(-1)
elif isinstance(space, gym.spaces.Discrete):
return actions.item()
+ elif isinstance(space, gym.spaces.MultiDiscrete):
+ return actions.astype(space.dtype).reshape(space.shape)
elif isinstance(space, gym.spaces.Box):
return actions.astype(space.dtype).reshape(space.shape)
raise ValueError(f"Action space type {type(space)} not supported. Please report this issue")
diff --git a/skrl/envs/wrappers/jax/gymnasium_envs.py b/skrl/envs/wrappers/jax/gymnasium_envs.py
index c2cc1d9f..44a45633 100644
--- a/skrl/envs/wrappers/jax/gymnasium_envs.py
+++ b/skrl/envs/wrappers/jax/gymnasium_envs.py
@@ -108,6 +108,8 @@ def _tensor_to_action(self, actions: np.ndarray) -> Any:
return actions.astype(space[0].dtype).reshape(-1)
if isinstance(space, gymnasium.spaces.Discrete):
return actions.item()
+ elif isinstance(space, gymnasium.spaces.MultiDiscrete):
+ return actions.astype(space.dtype).reshape(space.shape)
elif isinstance(space, gymnasium.spaces.Box):
return actions.astype(space.dtype).reshape(space.shape)
raise ValueError(f"Action space type {type(space)} not supported. Please report this issue")
diff --git a/skrl/envs/wrappers/jax/isaac_orbit_envs.py b/skrl/envs/wrappers/jax/isaac_orbit_envs.py
index 68adacdb..c1e897c9 100644
--- a/skrl/envs/wrappers/jax/isaac_orbit_envs.py
+++ b/skrl/envs/wrappers/jax/isaac_orbit_envs.py
@@ -3,8 +3,13 @@
import jax
import jax.dlpack as jax_dlpack
import numpy as np
-import torch
-import torch.utils.dlpack as torch_dlpack
+
+
+try:
+ import torch
+ import torch.utils.dlpack as torch_dlpack
+except:
+ pass # TODO: show warning message
from skrl import logger
from skrl.envs.wrappers.jax.base import Wrapper
@@ -53,10 +58,10 @@ def step(self, actions: Union[np.ndarray, jax.Array]) -> \
actions = _jax2torch(actions, self._env.device, self._jax)
with torch.no_grad():
- self._obs_dict, reward, terminated, info = self._env.step(actions)
+ self._obs_dict, reward, terminated, truncated, info = self._env.step(actions)
terminated = terminated.to(dtype=torch.int8)
- truncated = info["time_outs"].to(dtype=torch.int8) if "time_outs" in info else torch.zeros_like(terminated)
+ truncated = truncated.to(dtype=torch.int8)
return _torch2jax(self._obs_dict["policy"], self._jax), \
_torch2jax(reward.view(-1, 1), self._jax), \
@@ -71,9 +76,9 @@ def reset(self) -> Tuple[Union[np.ndarray, jax.Array], Any]:
:rtype: np.ndarray or jax.Array and any other info
"""
if self._reset_once:
- self._obs_dict = self._env.reset()
+ self._obs_dict, info = self._env.reset()
self._reset_once = False
- return _torch2jax(self._obs_dict["policy"], self._jax), {}
+ return _torch2jax(self._obs_dict["policy"], self._jax), info
def render(self, *args, **kwargs) -> None:
"""Render the environment
diff --git a/skrl/envs/wrappers/jax/isaacgym_envs.py b/skrl/envs/wrappers/jax/isaacgym_envs.py
index a426a557..459444b9 100644
--- a/skrl/envs/wrappers/jax/isaacgym_envs.py
+++ b/skrl/envs/wrappers/jax/isaacgym_envs.py
@@ -3,8 +3,13 @@
import jax
import jax.dlpack as jax_dlpack
import numpy as np
-import torch
-import torch.utils.dlpack as torch_dlpack
+
+
+try:
+ import torch
+ import torch.utils.dlpack as torch_dlpack
+except:
+ pass # TODO: show warning message
from skrl import logger
from skrl.envs.wrappers.jax.base import Wrapper
diff --git a/skrl/envs/wrappers/jax/omniverse_isaacgym_envs.py b/skrl/envs/wrappers/jax/omniverse_isaacgym_envs.py
index 23105bc1..e49ee786 100644
--- a/skrl/envs/wrappers/jax/omniverse_isaacgym_envs.py
+++ b/skrl/envs/wrappers/jax/omniverse_isaacgym_envs.py
@@ -3,8 +3,13 @@
import jax
import jax.dlpack as jax_dlpack
import numpy as np
-import torch
-import torch.utils.dlpack as torch_dlpack
+
+
+try:
+ import torch
+ import torch.utils.dlpack as torch_dlpack
+except:
+ pass # TODO: show warning message
from skrl import logger
from skrl.envs.wrappers.jax.base import Wrapper
diff --git a/skrl/envs/wrappers/torch/base.py b/skrl/envs/wrappers/torch/base.py
index 233c2a08..85e79b08 100644
--- a/skrl/envs/wrappers/torch/base.py
+++ b/skrl/envs/wrappers/torch/base.py
@@ -19,6 +19,14 @@ def __init__(self, env: Any) -> None:
self.device = torch.device(self._env.device)
else:
self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+ # spaces
+ try:
+ self._action_space = self._env.single_action_space
+ self._observation_space = self._env.single_observation_space
+ except AttributeError:
+ self._action_space = self._env.action_space
+ self._observation_space = self._env.observation_space
+ self._state_space = self._env.state_space if hasattr(self._env, "state_space") else self._observation_space
def __getattr__(self, key: str) -> Any:
"""Get an attribute from the wrapped environment
@@ -91,19 +99,19 @@ def state_space(self) -> gym.Space:
If the wrapped environment does not have the ``state_space`` property,
the value of the ``observation_space`` property will be used
"""
- return self._env.state_space if hasattr(self._env, "state_space") else self._env.observation_space
+ return self._state_space
@property
def observation_space(self) -> gym.Space:
"""Observation space
"""
- return self._env.observation_space
+ return self._observation_space
@property
def action_space(self) -> gym.Space:
"""Action space
"""
- return self._env.action_space
+ return self._action_space
class MultiAgentEnvWrapper(object):
diff --git a/skrl/envs/wrappers/torch/gym_envs.py b/skrl/envs/wrappers/torch/gym_envs.py
index ee50cbf9..7b8b6af8 100644
--- a/skrl/envs/wrappers/torch/gym_envs.py
+++ b/skrl/envs/wrappers/torch/gym_envs.py
@@ -113,6 +113,8 @@ def _tensor_to_action(self, actions: torch.Tensor) -> Any:
return np.array(actions.cpu().numpy(), dtype=space[0].dtype).reshape(-1)
elif isinstance(space, gym.spaces.Discrete):
return actions.item()
+ elif isinstance(space, gym.spaces.MultiDiscrete):
+ return np.array(actions.cpu().numpy(), dtype=space.dtype).reshape(space.shape)
elif isinstance(space, gym.spaces.Box):
return np.array(actions.cpu().numpy(), dtype=space.dtype).reshape(space.shape)
raise ValueError(f"Action space type {type(space)} not supported. Please report this issue")
diff --git a/skrl/envs/wrappers/torch/gymnasium_envs.py b/skrl/envs/wrappers/torch/gymnasium_envs.py
index 0009575b..74db7e0d 100644
--- a/skrl/envs/wrappers/torch/gymnasium_envs.py
+++ b/skrl/envs/wrappers/torch/gymnasium_envs.py
@@ -108,6 +108,8 @@ def _tensor_to_action(self, actions: torch.Tensor) -> Any:
return np.array(actions.cpu().numpy(), dtype=space[0].dtype).reshape(-1)
if isinstance(space, gymnasium.spaces.Discrete):
return actions.item()
+ elif isinstance(space, gymnasium.spaces.MultiDiscrete):
+ return np.array(actions.cpu().numpy(), dtype=space.dtype).reshape(space.shape)
elif isinstance(space, gymnasium.spaces.Box):
return np.array(actions.cpu().numpy(), dtype=space.dtype).reshape(space.shape)
raise ValueError(f"Action space type {type(space)} not supported. Please report this issue")
diff --git a/skrl/envs/wrappers/torch/isaac_orbit_envs.py b/skrl/envs/wrappers/torch/isaac_orbit_envs.py
index e558c698..c9670ce1 100644
--- a/skrl/envs/wrappers/torch/isaac_orbit_envs.py
+++ b/skrl/envs/wrappers/torch/isaac_orbit_envs.py
@@ -26,8 +26,7 @@ def step(self, actions: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor, torch
:return: Observation, reward, terminated, truncated, info
:rtype: tuple of torch.Tensor and any other info
"""
- self._obs_dict, reward, terminated, info = self._env.step(actions)
- truncated = info["time_outs"] if "time_outs" in info else torch.zeros_like(terminated)
+ self._obs_dict, reward, terminated, truncated, info = self._env.step(actions)
return self._obs_dict["policy"], reward.view(-1, 1), terminated.view(-1, 1), truncated.view(-1, 1), info
def reset(self) -> Tuple[torch.Tensor, Any]:
@@ -37,9 +36,9 @@ def reset(self) -> Tuple[torch.Tensor, Any]:
:rtype: torch.Tensor and any other info
"""
if self._reset_once:
- self._obs_dict = self._env.reset()
+ self._obs_dict, info = self._env.reset()
self._reset_once = False
- return self._obs_dict["policy"], {}
+ return self._obs_dict["policy"], info
def render(self, *args, **kwargs) -> None:
"""Render the environment
diff --git a/skrl/memories/jax/base.py b/skrl/memories/jax/base.py
index f0ea27a2..41cea1b1 100644
--- a/skrl/memories/jax/base.py
+++ b/skrl/memories/jax/base.py
@@ -126,6 +126,8 @@ def _get_space_size(self,
elif issubclass(type(space), gym.Space):
if issubclass(type(space), gym.spaces.Discrete):
return (1,) if keep_dimensions else 1
+ elif issubclass(type(space), gym.spaces.MultiDiscrete):
+ return space.nvec.shape[0]
elif issubclass(type(space), gym.spaces.Box):
return tuple(space.shape) if keep_dimensions else np.prod(space.shape)
elif issubclass(type(space), gym.spaces.Dict):
@@ -135,6 +137,8 @@ def _get_space_size(self,
elif issubclass(type(space), gymnasium.Space):
if issubclass(type(space), gymnasium.spaces.Discrete):
return (1,) if keep_dimensions else 1
+ elif issubclass(type(space), gymnasium.spaces.MultiDiscrete):
+ return space.nvec.shape[0]
elif issubclass(type(space), gymnasium.spaces.Box):
return tuple(space.shape) if keep_dimensions else np.prod(space.shape)
elif issubclass(type(space), gymnasium.spaces.Dict):
diff --git a/skrl/memories/torch/base.py b/skrl/memories/torch/base.py
index da10626f..7d0f0615 100644
--- a/skrl/memories/torch/base.py
+++ b/skrl/memories/torch/base.py
@@ -102,6 +102,8 @@ def _get_space_size(self,
elif issubclass(type(space), gym.Space):
if issubclass(type(space), gym.spaces.Discrete):
return (1,) if keep_dimensions else 1
+ elif issubclass(type(space), gym.spaces.MultiDiscrete):
+ return space.nvec.shape[0]
elif issubclass(type(space), gym.spaces.Box):
return tuple(space.shape) if keep_dimensions else np.prod(space.shape)
elif issubclass(type(space), gym.spaces.Dict):
@@ -111,6 +113,8 @@ def _get_space_size(self,
elif issubclass(type(space), gymnasium.Space):
if issubclass(type(space), gymnasium.spaces.Discrete):
return (1,) if keep_dimensions else 1
+ elif issubclass(type(space), gymnasium.spaces.MultiDiscrete):
+ return space.nvec.shape[0]
elif issubclass(type(space), gymnasium.spaces.Box):
return tuple(space.shape) if keep_dimensions else np.prod(space.shape)
elif issubclass(type(space), gymnasium.spaces.Dict):
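
With this change a ``MultiDiscrete`` space occupies one memory column per sub-action (the length
of ``nvec``), not the sum of the per-component choices. For example, with assumed values::

    import gymnasium

    space = gymnasium.spaces.MultiDiscrete([3, 2])
    print(space.nvec.shape[0])   # 2 -> two integer columns stored per sample
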
diff --git a/skrl/models/jax/__init__.py b/skrl/models/jax/__init__.py
index fd3b1064..ef2386fd 100644
--- a/skrl/models/jax/__init__.py
+++ b/skrl/models/jax/__init__.py
@@ -3,3 +3,4 @@
from skrl.models.jax.categorical import CategoricalMixin
from skrl.models.jax.deterministic import DeterministicMixin
from skrl.models.jax.gaussian import GaussianMixin
+from skrl.models.jax.multicategorical import MultiCategoricalMixin
diff --git a/skrl/models/jax/multicategorical.py b/skrl/models/jax/multicategorical.py
new file mode 100644
index 00000000..9ad1bb3e
--- /dev/null
+++ b/skrl/models/jax/multicategorical.py
@@ -0,0 +1,194 @@
+from typing import Any, Mapping, Optional, Tuple, Union
+
+from functools import partial
+
+import flax
+import jax
+import jax.numpy as jnp
+import numpy as np
+
+from skrl import config
+
+
+# https://jax.readthedocs.io/en/latest/faq.html#strategy-1-jit-compiled-helper-function
+@partial(jax.jit, static_argnames=("unnormalized_log_prob"))
+def _categorical(net_output,
+ unnormalized_log_prob,
+ taken_actions,
+ key):
+ # normalize
+ if unnormalized_log_prob:
+ logits = net_output - jax.scipy.special.logsumexp(net_output, axis=-1, keepdims=True)
+ # probs = jax.nn.softmax(logits)
+ else:
+ probs = net_output / net_output.sum(-1, keepdims=True)
+ eps = jnp.finfo(probs.dtype).eps
+ logits = jnp.log(probs.clip(min=eps, max=1 - eps))
+
+ # sample actions
+ actions = jax.random.categorical(key, logits, axis=-1, shape=None)
+
+ # log of the probability density function
+ taken_actions = actions if taken_actions is None else taken_actions.astype(jnp.int32).reshape(-1)
+ log_prob = jax.nn.log_softmax(logits)[jnp.arange(taken_actions.shape[0]), taken_actions]
+
+ return actions.reshape(-1, 1), log_prob.reshape(-1, 1)
+
+@jax.jit
+def _entropy(logits):
+ logits = logits - jax.scipy.special.logsumexp(logits, axis=-1, keepdims=True)
+ logits = logits.clip(min=jnp.finfo(logits.dtype).min)
+ p_log_p = logits * jax.nn.softmax(logits)
+ return -p_log_p.sum(-1)
+
+
+class MultiCategoricalMixin:
+ def __init__(self, unnormalized_log_prob: bool = True, reduction: str = "sum", role: str = "") -> None:
+ """MultiCategorical mixin model (stochastic model)
+
+        :param unnormalized_log_prob: Flag to indicate how the model's output will be interpreted (default: ``True``).
+ If True, the model's output is interpreted as unnormalized log probabilities
+ (it can be any real number), otherwise as normalized probabilities
+ (the output must be non-negative, finite and have a non-zero sum)
+ :type unnormalized_log_prob: bool, optional
+        :param reduction: Reduction method for returning the log probability density function (default: ``"sum"``).
+                          Supported values are ``"mean"``, ``"sum"``, ``"prod"`` and ``"none"``. If ``"none"``, the log probability density
+ function is returned as a tensor of shape ``(num_samples, num_actions)`` instead of ``(num_samples, 1)``
+ :type reduction: str, optional
+        :param role: Role played by the model (default: ``""``)
+ :type role: str, optional
+
+ :raises ValueError: If the reduction method is not valid
+
+ Example::
+
+ # define the model
+ >>> import flax.linen as nn
+ >>> from skrl.models.jax import Model, MultiCategoricalMixin
+ >>>
+ >>> class Policy(MultiCategoricalMixin, Model):
+ ... def __init__(self, observation_space, action_space, device=None, unnormalized_log_prob=True, reduction="sum", **kwargs):
+ ... Model.__init__(self, observation_space, action_space, device, **kwargs)
+ ... MultiCategoricalMixin.__init__(self, unnormalized_log_prob, reduction)
+ ...
+ ... @nn.compact # marks the given module method allowing inlined submodules
+ ... def __call__(self, inputs, role):
+ ... x = nn.elu(nn.Dense(32)(inputs["states"]))
+ ... x = nn.elu(nn.Dense(32)(x))
+ ... x = nn.Dense(self.num_actions)(x)
+ ... return x, {}
+ ...
+ >>> # given an observation_space: gym.spaces.Box with shape (4,)
+ >>> # and an action_space: gym.spaces.MultiDiscrete with nvec = [3, 2]
+ >>> model = Policy(observation_space, action_space)
+ >>>
+ >>> print(model)
+ Policy(
+ # attributes
+ observation_space = Box(-1.0, 1.0, (4,), float32)
+ action_space = MultiDiscrete([3 2])
+ device = StreamExecutorGpuDevice(id=0, process_index=0, slice_index=0)
+ )
+ """
+ self._unnormalized_log_prob = unnormalized_log_prob
+
+ if reduction not in ["mean", "sum", "prod", "none"]:
+ raise ValueError("reduction must be one of 'mean', 'sum', 'prod' or 'none'")
+ self._reduction = jnp.mean if reduction == "mean" else jnp.sum if reduction == "sum" \
+ else jnp.prod if reduction == "prod" else None
+
+ self._i = 0
+ self._key = config.jax.key
+
+ self._action_space_nvec = np.cumsum(self.action_space.nvec).tolist()
+ self._action_space_shape = self._get_space_size(self.action_space, number_of_elements=False)
+
+ # https://flax.readthedocs.io/en/latest/api_reference/flax.errors.html#flax.errors.IncorrectPostInitOverrideError
+ flax.linen.Module.__post_init__(self)
+
+ def act(self,
+ inputs: Mapping[str, Union[Union[np.ndarray, jax.Array], Any]],
+ role: str = "",
+ params: Optional[jax.Array] = None) -> Tuple[jax.Array, Union[jax.Array, None], Mapping[str, Union[jax.Array, Any]]]:
+ """Act stochastically in response to the state of the environment
+
+ :param inputs: Model inputs. The most common keys are:
+
+ - ``"states"``: state of the environment used to make the decision
+ - ``"taken_actions"``: actions taken by the policy for the given states
+ :type inputs: dict where the values are typically np.ndarray or jax.Array
+        :param role: Role played by the model (default: ``""``)
+ :type role: str, optional
+ :param params: Parameters used to compute the output (default: ``None``).
+ If ``None``, internal parameters will be used
+        :type params: jax.Array, optional
+
+ :return: Model output. The first component is the action to be taken by the agent.
+ The second component is the log of the probability density function.
+ The third component is a dictionary containing the network output ``"net_output"``
+ and extra output values
+ :rtype: tuple of jax.Array, jax.Array or None, and dict
+
+ Example::
+
+ >>> # given a batch of sample states with shape (4096, 4)
+ >>> actions, log_prob, outputs = model.act({"states": states})
+ >>> print(actions.shape, log_prob.shape, outputs["net_output"].shape)
+ (4096, 2) (4096, 1) (4096, 5)
+ """
+ self._i += 1
+ subkey = jax.random.fold_in(self._key, self._i)
+ inputs["key"] = subkey
+
+ # map from states/observations to normalized probabilities or unnormalized log probabilities
+ net_output, outputs = self.apply(self.state_dict.params if params is None else params, inputs, role)
+
+ # split inputs
+ net_outputs = jnp.split(net_output, self._action_space_nvec, axis=-1)
+ if "taken_actions" in inputs:
+ taken_actions = jnp.split(inputs["taken_actions"], self._action_space_shape, axis=-1)
+ else:
+ taken_actions = [None] * self._action_space_shape
+
+ # compute actions and log_prob
+ actions, log_prob = [], []
+ for _net_output, _taken_actions in zip(net_outputs, taken_actions):
+ _actions, _log_prob = _categorical(_net_output,
+ self._unnormalized_log_prob,
+ _taken_actions,
+ subkey)
+ actions.append(_actions)
+ log_prob.append(_log_prob)
+
+ actions = jnp.concatenate(actions, axis=-1)
+ log_prob = jnp.concatenate(log_prob, axis=-1)
+
+ if self._reduction is not None:
+ log_prob = self._reduction(log_prob, axis=-1)
+ if log_prob.ndim != actions.ndim:
+ log_prob = jnp.expand_dims(log_prob, -1)
+
+ outputs["net_output"] = net_output
+ # avoid jax.errors.UnexpectedTracerError
+ outputs["stddev"] = jnp.full_like(log_prob, jnp.nan)
+ return actions, log_prob, outputs
+
+ def get_entropy(self, logits: jax.Array, role: str = "") -> jax.Array:
+ """Compute and return the entropy of the model
+
+        :param logits: Logits from which to compute the entropy
+        :type logits: jax.Array
+        :param role: Role played by the model (default: ``""``)
+        :type role: str, optional
+
+ :return: Entropy of the model
+ :rtype: jax.Array
+
+ Example::
+
+            # given the model's logits, e.g. taken from outputs["net_output"]
+            >>> entropy = model.get_entropy(logits)
+            >>> print(entropy.shape)
+            (4096,)
+ """
+ return _entropy(logits)
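
The mixin draws a fresh PRNG subkey on every ``act()`` call by folding an increasing counter into
skrl's global key. A standalone sketch of that pattern, independent of skrl (assumed key and
probabilities)::

    import jax
    import jax.numpy as jnp

    key = jax.random.PRNGKey(0)
    logits = jnp.log(jnp.array([[0.2, 0.5, 0.3]]))
    for i in range(1, 4):
        subkey = jax.random.fold_in(key, i)                       # deterministic per-call subkey
        print(int(jax.random.categorical(subkey, logits, axis=-1)[0]))
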
diff --git a/skrl/models/torch/__init__.py b/skrl/models/torch/__init__.py
index c7be0d71..774ebfeb 100644
--- a/skrl/models/torch/__init__.py
+++ b/skrl/models/torch/__init__.py
@@ -3,5 +3,6 @@
from skrl.models.torch.categorical import CategoricalMixin
from skrl.models.torch.deterministic import DeterministicMixin
from skrl.models.torch.gaussian import GaussianMixin
+from skrl.models.torch.multicategorical import MultiCategoricalMixin
from skrl.models.torch.multivariate_gaussian import MultivariateGaussianMixin
from skrl.models.torch.tabular import TabularMixin
diff --git a/skrl/models/torch/categorical.py b/skrl/models/torch/categorical.py
index 9181dc89..6b338ca5 100644
--- a/skrl/models/torch/categorical.py
+++ b/skrl/models/torch/categorical.py
@@ -52,13 +52,8 @@ def __init__(self, unnormalized_log_prob: bool = True, role: str = "") -> None:
)
)
"""
- if not hasattr(self, "_c_unnormalized_log_prob"):
- self._c_unnormalized_log_prob = {}
- self._c_unnormalized_log_prob[role] = unnormalized_log_prob
-
- if not hasattr(self, "_c_distribution"):
- self._c_distribution = {}
- self._c_distribution[role] = None
+ self._unnormalized_log_prob = unnormalized_log_prob
+ self._distribution = None
def act(self,
inputs: Mapping[str, Union[torch.Tensor, Any]],
@@ -90,15 +85,15 @@ def act(self,
net_output, outputs = self.compute(inputs, role)
# unnormalized log probabilities
- if self._c_unnormalized_log_prob[role] if role in self._c_unnormalized_log_prob else self._c_unnormalized_log_prob[""]:
- self._c_distribution[role] = Categorical(logits=net_output)
+ if self._unnormalized_log_prob:
+ self._distribution = Categorical(logits=net_output)
# normalized probabilities
else:
- self._c_distribution[role] = Categorical(probs=net_output)
+ self._distribution = Categorical(probs=net_output)
# actions and log of the probability density function
- actions = self._c_distribution[role].sample()
- log_prob = self._c_distribution[role].log_prob(inputs.get("taken_actions", actions).view(-1))
+ actions = self._distribution.sample()
+ log_prob = self._distribution.log_prob(inputs.get("taken_actions", actions).view(-1))
outputs["net_output"] = net_output
return actions.unsqueeze(-1), log_prob.unsqueeze(-1), outputs
@@ -117,10 +112,9 @@ def get_entropy(self, role: str = "") -> torch.Tensor:
>>> print(entropy.shape)
torch.Size([4096, 1])
"""
- distribution = self._c_distribution[role] if role in self._c_distribution else self._c_distribution[""]
- if distribution is None:
+ if self._distribution is None:
return torch.tensor(0.0, device=self.device)
- return distribution.entropy().to(self.device)
+ return self._distribution.entropy().to(self.device)
def distribution(self, role: str = "") -> torch.distributions.Categorical:
"""Get the current distribution of the model
@@ -136,4 +130,4 @@ def distribution(self, role: str = "") -> torch.distributions.Categorical:
>>> print(distribution)
Categorical(probs: torch.Size([4096, 2]), logits: torch.Size([4096, 2]))
"""
- return self._c_distribution[role] if role in self._c_distribution else self._c_distribution[""]
+ return self._distribution
diff --git a/skrl/models/torch/deterministic.py b/skrl/models/torch/deterministic.py
index a5624104..af6cdce5 100644
--- a/skrl/models/torch/deterministic.py
+++ b/skrl/models/torch/deterministic.py
@@ -51,14 +51,12 @@ def __init__(self, clip_actions: bool = False, role: str = "") -> None:
)
)
"""
- if not hasattr(self, "_d_clip_actions"):
- self._d_clip_actions = {}
- self._d_clip_actions[role] = clip_actions and (issubclass(type(self.action_space), gym.Space) or \
+ self._clip_actions = clip_actions and (issubclass(type(self.action_space), gym.Space) or \
issubclass(type(self.action_space), gymnasium.Space))
- if self._d_clip_actions[role]:
- self.clip_actions_min = torch.tensor(self.action_space.low, device=self.device, dtype=torch.float32)
- self.clip_actions_max = torch.tensor(self.action_space.high, device=self.device, dtype=torch.float32)
+ if self._clip_actions:
+ self._clip_actions_min = torch.tensor(self.action_space.low, device=self.device, dtype=torch.float32)
+ self._clip_actions_max = torch.tensor(self.action_space.high, device=self.device, dtype=torch.float32)
def act(self,
inputs: Mapping[str, Union[torch.Tensor, Any]],
@@ -88,7 +86,7 @@ def act(self,
actions, outputs = self.compute(inputs, role)
# clip actions
- if self._d_clip_actions[role] if role in self._d_clip_actions else self._d_clip_actions[""]:
- actions = torch.clamp(actions, min=self.clip_actions_min, max=self.clip_actions_max)
+ if self._clip_actions:
+ actions = torch.clamp(actions, min=self._clip_actions_min, max=self._clip_actions_max)
return actions, None, outputs
diff --git a/skrl/models/torch/gaussian.py b/skrl/models/torch/gaussian.py
index fb4f13d6..a9721b63 100644
--- a/skrl/models/torch/gaussian.py
+++ b/skrl/models/torch/gaussian.py
@@ -72,40 +72,24 @@ def __init__(self,
)
)
"""
- if not hasattr(self, "_g_clip_actions"):
- self._g_clip_actions = {}
- self._g_clip_actions[role] = clip_actions and (issubclass(type(self.action_space), gym.Space) or \
+ self._clip_actions = clip_actions and (issubclass(type(self.action_space), gym.Space) or \
issubclass(type(self.action_space), gymnasium.Space))
- if self._g_clip_actions[role]:
- self.clip_actions_min = torch.tensor(self.action_space.low, device=self.device, dtype=torch.float32)
- self.clip_actions_max = torch.tensor(self.action_space.high, device=self.device, dtype=torch.float32)
-
- if not hasattr(self, "_g_clip_log_std"):
- self._g_clip_log_std = {}
- self._g_clip_log_std[role] = clip_log_std
- if not hasattr(self, "_g_log_std_min"):
- self._g_log_std_min = {}
- self._g_log_std_min[role] = min_log_std
- if not hasattr(self, "_g_log_std_max"):
- self._g_log_std_max = {}
- self._g_log_std_max[role] = max_log_std
-
- if not hasattr(self, "_g_log_std"):
- self._g_log_std = {}
- self._g_log_std[role] = None
- if not hasattr(self, "_g_num_samples"):
- self._g_num_samples = {}
- self._g_num_samples[role] = None
- if not hasattr(self, "_g_distribution"):
- self._g_distribution = {}
- self._g_distribution[role] = None
+ if self._clip_actions:
+ self._clip_actions_min = torch.tensor(self.action_space.low, device=self.device, dtype=torch.float32)
+ self._clip_actions_max = torch.tensor(self.action_space.high, device=self.device, dtype=torch.float32)
+
+ self._clip_log_std = clip_log_std
+ self._log_std_min = min_log_std
+ self._log_std_max = max_log_std
+
+ self._log_std = None
+ self._num_samples = None
+ self._distribution = None
if reduction not in ["mean", "sum", "prod", "none"]:
raise ValueError("reduction must be one of 'mean', 'sum', 'prod' or 'none'")
- if not hasattr(self, "_g_reduction"):
- self._g_reduction = {}
- self._g_reduction[role] = torch.mean if reduction == "mean" else torch.sum if reduction == "sum" \
+ self._reduction = torch.mean if reduction == "mean" else torch.sum if reduction == "sum" \
else torch.prod if reduction == "prod" else None
def act(self,
@@ -138,29 +122,26 @@ def act(self,
mean_actions, log_std, outputs = self.compute(inputs, role)
# clamp log standard deviations
- if self._g_clip_log_std[role] if role in self._g_clip_log_std else self._g_clip_log_std[""]:
- log_std = torch.clamp(log_std,
- self._g_log_std_min[role] if role in self._g_log_std_min else self._g_log_std_min[""],
- self._g_log_std_max[role] if role in self._g_log_std_max else self._g_log_std_max[""])
+ if self._clip_log_std:
+ log_std = torch.clamp(log_std, self._log_std_min, self._log_std_max)
- self._g_log_std[role] = log_std
- self._g_num_samples[role] = mean_actions.shape[0]
+ self._log_std = log_std
+ self._num_samples = mean_actions.shape[0]
# distribution
- self._g_distribution[role] = Normal(mean_actions, log_std.exp())
+ self._distribution = Normal(mean_actions, log_std.exp())
# sample using the reparameterization trick
- actions = self._g_distribution[role].rsample()
+ actions = self._distribution.rsample()
# clip actions
- if self._g_clip_actions[role] if role in self._g_clip_actions else self._g_clip_actions[""]:
- actions = torch.clamp(actions, min=self.clip_actions_min, max=self.clip_actions_max)
+ if self._clip_actions:
+ actions = torch.clamp(actions, min=self._clip_actions_min, max=self._clip_actions_max)
# log of the probability density function
- log_prob = self._g_distribution[role].log_prob(inputs.get("taken_actions", actions))
- reduction = self._g_reduction[role] if role in self._g_reduction else self._g_reduction[""]
- if reduction is not None:
- log_prob = reduction(log_prob, dim=-1)
+ log_prob = self._distribution.log_prob(inputs.get("taken_actions", actions))
+ if self._reduction is not None:
+ log_prob = self._reduction(log_prob, dim=-1)
if log_prob.dim() != actions.dim():
log_prob = log_prob.unsqueeze(-1)
@@ -181,10 +162,9 @@ def get_entropy(self, role: str = "") -> torch.Tensor:
>>> print(entropy.shape)
torch.Size([4096, 8])
"""
- distribution = self._g_distribution[role] if role in self._g_distribution else self._g_distribution[""]
- if distribution is None:
+ if self._distribution is None:
return torch.tensor(0.0, device=self.device)
- return distribution.entropy().to(self.device)
+ return self._distribution.entropy().to(self.device)
def get_log_std(self, role: str = "") -> torch.Tensor:
"""Return the log standard deviation of the model
@@ -200,8 +180,7 @@ def get_log_std(self, role: str = "") -> torch.Tensor:
>>> print(log_std.shape)
torch.Size([4096, 8])
"""
- return (self._g_log_std[role] if role in self._g_log_std else self._g_log_std[""]) \
- .repeat(self._g_num_samples[role] if role in self._g_num_samples else self._g_num_samples[""], 1)
+ return self._log_std.repeat(self._num_samples, 1)
def distribution(self, role: str = "") -> torch.distributions.Normal:
"""Get the current distribution of the model
@@ -217,4 +196,4 @@ def distribution(self, role: str = "") -> torch.distributions.Normal:
>>> print(distribution)
Normal(loc: torch.Size([4096, 8]), scale: torch.Size([4096, 8]))
"""
- return self._g_distribution[role] if role in self._g_distribution else self._g_distribution[""]
+ return self._distribution
diff --git a/skrl/models/torch/multicategorical.py b/skrl/models/torch/multicategorical.py
new file mode 100644
index 00000000..2c749862
--- /dev/null
+++ b/skrl/models/torch/multicategorical.py
@@ -0,0 +1,155 @@
+from typing import Any, Mapping, Sequence, Tuple, Union
+
+import torch
+from torch.distributions import Categorical
+
+
+class MultiCategoricalMixin:
+ def __init__(self, unnormalized_log_prob: bool = True, reduction: str = "sum", role: str = "") -> None:
+ """MultiCategorical mixin model (stochastic model)
+
+        :param unnormalized_log_prob: Flag to indicate how the model's output will be interpreted (default: ``True``).
+ If True, the model's output is interpreted as unnormalized log probabilities
+ (it can be any real number), otherwise as normalized probabilities
+ (the output must be non-negative, finite and have a non-zero sum)
+ :type unnormalized_log_prob: bool, optional
+        :param reduction: Reduction method for returning the log probability density function (default: ``"sum"``).
+                          Supported values are ``"mean"``, ``"sum"``, ``"prod"`` and ``"none"``. If ``"none"``, the log probability density
+ function is returned as a tensor of shape ``(num_samples, num_actions)`` instead of ``(num_samples, 1)``
+ :type reduction: str, optional
+        :param role: Role played by the model (default: ``""``)
+ :type role: str, optional
+
+ :raises ValueError: If the reduction method is not valid
+
+ Example::
+
+ # define the model
+ >>> import torch
+ >>> import torch.nn as nn
+ >>> from skrl.models.torch import Model, MultiCategoricalMixin
+ >>>
+ >>> class Policy(MultiCategoricalMixin, Model):
+ ... def __init__(self, observation_space, action_space, device="cuda:0", unnormalized_log_prob=True, reduction="sum"):
+ ... Model.__init__(self, observation_space, action_space, device)
+ ... MultiCategoricalMixin.__init__(self, unnormalized_log_prob, reduction)
+ ...
+ ... self.net = nn.Sequential(nn.Linear(self.num_observations, 32),
+ ... nn.ELU(),
+ ... nn.Linear(32, 32),
+ ... nn.ELU(),
+ ... nn.Linear(32, self.num_actions))
+ ...
+ ... def compute(self, inputs, role):
+ ... return self.net(inputs["states"]), {}
+ ...
+ >>> # given an observation_space: gym.spaces.Box with shape (4,)
+ >>> # and an action_space: gym.spaces.MultiDiscrete with nvec = [3, 2]
+ >>> model = Policy(observation_space, action_space)
+ >>>
+ >>> print(model)
+ Policy(
+ (net): Sequential(
+ (0): Linear(in_features=4, out_features=32, bias=True)
+ (1): ELU(alpha=1.0)
+ (2): Linear(in_features=32, out_features=32, bias=True)
+ (3): ELU(alpha=1.0)
+ (4): Linear(in_features=32, out_features=5, bias=True)
+ )
+ )
+ """
+ self._unnormalized_log_prob = unnormalized_log_prob
+ self._distributions = []
+
+ if reduction not in ["mean", "sum", "prod", "none"]:
+ raise ValueError("reduction must be one of 'mean', 'sum', 'prod' or 'none'")
+ self._reduction = torch.mean if reduction == "mean" else torch.sum if reduction == "sum" \
+ else torch.prod if reduction == "prod" else None
+
+ def act(self,
+ inputs: Mapping[str, Union[torch.Tensor, Any]],
+ role: str = "") -> Tuple[torch.Tensor, Union[torch.Tensor, None], Mapping[str, Union[torch.Tensor, Any]]]:
+ """Act stochastically in response to the state of the environment
+
+ :param inputs: Model inputs. The most common keys are:
+
+ - ``"states"``: state of the environment used to make the decision
+ - ``"taken_actions"``: actions taken by the policy for the given states
+ :type inputs: dict where the values are typically torch.Tensor
+        :param role: Role played by the model (default: ``""``)
+ :type role: str, optional
+
+ :return: Model output. The first component is the action to be taken by the agent.
+ The second component is the log of the probability density function.
+ The third component is a dictionary containing the network output ``"net_output"``
+ and extra output values
+ :rtype: tuple of torch.Tensor, torch.Tensor or None, and dict
+
+ Example::
+
+ >>> # given a batch of sample states with shape (4096, 4)
+ >>> actions, log_prob, outputs = model.act({"states": states})
+ >>> print(actions.shape, log_prob.shape, outputs["net_output"].shape)
+ torch.Size([4096, 2]) torch.Size([4096, 1]) torch.Size([4096, 5])
+ """
+ # map from states/observations to normalized probabilities or unnormalized log probabilities
+ net_output, outputs = self.compute(inputs, role)
+
+ # unnormalized log probabilities
+ if self._unnormalized_log_prob:
+ self._distributions = [Categorical(logits=logits) for logits in torch.split(net_output, self.action_space.nvec.tolist(), dim=-1)]
+ # normalized probabilities
+ else:
+ self._distributions = [Categorical(probs=probs) for probs in torch.split(net_output, self.action_space.nvec.tolist(), dim=-1)]
+
+ # actions
+ actions = torch.stack([distribution.sample() for distribution in self._distributions], dim=-1)
+
+ # log of the probability density function
+ log_prob = torch.stack([distribution.log_prob(_actions.view(-1)) for _actions, distribution \
+ in zip(torch.unbind(inputs.get("taken_actions", actions), dim=-1), self._distributions)], dim=-1)
+ if self._reduction is not None:
+ log_prob = self._reduction(log_prob, dim=-1)
+ if log_prob.dim() != actions.dim():
+ log_prob = log_prob.unsqueeze(-1)
+
+ outputs["net_output"] = net_output
+ return actions, log_prob, outputs
+
+ def get_entropy(self, role: str = "") -> torch.Tensor:
+ """Compute and return the entropy of the model
+
+        :param role: Role played by the model (default: ``""``)
+        :type role: str, optional
+        :return: Entropy of the model
+        :rtype: torch.Tensor
+
+ Example::
+
+ >>> entropy = model.get_entropy()
+ >>> print(entropy.shape)
+ torch.Size([4096, 1])
+ """
+ if self._distributions:
+ entropy = torch.stack([distribution.entropy().to(self.device) for distribution in self._distributions], dim=-1)
+ if self._reduction is not None:
+ return self._reduction(entropy, dim=-1).unsqueeze(-1)
+ return entropy
+ return torch.tensor(0.0, device=self.device)
+
+ def distribution(self, role: str = "") -> torch.distributions.Categorical:
+ """Get the current distribution of the model
+
+        :param role: Role played by the model (default: ``""``)
+        :type role: str, optional
+        :return: First distribution of the model (one ``Categorical`` per action-space component is kept internally)
+        :rtype: torch.distributions.Categorical
+
+ Example::
+
+ >>> distribution = model.distribution()
+ >>> print(distribution)
+ Categorical(probs: torch.Size([10, 3]), logits: torch.Size([10, 3]))
+ """
+ # TODO: find a way to integrate in the class the distribution functions (e.g.: stddev)
+ return self._distributions[0]
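
Stripped of the model plumbing, the sampling path above amounts to one ``Categorical`` per entry
of ``nvec``. A self-contained sketch with assumed shapes::

    import torch
    from torch.distributions import Categorical

    nvec = [3, 2]                                  # action_space.nvec for MultiDiscrete([3, 2])
    net_output = torch.randn(4096, sum(nvec))      # the policy head emits one logit block per sub-action
    distributions = [Categorical(logits=logits)
                     for logits in torch.split(net_output, nvec, dim=-1)]
    actions = torch.stack([d.sample() for d in distributions], dim=-1)
    log_prob = torch.stack([d.log_prob(a) for d, a in zip(distributions, actions.unbind(dim=-1))],
                           dim=-1).sum(dim=-1, keepdim=True)      # "sum" reduction
    print(actions.shape, log_prob.shape)           # torch.Size([4096, 2]) torch.Size([4096, 1])
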
diff --git a/skrl/models/torch/multivariate_gaussian.py b/skrl/models/torch/multivariate_gaussian.py
index bf7a9ccf..0f43aadc 100644
--- a/skrl/models/torch/multivariate_gaussian.py
+++ b/skrl/models/torch/multivariate_gaussian.py
@@ -65,34 +65,20 @@ def __init__(self,
)
)
"""
- if not hasattr(self, "_mg_clip_actions"):
- self._mg_clip_actions = {}
- self._mg_clip_actions[role] = clip_actions and (issubclass(type(self.action_space), gym.Space) or \
+ self._clip_actions = clip_actions and (issubclass(type(self.action_space), gym.Space) or \
issubclass(type(self.action_space), gymnasium.Space))
- if self._mg_clip_actions[role]:
- self.clip_actions_min = torch.tensor(self.action_space.low, device=self.device, dtype=torch.float32)
- self.clip_actions_max = torch.tensor(self.action_space.high, device=self.device, dtype=torch.float32)
-
- if not hasattr(self, "_mg_clip_log_std"):
- self._mg_clip_log_std = {}
- self._mg_clip_log_std[role] = clip_log_std
- if not hasattr(self, "_mg_log_std_min"):
- self._mg_log_std_min = {}
- self._mg_log_std_min[role] = min_log_std
- if not hasattr(self, "_mg_log_std_max"):
- self._mg_log_std_max = {}
- self._mg_log_std_max[role] = max_log_std
-
- if not hasattr(self, "_mg_log_std"):
- self._mg_log_std = {}
- self._mg_log_std[role] = None
- if not hasattr(self, "_mg_num_samples"):
- self._mg_num_samples = {}
- self._mg_num_samples[role] = None
- if not hasattr(self, "_mg_distribution"):
- self._mg_distribution = {}
- self._mg_distribution[role] = None
+ if self._clip_actions:
+ self._clip_actions_min = torch.tensor(self.action_space.low, device=self.device, dtype=torch.float32)
+ self._clip_actions_max = torch.tensor(self.action_space.high, device=self.device, dtype=torch.float32)
+
+ self._clip_log_std = clip_log_std
+ self._log_std_min = min_log_std
+ self._log_std_max = max_log_std
+
+ self._log_std = None
+ self._num_samples = None
+ self._distribution = None
def act(self,
inputs: Mapping[str, Union[torch.Tensor, Any]],
@@ -124,27 +110,25 @@ def act(self,
mean_actions, log_std, outputs = self.compute(inputs, role)
# clamp log standard deviations
- if self._mg_clip_log_std[role] if role in self._mg_clip_log_std else self._mg_clip_log_std[""]:
- log_std = torch.clamp(log_std,
- self._mg_log_std_min[role] if role in self._mg_log_std_min else self._mg_log_std_min[""],
- self._mg_log_std_max[role] if role in self._mg_log_std_max else self._mg_log_std_max[""])
+ if self._clip_log_std:
+ log_std = torch.clamp(log_std, self._log_std_min, self._log_std_max)
- self._mg_log_std[role] = log_std
- self._mg_num_samples[role] = mean_actions.shape[0]
+ self._log_std = log_std
+ self._num_samples = mean_actions.shape[0]
# distribution
covariance = torch.diag(log_std.exp() * log_std.exp())
- self._mg_distribution[role] = MultivariateNormal(mean_actions, scale_tril=covariance)
+ self._distribution = MultivariateNormal(mean_actions, scale_tril=covariance)
# sample using the reparameterization trick
- actions = self._mg_distribution[role].rsample()
+ actions = self._distribution.rsample()
# clip actions
- if self._mg_clip_actions[role] if role in self._mg_clip_actions else self._mg_clip_actions[""]:
- actions = torch.clamp(actions, min=self.clip_actions_min, max=self.clip_actions_max)
+ if self._clip_actions:
+ actions = torch.clamp(actions, min=self._clip_actions_min, max=self._clip_actions_max)
# log of the probability density function
- log_prob = self._mg_distribution[role].log_prob(inputs.get("taken_actions", actions))
+ log_prob = self._distribution.log_prob(inputs.get("taken_actions", actions))
if log_prob.dim() != actions.dim():
log_prob = log_prob.unsqueeze(-1)
@@ -165,10 +149,9 @@ def get_entropy(self, role: str = "") -> torch.Tensor:
>>> print(entropy.shape)
torch.Size([4096])
"""
- distribution = self._mg_distribution[role] if role in self._mg_distribution else self._mg_distribution[""]
- if distribution is None:
+ if self._distribution is None:
return torch.tensor(0.0, device=self.device)
- return distribution.entropy().to(self.device)
+ return self._distribution.entropy().to(self.device)
def get_log_std(self, role: str = "") -> torch.Tensor:
"""Return the log standard deviation of the model
@@ -184,8 +167,7 @@ def get_log_std(self, role: str = "") -> torch.Tensor:
>>> print(log_std.shape)
torch.Size([4096, 8])
"""
- return (self._mg_log_std[role] if role in self._mg_log_std else self._mg_log_std[""]) \
- .repeat(self._mg_num_samples[role] if role in self._mg_num_samples else self._mg_num_samples[""], 1)
+ return self._log_std.repeat(self._num_samples, 1)
def distribution(self, role: str = "") -> torch.distributions.MultivariateNormal:
"""Get the current distribution of the model
@@ -201,4 +183,4 @@ def distribution(self, role: str = "") -> torch.distributions.MultivariateNormal
>>> print(distribution)
MultivariateNormal(loc: torch.Size([4096, 8]), scale_tril: torch.Size([4096, 8, 8]))
"""
- return self._mg_distribution[role] if role in self._mg_distribution else self._mg_distribution[""]
+ return self._distribution
diff --git a/skrl/multi_agents/jax/ippo/ippo.py b/skrl/multi_agents/jax/ippo/ippo.py
index 31aed63c..6dcfd328 100644
--- a/skrl/multi_agents/jax/ippo/ippo.py
+++ b/skrl/multi_agents/jax/ippo/ippo.py
@@ -1,4 +1,4 @@
-from typing import Any, Dict, Mapping, Optional, Sequence, Union
+from typing import Any, Mapping, Optional, Sequence, Union
import copy
import functools
@@ -189,7 +189,7 @@ def _value_loss(params):
class IPPO(MultiAgent):
def __init__(self,
possible_agents: Sequence[str],
- models: Dict[str, Model],
+ models: Mapping[str, Model],
memories: Optional[Mapping[str, Memory]] = None,
observation_spaces: Optional[Union[Mapping[str, int], Mapping[str, gym.Space], Mapping[str, gymnasium.Space]]] = None,
action_spaces: Optional[Union[Mapping[str, int], Mapping[str, gym.Space], Mapping[str, gymnasium.Space]]] = None,
@@ -308,7 +308,7 @@ def __init__(self,
else:
self._value_preprocessor[uid] = self._empty_preprocessor
- def init(self, trainer_cfg: Optional[Dict[str, Any]] = None) -> None:
+ def init(self, trainer_cfg: Optional[Mapping[str, Any]] = None) -> None:
"""Initialize the agent
"""
super().init(trainer_cfg=trainer_cfg)
diff --git a/skrl/multi_agents/jax/mappo/mappo.py b/skrl/multi_agents/jax/mappo/mappo.py
index dd15c560..aa3aff34 100644
--- a/skrl/multi_agents/jax/mappo/mappo.py
+++ b/skrl/multi_agents/jax/mappo/mappo.py
@@ -1,4 +1,4 @@
-from typing import Any, Dict, Mapping, Optional, Sequence, Union
+from typing import Any, Mapping, Optional, Sequence, Union
import copy
import functools
@@ -191,7 +191,7 @@ def _value_loss(params):
class MAPPO(MultiAgent):
def __init__(self,
possible_agents: Sequence[str],
- models: Dict[str, Model],
+ models: Mapping[str, Model],
memories: Optional[Mapping[str, Memory]] = None,
observation_spaces: Optional[Union[Mapping[str, int], Mapping[str, gym.Space], Mapping[str, gymnasium.Space]]] = None,
action_spaces: Optional[Union[Mapping[str, int], Mapping[str, gym.Space], Mapping[str, gymnasium.Space]]] = None,
@@ -323,7 +323,7 @@ def __init__(self,
else:
self._value_preprocessor[uid] = self._empty_preprocessor
- def init(self, trainer_cfg: Optional[Dict[str, Any]] = None) -> None:
+ def init(self, trainer_cfg: Optional[Mapping[str, Any]] = None) -> None:
"""Initialize the agent
"""
super().init(trainer_cfg=trainer_cfg)
diff --git a/skrl/multi_agents/torch/ippo/ippo.py b/skrl/multi_agents/torch/ippo/ippo.py
index e4a386f0..45913edd 100644
--- a/skrl/multi_agents/torch/ippo/ippo.py
+++ b/skrl/multi_agents/torch/ippo/ippo.py
@@ -1,4 +1,4 @@
-from typing import Any, Dict, Mapping, Optional, Sequence, Union
+from typing import Any, Mapping, Optional, Sequence, Union
import copy
import itertools
@@ -67,7 +67,7 @@
class IPPO(MultiAgent):
def __init__(self,
possible_agents: Sequence[str],
- models: Dict[str, Model],
+ models: Mapping[str, Model],
memories: Optional[Mapping[str, Memory]] = None,
observation_spaces: Optional[Union[Mapping[str, int], Mapping[str, gym.Space], Mapping[str, gymnasium.Space]]] = None,
action_spaces: Optional[Union[Mapping[str, int], Mapping[str, gym.Space], Mapping[str, gymnasium.Space]]] = None,
@@ -178,7 +178,7 @@ def __init__(self,
else:
self._value_preprocessor[uid] = self._empty_preprocessor
- def init(self, trainer_cfg: Optional[Dict[str, Any]] = None) -> None:
+ def init(self, trainer_cfg: Optional[Mapping[str, Any]] = None) -> None:
"""Initialize the agent
"""
super().init(trainer_cfg=trainer_cfg)
diff --git a/skrl/multi_agents/torch/mappo/mappo.py b/skrl/multi_agents/torch/mappo/mappo.py
index 0d98275b..98fff05c 100644
--- a/skrl/multi_agents/torch/mappo/mappo.py
+++ b/skrl/multi_agents/torch/mappo/mappo.py
@@ -1,4 +1,4 @@
-from typing import Any, Dict, Mapping, Optional, Sequence, Union
+from typing import Any, Mapping, Optional, Sequence, Union
import copy
import itertools
@@ -69,7 +69,7 @@
class MAPPO(MultiAgent):
def __init__(self,
possible_agents: Sequence[str],
- models: Dict[str, Model],
+ models: Mapping[str, Model],
memories: Optional[Mapping[str, Memory]] = None,
observation_spaces: Optional[Union[Mapping[str, int], Mapping[str, gym.Space], Mapping[str, gymnasium.Space]]] = None,
action_spaces: Optional[Union[Mapping[str, int], Mapping[str, gym.Space], Mapping[str, gymnasium.Space]]] = None,
@@ -193,7 +193,7 @@ def __init__(self,
else:
self._value_preprocessor[uid] = self._empty_preprocessor
- def init(self, trainer_cfg: Optional[Dict[str, Any]] = None) -> None:
+ def init(self, trainer_cfg: Optional[Mapping[str, Any]] = None) -> None:
"""Initialize the agent
"""
super().init(trainer_cfg=trainer_cfg)
diff --git a/skrl/trainers/jax/__init__.py b/skrl/trainers/jax/__init__.py
index 0db99a1b..4348d781 100644
--- a/skrl/trainers/jax/__init__.py
+++ b/skrl/trainers/jax/__init__.py
@@ -1,4 +1,4 @@
from skrl.trainers.jax.base import Trainer, generate_equally_spaced_scopes # isort:skip
-from skrl.trainers.jax.manual import ManualTrainer
from skrl.trainers.jax.sequential import SequentialTrainer
+from skrl.trainers.jax.step import StepTrainer
diff --git a/skrl/trainers/jax/base.py b/skrl/trainers/jax/base.py
index b30d0a86..b542c2e7 100644
--- a/skrl/trainers/jax/base.py
+++ b/skrl/trainers/jax/base.py
@@ -2,6 +2,7 @@
import atexit
import contextlib
+import sys
import tqdm
from skrl import logger
@@ -161,7 +162,7 @@ def single_agent_train(self) -> None:
# reset env
states, infos = self.env.reset()
- for timestep in tqdm.tqdm(range(self.initial_timestep, self.timesteps), disable=self.disable_progressbar):
+ for timestep in tqdm.tqdm(range(self.initial_timestep, self.timesteps), disable=self.disable_progressbar, file=sys.stdout):
# pre-interaction
self.agents.pre_interaction(timestep=timestep, timesteps=self.timesteps)
@@ -218,7 +219,7 @@ def single_agent_eval(self) -> None:
# reset env
states, infos = self.env.reset()
- for timestep in tqdm.tqdm(range(self.initial_timestep, self.timesteps), disable=self.disable_progressbar):
+ for timestep in tqdm.tqdm(range(self.initial_timestep, self.timesteps), disable=self.disable_progressbar, file=sys.stdout):
# compute actions
with contextlib.nullcontext():
@@ -274,7 +275,7 @@ def multi_agent_train(self) -> None:
states, infos = self.env.reset()
shared_states = infos.get("shared_states", None)
- for timestep in tqdm.tqdm(range(self.initial_timestep, self.timesteps), disable=self.disable_progressbar):
+ for timestep in tqdm.tqdm(range(self.initial_timestep, self.timesteps), disable=self.disable_progressbar, file=sys.stdout):
# pre-interaction
self.agents.pre_interaction(timestep=timestep, timesteps=self.timesteps)
@@ -334,7 +335,7 @@ def multi_agent_eval(self) -> None:
states, infos = self.env.reset()
shared_states = infos.get("shared_states", None)
- for timestep in tqdm.tqdm(range(self.initial_timestep, self.timesteps), disable=self.disable_progressbar):
+ for timestep in tqdm.tqdm(range(self.initial_timestep, self.timesteps), disable=self.disable_progressbar, file=sys.stdout):
# compute actions
with contextlib.nullcontext():
diff --git a/skrl/trainers/jax/sequential.py b/skrl/trainers/jax/sequential.py
index 0bd48278..6fbb261c 100644
--- a/skrl/trainers/jax/sequential.py
+++ b/skrl/trainers/jax/sequential.py
@@ -2,6 +2,7 @@
import contextlib
import copy
+import sys
import tqdm
import jax.numpy as jnp
@@ -11,12 +12,14 @@
from skrl.trainers.jax import Trainer
+# [start-config-dict-jax]
SEQUENTIAL_TRAINER_DEFAULT_CONFIG = {
"timesteps": 100000, # number of timesteps to train for
"headless": False, # whether to use headless mode (no rendering)
"disable_progressbar": False, # whether to disable the progressbar. If None, disable on non-TTY
"close_environment_at_exit": True, # whether to close the environment on normal program termination
}
+# [end-config-dict-jax]
class SequentialTrainer(Trainer):
@@ -84,7 +87,7 @@ def train(self) -> None:
# reset env
states, infos = self.env.reset()
- for timestep in tqdm.tqdm(range(self.initial_timestep, self.timesteps), disable=self.disable_progressbar):
+ for timestep in tqdm.tqdm(range(self.initial_timestep, self.timesteps), disable=self.disable_progressbar, file=sys.stdout):
# pre-interaction
for agent in self.agents:
@@ -156,7 +159,7 @@ def eval(self) -> None:
# reset env
states, infos = self.env.reset()
- for timestep in tqdm.tqdm(range(self.initial_timestep, self.timesteps), disable=self.disable_progressbar):
+ for timestep in tqdm.tqdm(range(self.initial_timestep, self.timesteps), disable=self.disable_progressbar, file=sys.stdout):
# compute actions
with contextlib.nullcontext():
diff --git a/skrl/trainers/jax/manual.py b/skrl/trainers/jax/step.py
similarity index 89%
rename from skrl/trainers/jax/manual.py
rename to skrl/trainers/jax/step.py
index b8bf0c40..ae7e5986 100644
--- a/skrl/trainers/jax/manual.py
+++ b/skrl/trainers/jax/step.py
@@ -1,33 +1,38 @@
-from typing import List, Optional, Union
+from typing import Any, List, Optional, Tuple, Union
import contextlib
import copy
+import sys
import tqdm
+import jax
import jax.numpy as jnp
+import numpy as np
from skrl.agents.jax import Agent
from skrl.envs.wrappers.jax import Wrapper
from skrl.trainers.jax import Trainer
-MANUAL_TRAINER_DEFAULT_CONFIG = {
+# [start-config-dict-jax]
+STEP_TRAINER_DEFAULT_CONFIG = {
"timesteps": 100000, # number of timesteps to train for
"headless": False, # whether to use headless mode (no rendering)
"disable_progressbar": False, # whether to disable the progressbar. If None, disable on non-TTY
"close_environment_at_exit": True, # whether to close the environment on normal program termination
}
+# [end-config-dict-jax]
-class ManualTrainer(Trainer):
+class StepTrainer(Trainer):
def __init__(self,
env: Wrapper,
agents: Union[Agent, List[Agent]],
agents_scope: Optional[List[int]] = None,
cfg: Optional[dict] = None) -> None:
- """Manual trainer
+ """Step-by-step trainer
- Train agents by manually controlling the training/evaluation loop
+ Train agents by controlling the training/evaluation loop step by step
:param env: Environment to train on
:type env: skrl.envs.wrappers.jax.Wrapper
@@ -36,10 +41,10 @@ def __init__(self,
:param agents_scope: Number of environments for each agent to train on (default: ``None``)
:type agents_scope: tuple or list of int, optional
:param cfg: Configuration dictionary (default: ``None``).
- See MANUAL_TRAINER_DEFAULT_CONFIG for default values
+ See STEP_TRAINER_DEFAULT_CONFIG for default values
:type cfg: dict, optional
"""
- _cfg = copy.deepcopy(MANUAL_TRAINER_DEFAULT_CONFIG)
+ _cfg = copy.deepcopy(STEP_TRAINER_DEFAULT_CONFIG)
_cfg.update(cfg if cfg is not None else {})
agents_scope = agents_scope if agents_scope is not None else []
super().__init__(env=env, agents=agents, agents_scope=agents_scope, cfg=_cfg)
@@ -56,7 +61,9 @@ def __init__(self,
self.states = None
- def train(self, timestep: Optional[int] = None, timesteps: Optional[int] = None) -> None:
+ def train(self, timestep: Optional[int] = None, timesteps: Optional[int] = None) -> \
+ Tuple[Union[np.ndarray, jax.Array], Union[np.ndarray, jax.Array],
+ Union[np.ndarray, jax.Array], Union[np.ndarray, jax.Array], Any]:
"""Execute a training iteration
This method executes the following steps once:
@@ -75,6 +82,9 @@ def train(self, timestep: Optional[int] = None, timesteps: Optional[int] = None)
:param timesteps: Total number of timesteps (default: ``None``).
If None, the total number of timesteps is obtained from the trainer's config
:type timesteps: int, optional
+
+ :return: Observation, reward, terminated, truncated, info
+ :rtype: tuple of np.ndarray or jax.Array and any other info
"""
if timestep is None:
self._timestep += 1
@@ -82,7 +92,7 @@ def train(self, timestep: Optional[int] = None, timesteps: Optional[int] = None)
timesteps = self.timesteps if timesteps is None else timesteps
if self._progress is None:
- self._progress = tqdm.tqdm(total=timesteps, disable=self.disable_progressbar)
+ self._progress = tqdm.tqdm(total=timesteps, disable=self.disable_progressbar, file=sys.stdout)
self._progress.update(n=1)
# set running mode
@@ -162,7 +172,11 @@ def train(self, timestep: Optional[int] = None, timesteps: Optional[int] = None)
else:
self.states = next_states
- def eval(self, timestep: Optional[int] = None, timesteps: Optional[int] = None) -> None:
+ return next_states, rewards, terminated, truncated, infos
+
+ def eval(self, timestep: Optional[int] = None, timesteps: Optional[int] = None) -> \
+ Tuple[Union[np.ndarray, jax.Array], Union[np.ndarray, jax.Array],
+ Union[np.ndarray, jax.Array], Union[np.ndarray, jax.Array], Any]:
"""Evaluate the agents sequentially
This method executes the following steps in loop:
@@ -178,6 +192,9 @@ def eval(self, timestep: Optional[int] = None, timesteps: Optional[int] = None)
:param timesteps: Total number of timesteps (default: ``None``).
If None, the total number of timesteps is obtained from the trainer's config
:type timesteps: int, optional
+
+ :return: Observation, reward, terminated, truncated, info
+ :rtype: tuple of np.ndarray or jax.Array and any other info
"""
if timestep is None:
self._timestep += 1
@@ -185,7 +202,7 @@ def eval(self, timestep: Optional[int] = None, timesteps: Optional[int] = None)
timesteps = self.timesteps if timesteps is None else timesteps
if self._progress is None:
- self._progress = tqdm.tqdm(total=timesteps, disable=self.disable_progressbar)
+ self._progress = tqdm.tqdm(total=timesteps, disable=self.disable_progressbar, file=sys.stdout)
self._progress.update(n=1)
# set running mode
@@ -249,3 +266,5 @@ def eval(self, timestep: Optional[int] = None, timesteps: Optional[int] = None)
self.states, infos = self.env.reset()
else:
self.states = next_states
+
+ return next_states, rewards, terminated, truncated, infos
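
Because both ``train()`` and ``eval()`` now return the transition of each call, the renamed trainer
can drive the loop from user code. A sketch assuming an already wrapped ``env`` and a configured
``agent``::

    from skrl.trainers.jax import StepTrainer

    trainer = StepTrainer(env=env, agents=agent, cfg={"timesteps": 10000})
    for timestep in range(10000):
        states, rewards, terminated, truncated, infos = trainer.train(timestep=timestep)
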
diff --git a/skrl/trainers/torch/__init__.py b/skrl/trainers/torch/__init__.py
index 9fcf4349..2d1f8e40 100644
--- a/skrl/trainers/torch/__init__.py
+++ b/skrl/trainers/torch/__init__.py
@@ -1,5 +1,5 @@
from skrl.trainers.torch.base import Trainer, generate_equally_spaced_scopes # isort:skip
-from skrl.trainers.torch.manual import ManualTrainer
from skrl.trainers.torch.parallel import ParallelTrainer
from skrl.trainers.torch.sequential import SequentialTrainer
+from skrl.trainers.torch.step import StepTrainer
diff --git a/skrl/trainers/torch/base.py b/skrl/trainers/torch/base.py
index 5c8b7a52..a13d70a4 100644
--- a/skrl/trainers/torch/base.py
+++ b/skrl/trainers/torch/base.py
@@ -1,6 +1,7 @@
from typing import List, Optional, Union
import atexit
+import sys
import tqdm
import torch
@@ -162,7 +163,7 @@ def single_agent_train(self) -> None:
# reset env
states, infos = self.env.reset()
- for timestep in tqdm.tqdm(range(self.initial_timestep, self.timesteps), disable=self.disable_progressbar):
+ for timestep in tqdm.tqdm(range(self.initial_timestep, self.timesteps), disable=self.disable_progressbar, file=sys.stdout):
# pre-interaction
self.agents.pre_interaction(timestep=timestep, timesteps=self.timesteps)
@@ -218,7 +219,7 @@ def single_agent_eval(self) -> None:
# reset env
states, infos = self.env.reset()
- for timestep in tqdm.tqdm(range(self.initial_timestep, self.timesteps), disable=self.disable_progressbar):
+ for timestep in tqdm.tqdm(range(self.initial_timestep, self.timesteps), disable=self.disable_progressbar, file=sys.stdout):
# compute actions
with torch.no_grad():
@@ -273,7 +274,7 @@ def multi_agent_train(self) -> None:
states, infos = self.env.reset()
shared_states = infos.get("shared_states", None)
- for timestep in tqdm.tqdm(range(self.initial_timestep, self.timesteps), disable=self.disable_progressbar):
+ for timestep in tqdm.tqdm(range(self.initial_timestep, self.timesteps), disable=self.disable_progressbar, file=sys.stdout):
# pre-interaction
self.agents.pre_interaction(timestep=timestep, timesteps=self.timesteps)
@@ -332,7 +333,7 @@ def multi_agent_eval(self) -> None:
states, infos = self.env.reset()
shared_states = infos.get("shared_states", None)
- for timestep in tqdm.tqdm(range(self.initial_timestep, self.timesteps), disable=self.disable_progressbar):
+ for timestep in tqdm.tqdm(range(self.initial_timestep, self.timesteps), disable=self.disable_progressbar, file=sys.stdout):
# compute actions
with torch.no_grad():
diff --git a/skrl/trainers/torch/parallel.py b/skrl/trainers/torch/parallel.py
index 57212796..68b9b9d8 100644
--- a/skrl/trainers/torch/parallel.py
+++ b/skrl/trainers/torch/parallel.py
@@ -1,6 +1,7 @@
from typing import List, Optional, Union
import copy
+import sys
import tqdm
import torch
@@ -11,12 +12,14 @@
from skrl.trainers.torch import Trainer
+# [start-config-dict-torch]
PARALLEL_TRAINER_DEFAULT_CONFIG = {
"timesteps": 100000, # number of timesteps to train for
"headless": False, # whether to use headless mode (no rendering)
"disable_progressbar": False, # whether to disable the progressbar. If None, disable on non-TTY
"close_environment_at_exit": True, # whether to close the environment on normal program termination
}
+# [end-config-dict-torch]
def fn_processor(process_index, *args):
@@ -201,7 +204,7 @@ def train(self) -> None:
if not states.is_cuda:
states.share_memory_()
- for timestep in tqdm.tqdm(range(self.initial_timestep, self.timesteps), disable=self.disable_progressbar):
+ for timestep in tqdm.tqdm(range(self.initial_timestep, self.timesteps), disable=self.disable_progressbar, file=sys.stdout):
# pre-interaction
for pipe in producer_pipes:
@@ -337,7 +340,7 @@ def eval(self) -> None:
if not states.is_cuda:
states.share_memory_()
- for timestep in tqdm.tqdm(range(self.initial_timestep, self.timesteps), disable=self.disable_progressbar):
+ for timestep in tqdm.tqdm(range(self.initial_timestep, self.timesteps), disable=self.disable_progressbar, file=sys.stdout):
# compute actions
with torch.no_grad():
diff --git a/skrl/trainers/torch/sequential.py b/skrl/trainers/torch/sequential.py
index 8f1ef1c2..49952351 100644
--- a/skrl/trainers/torch/sequential.py
+++ b/skrl/trainers/torch/sequential.py
@@ -1,6 +1,7 @@
from typing import List, Optional, Union
import copy
+import sys
import tqdm
import torch
@@ -10,12 +11,14 @@
from skrl.trainers.torch import Trainer
+# [start-config-dict-torch]
SEQUENTIAL_TRAINER_DEFAULT_CONFIG = {
"timesteps": 100000, # number of timesteps to train for
"headless": False, # whether to use headless mode (no rendering)
"disable_progressbar": False, # whether to disable the progressbar. If None, disable on non-TTY
"close_environment_at_exit": True, # whether to close the environment on normal program termination
}
+# [end-config-dict-torch]
class SequentialTrainer(Trainer):
@@ -83,7 +86,7 @@ def train(self) -> None:
# reset env
states, infos = self.env.reset()
- for timestep in tqdm.tqdm(range(self.initial_timestep, self.timesteps), disable=self.disable_progressbar):
+ for timestep in tqdm.tqdm(range(self.initial_timestep, self.timesteps), disable=self.disable_progressbar, file=sys.stdout):
# pre-interaction
for agent in self.agents:
@@ -154,7 +157,7 @@ def eval(self) -> None:
# reset env
states, infos = self.env.reset()
- for timestep in tqdm.tqdm(range(self.initial_timestep, self.timesteps), disable=self.disable_progressbar):
+ for timestep in tqdm.tqdm(range(self.initial_timestep, self.timesteps), disable=self.disable_progressbar, file=sys.stdout):
# compute actions
with torch.no_grad():
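
A short standalone sketch (not from the repository) of the behaviour the recurring `file=sys.stdout` change is after: with the progress bar bound to `sys.stdout`, redirecting or capturing standard output also captures the training/evaluation progress messages, which the default stderr stream would not.

```python
# Standalone illustration, not repository code: a tqdm bar bound to
# sys.stdout follows stdout redirection (e.g. `python train.py > run.log`
# or contextlib.redirect_stdout).
import contextlib
import io
import sys

import tqdm

buffer = io.StringIO()
with contextlib.redirect_stdout(buffer):
    # inside the context, sys.stdout is the StringIO buffer
    for _ in tqdm.tqdm(range(3), file=sys.stdout):
        pass

print("progressbar captured via stdout:", bool(buffer.getvalue()))
```
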
diff --git a/skrl/trainers/torch/manual.py b/skrl/trainers/torch/step.py
similarity index 90%
rename from skrl/trainers/torch/manual.py
rename to skrl/trainers/torch/step.py
index c0179591..c60476f1 100644
--- a/skrl/trainers/torch/manual.py
+++ b/skrl/trainers/torch/step.py
@@ -1,6 +1,7 @@
-from typing import List, Optional, Union
+from typing import Any, List, Optional, Tuple, Union
import copy
+import sys
import tqdm
import torch
@@ -10,23 +11,25 @@
from skrl.trainers.torch import Trainer
-MANUAL_TRAINER_DEFAULT_CONFIG = {
+# [start-config-dict-torch]
+STEP_TRAINER_DEFAULT_CONFIG = {
"timesteps": 100000, # number of timesteps to train for
"headless": False, # whether to use headless mode (no rendering)
"disable_progressbar": False, # whether to disable the progressbar. If None, disable on non-TTY
"close_environment_at_exit": True, # whether to close the environment on normal program termination
}
+# [end-config-dict-torch]
-class ManualTrainer(Trainer):
+class StepTrainer(Trainer):
def __init__(self,
env: Wrapper,
agents: Union[Agent, List[Agent]],
agents_scope: Optional[List[int]] = None,
cfg: Optional[dict] = None) -> None:
- """Manual trainer
+ """Step-by-step trainer
- Train agents by manually controlling the training/evaluation loop
+ Train agents by controlling the training/evaluation loop step by step
:param env: Environment to train on
:type env: skrl.envs.wrappers.torch.Wrapper
@@ -35,10 +38,10 @@ def __init__(self,
:param agents_scope: Number of environments for each agent to train on (default: ``None``)
:type agents_scope: tuple or list of int, optional
:param cfg: Configuration dictionary (default: ``None``).
- See MANUAL_TRAINER_DEFAULT_CONFIG for default values
+ See STEP_TRAINER_DEFAULT_CONFIG for default values
:type cfg: dict, optional
"""
- _cfg = copy.deepcopy(MANUAL_TRAINER_DEFAULT_CONFIG)
+ _cfg = copy.deepcopy(STEP_TRAINER_DEFAULT_CONFIG)
_cfg.update(cfg if cfg is not None else {})
agents_scope = agents_scope if agents_scope is not None else []
super().__init__(env=env, agents=agents, agents_scope=agents_scope, cfg=_cfg)
@@ -55,7 +58,8 @@ def __init__(self,
self.states = None
- def train(self, timestep: Optional[int] = None, timesteps: Optional[int] = None) -> None:
+ def train(self, timestep: Optional[int] = None, timesteps: Optional[int] = None) -> \
+ Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, Any]:
"""Execute a training iteration
This method executes the following steps once:
@@ -74,6 +78,9 @@ def train(self, timestep: Optional[int] = None, timesteps: Optional[int] = None)
:param timesteps: Total number of timesteps (default: ``None``).
If None, the total number of timesteps is obtained from the trainer's config
:type timesteps: int, optional
+
+ :return: Observation, reward, terminated, truncated, info
+ :rtype: tuple of torch.Tensor and any other info
"""
if timestep is None:
self._timestep += 1
@@ -81,7 +88,7 @@ def train(self, timestep: Optional[int] = None, timesteps: Optional[int] = None)
timesteps = self.timesteps if timesteps is None else timesteps
if self._progress is None:
- self._progress = tqdm.tqdm(total=timesteps, disable=self.disable_progressbar)
+ self._progress = tqdm.tqdm(total=timesteps, disable=self.disable_progressbar, file=sys.stdout)
self._progress.update(n=1)
# set running mode
@@ -162,7 +169,10 @@ def train(self, timestep: Optional[int] = None, timesteps: Optional[int] = None)
else:
self.states = next_states
- def eval(self, timestep: Optional[int] = None, timesteps: Optional[int] = None) -> None:
+ return next_states, rewards, terminated, truncated, infos
+
+ def eval(self, timestep: Optional[int] = None, timesteps: Optional[int] = None) -> \
+ Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, Any]:
"""Evaluate the agents sequentially
This method executes the following steps in loop:
@@ -178,6 +188,9 @@ def eval(self, timestep: Optional[int] = None, timesteps: Optional[int] = None)
:param timesteps: Total number of timesteps (default: ``None``).
If None, the total number of timesteps is obtained from the trainer's config
:type timesteps: int, optional
+
+ :return: Observation, reward, terminated, truncated, info
+ :rtype: tuple of torch.Tensor and any other info
"""
if timestep is None:
self._timestep += 1
@@ -185,7 +198,7 @@ def eval(self, timestep: Optional[int] = None, timesteps: Optional[int] = None)
timesteps = self.timesteps if timesteps is None else timesteps
if self._progress is None:
- self._progress = tqdm.tqdm(total=timesteps, disable=self.disable_progressbar)
+ self._progress = tqdm.tqdm(total=timesteps, disable=self.disable_progressbar, file=sys.stdout)
self._progress.update(n=1)
# set running mode
@@ -248,3 +261,5 @@ def eval(self, timestep: Optional[int] = None, timesteps: Optional[int] = None)
self.states, infos = self.env.reset()
else:
self.states = next_states
+
+ return next_states, rewards, terminated, truncated, infos
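
To close, a hedged sketch of how the PyTorch `StepTrainer.eval()` return values might be used, for example to accumulate per-environment rewards outside the trainer. The `env` and `agent` objects are assumed to exist; only the `skrl.trainers.torch` import is taken from the patch above.

```python
# Hypothetical usage sketch, not part of the patch: stepping evaluation
# manually and aggregating the rewards returned by each eval() call.
import torch

from skrl.trainers.torch import StepTrainer

trainer = StepTrainer(env=env, agents=agent, cfg={"timesteps": 500})  # env/agent assumed

total_rewards = None
for timestep in range(500):
    # eval() now returns (observation, reward, terminated, truncated, info) as torch tensors
    states, rewards, terminated, truncated, infos = trainer.eval(timestep=timestep)
    total_rewards = rewards.clone() if total_rewards is None else total_rewards + rewards

print("mean accumulated reward:", total_rewards.mean().item())
```
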