chore: Store progress

kirillbobyrev · Jun 28, 2024 · 89d3942 · 89d3942
1 parent 5ac437e
commit 89d3942
Show file tree

Hide file tree

Showing 10 changed files with 163 additions and 102 deletions.
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/Cargo.toml b/Cargo.toml
@@ -73,6 +73,7 @@ itertools = "0.13.0"
 # Use SmallRng for performance.
 rand = { version = "0.8.5", features = ["small_rng"] }
 rand_distr = "0.4.3"
+rayon = "1.10.0"
 shadow-rs = "0.29.0"
 
 [build-dependencies]

diff --git a/README.md b/README.md
@@ -13,8 +13,9 @@ Pabi is a modern chess engine that is currently under development.
 
 ## Goals
 
-Pabi is inspired by existing Chess and Go engines (mainly [lc0] and [KataGo]).
-It strives to be a high-quality modern engine.
+Pabi is inspired by existing Chess and Go engines (mainly [lc0] and [KataGo]),
+and the research that they are based on ([AlphaZero], [MuZero] and [MCTS]). Pabi
+strives to be a high-quality modern engine.
 
 **Modern**: Pabi should use up-to-date [Rust] toolchain, is targeting modern
 processor architectures, uses latest developments in the domains of programming

diff --git a/justfile b/justfile
@@ -6,7 +6,7 @@ build:
 
 # Runs the engine and enters UCI mode.
 run:
-  cargo run --profile=release
+  cargo run --profile=release --bin=pabi
 
 # Format all code.
 fmt:

diff --git a/src/chess/game.rs b/src/chess/game.rs
@@ -3,13 +3,20 @@ use crate::chess::position::Position;
 use crate::chess::zobrist::RepetitionTable;
 use crate::environment::{Action, Environment, GameResult, Observation};
 
+impl Action for Move {
+    fn get_index(&self) -> u16 {
+        todo!();
+    }
+}
+
 impl Observation for Position {}
 
 pub struct Game {
     position: Position,
     perspective: Color,
     repetitions: RepetitionTable,
     moves: MoveList,
+    outcome: Option<GameResult>,
 }
 
 impl Game {
@@ -25,6 +32,7 @@ impl Game {
             perspective,
             repetitions,
             moves,
+            outcome: None,
         }
     }
 }
@@ -34,8 +42,11 @@ impl Environment<Move, Position> for Game {
         &self.moves
     }
 
-    fn apply(&mut self, action: impl Action) -> Position {
-        todo!();
+    fn apply(&mut self, action: &Move) -> &Position {
+        self.position.make_move(action);
+        let _ = self.repetitions.record(self.position.hash());
+        self.moves = self.position.generate_moves();
+        &self.position
     }
 
     fn result(&self) -> Option<GameResult> {
@@ -47,12 +58,6 @@ impl Environment<Move, Position> for Game {
     }
 }
 
-impl Action for Move {
-    fn get_index(&self) -> u16 {
-        todo!();
-    }
-}
-
 #[cfg(test)]
 mod tests {
 

diff --git a/src/environment.rs b/src/environment.rs
@@ -18,6 +18,6 @@ pub trait Action: Sized {
 /// Standard gym-like Reinforcement Learning environment interface.
 pub trait Environment<A: Action, O: Observation>: Sized {
     fn actions(&self) -> &[A];
-    fn apply(&mut self, action: impl Action) -> O;
+    fn apply(&mut self, action: &A) -> &O;
     fn result(&self) -> Option<GameResult>;
 }
diff --git a/src/mcts/mod.rs b/src/mcts/mod.rs
@@ -2,6 +2,8 @@
 //!
 //! [Monte Carlo Tree Search]: https://en.wikipedia.org/wiki/Monte_Carlo_tree_search
 
+mod policy;
+mod search;
 mod tree;
 
 /// Search depth in plies.

diff --git a/src/mcts/policy.rs b/src/mcts/policy.rs
diff --git a/src/mcts/search.rs b/src/mcts/search.rs
@@ -0,0 +1,23 @@
+use super::Depth;
+
+fn search(num_simulations: usize, max_depth: Depth) {
+    for _ in 0..num_simulations{
+        todo!()
+    }
+}
+
+fn select() {
+    todo!()
+}
+
+fn expand() {
+    todo!()
+}
+
+fn backup() {
+    todo!()
+}
+
+fn simulate() {
+    todo!()
+}
diff --git a/src/mcts/tree.rs b/src/mcts/tree.rs
@@ -1,5 +1,7 @@
+use crate::{environment::Action, evaluation::QValue};
+
 struct Tree {
-    root: Node,
+    nodes: Vec<Node>,
 }
 
 type NodeIndex = usize;
@@ -11,6 +13,22 @@ const TOMBSTONE_PARENT: NodeIndex = usize::MAX;
 struct Node {
     parent: NodeIndex,
     children: Vec<NodeIndex>,
-    wins: u32,
+    // Use Win-Draw-Loss evaluation, similar to lc0:
+    // https://lczero.org/blog/2020/04/wdl-head/
+    w_count: u32,
+    d_count: u32,
+    l_count: u32,
     visits: u32,
 }
+
+impl Node {
+    #[must_use]
+    const fn visited(&self) -> bool {
+        self.visits > 0
+    }
+
+    #[must_use]
+    const fn q_value(action: impl Action) -> QValue {
+        todo!()
+    }
+}