From 579ab9ece1bfde81abf272f3f700b49edebb05f4 Mon Sep 17 00:00:00 2001 From: eugenioenko Date: Tue, 17 Feb 2026 11:55:16 -0800 Subject: [PATCH] feat: tt bound --- README.md | 204 +++++++++++++++++++++++++++++--------------------- pkg/search.go | 43 +++++++++-- pkg/tt.go | 22 +++--- 3 files changed, 168 insertions(+), 101 deletions(-) diff --git a/README.md b/README.md index 0a9d1ff..6db0da4 100644 --- a/README.md +++ b/README.md @@ -14,17 +14,23 @@ Libra Chess is a UCI (Universal Chess Interface) compliant chess engine written This engine is designed for chess enthusiasts, developers looking to understand chess engine internals, and as a demonstration of software engineering principles applied to a complex domain. +The engine also compiles to WebAssembly (WASM), allowing it to run entirely in the browser. You can [play against Libra Chess live](https://eugenioenko.github.io/libra-chess-ui) in the web interface. + +**Estimated strength: ~1800 ELO** (tested against Stockfish at various ELO levels using cutechess-cli). + --- ## 2. ✨ Key Features -- **UCI Protocol Compliant:** Seamless integration with popular UCI-compatible GUIs (e.g., CuteChess, CoreChess, PyChess). -- **Alpha-Beta Search:** Optimized Alpha-Beta pruning forms the core of the search algorithm. -- **Iterative Deepening:** Allows for flexible time management and progressive deepening of the search. -- **Transposition Tables:** Utilizes Zobrist hashing to store and retrieve previously evaluated positions, significantly speeding up search by avoiding redundant computations. -- **Piece-Square Tables (PSTs):** Employs PSTs for nuanced positional evaluation, guiding the engine's understanding of piece placement. -- **Material Evaluation:** Core evaluation component based on standard piece values. -- **Endgame Evaluation Heuristics:** Includes logic to encourage king activity and mating sequences in the endgame (e.g., incentivizing the stronger side's king to approach the opponent's king). 
+- **UCI Protocol Compliant:** Seamless integration with popular UCI-compatible GUIs (e.g., CuteChess, CoreChess, PyChess). Supports `wtime`, `btime`, `winc`, `binc`, `movestogo`, `movetime`, `depth`, `infinite`, and `stop`. +- **Alpha-Beta Search with Quiescence:** Alpha-Beta pruning with quiescence search at leaf nodes to resolve tactical sequences and avoid the horizon effect. +- **Iterative Deepening:** Progressive deepening with soft/hard time limits for flexible time management. +- **Transposition Table:** Zobrist hashing with bound types (exact, lower, upper) for effective position caching and search cutoffs. +- **Tapered Evaluation:** PeSTO piece-square tables with middlegame/endgame interpolation based on game phase, providing phase-aware positional understanding. +- **Move Ordering:** TT move, MVV-LVA captures, killer moves, and history heuristic for efficient alpha-beta pruning. +- **Parallel Root Search:** Distributes root moves across worker goroutines using all available CPU cores. +- **Endgame Heuristics:** King proximity bonus in endgames to encourage mating with material advantage. +- **WASM Build:** Compiles to WebAssembly, enabling the engine to run entirely in the browser. Powers the [live web interface](https://eugenioenko.github.io/libra-chess-ui). - **Move Generation:** Optimized and validated pseudo-legal move generation with legality checks. - **Comprehensive Testing Suite:** - Unit tests for core logic (`go test`). @@ -45,13 +51,11 @@ This engine is designed for chess enthusiasts, developers looking to understand ### 3.2. Building the Engine ```bash -go build -# or -make build +make build # Native binary: ./libra-chess +make build-wasm # WASM binary: wasm/libra.wasm +make build-release # Cross-compile for all platforms ``` -This will produce the main executable `libra-chess` - ### 3.3. 
Running the Engine - **Directly (for UCI interaction):** @@ -90,7 +94,12 @@ This will produce the main executable `libra-chess` ```bash make test-stockfish ``` - _(Requires Stockfish CLI to be at `./stockfish/stockfish-cli`)_ + _(Requires Stockfish CLI to be at `./dist/stockfish/stockfish-cli`)_ +- **Fast ELO Estimate:** + ```bash + make test-elo + ``` + _(40 games vs Stockfish 1500, 10+1)_ - **Debug Match:** ```bash make test-debug @@ -105,7 +114,7 @@ Libra Chess is architected with modularity, simplicity, and maintainability as p ### 4.1. Language Choice: Go - **Advantages:** - - **Concurrency:** Go's goroutines and channels offer a powerful yet simple model for concurrent programming, which is pivotal for future enhancements like parallel search. + - **Concurrency:** Go's goroutines and channels offer a powerful yet simple model for concurrent programming, used for parallel root search and search cancellation. - **Performance:** While not C/C++, Go offers impressive performance, especially with its efficient garbage collector (GC) and direct compilation to machine code. Careful memory management is still crucial. - **Simplicity & Readability:** Go's clean syntax and established conventions promote maintainable and understandable code. - **Tooling:** Rich ecosystem including `gofmt` for automated formatting, `go test` for testing, and `golangci-lint` for static analysis. @@ -118,45 +127,45 @@ Libra Chess is architected with modularity, simplicity, and maintainability as p The engine's core logic is organized within the `pkg/` directory, with separation of concerns: - `board.go`: Board representation, piece management, and core game state. -- `evaluate.go`: Static evaluation function, including material, PSTs, and endgame heuristics. -- `generate.go`: Move generation logic. -- `search.go`: Search algorithms (Alpha-Beta, iterative deepening). -- `tt.go`: Transposition table implementation. 
+- `evaluate.go`: Static evaluation function, including tapered PeSTO evaluation and endgame heuristics. +- `generate.go`: Move generation logic (legal moves and capture-only generation for quiescence). +- `search.go`: Search algorithms (Alpha-Beta, quiescence search, iterative deepening, parallel root search). +- `sort.go`: Move ordering (TT move, MVV-LVA, killer moves, history heuristic). +- `tt.go`: Transposition table implementation with bound types. - `zobrist.go`: Zobrist hashing for position keys. -- `move.go`: Move/UndoMove for calculations, -- `piece.go`: Pieces definitions, +- `move.go`: Move/UndoMove for calculations. +- `piece.go`: Pieces definitions. +- `const.go`: Constants, piece-square tables, and phase values. +- `uci.go`: UCI time management and command parsing. - `utils.go`: Utility and data structure definitions. This modular design facilitates easier testing, debugging, and future feature development. ### 4.3. Evaluation Function Design (`evaluate.go`) -The current evaluation function is a classical handcrafted one, balancing speed and accuracy. +The evaluation uses a tapered PeSTO approach — chosen over simpler material-only evaluation because PeSTO tables are well-studied, require no tuning infrastructure, and provide both material and positional scoring in a single lookup. Alternatives like Texel-tuned custom tables were deferred since they require a large labeled game dataset and tuning framework that aren't justified at this stage. - **Components:** - - **Material:** Standard piece values (Pawn:100, Knight:300, Bishop:300, Rook:500, Queen:900). - - **Piece-Square Tables (PSTs):** Static tables that assign positional bonuses or penalties to pieces based on their square. These are currently simplified and offer a good baseline. - - **Endgame Heuristics:** A specific heuristic encourages the king with a material advantage to move towards the opponent's king when total material on the board is low (<= 14 units, excluding kings). 
This promotes checkmates in won endgames. + - **Tapered PeSTO Evaluation:** Separate middlegame and endgame piece-square tables for all piece types, with material values baked in at startup. The game phase is computed from remaining pieces (knights, bishops, rooks, queens) and used to interpolate between MG and EG scores. + - **Endgame Heuristics:** King proximity bonus encourages the stronger side's king to approach the opponent's king when material is low, promoting checkmates in won endgames. - **Trade-offs:** - - **Speed vs. Accuracy:** Handcrafted evaluation functions are generally fast. The current PSTs are relatively simple, which makes evaluation quick but potentially less nuanced than more complex schemes or ML-based models. - - **Complexity of Terms:** Adding more evaluation terms (e.g., detailed pawn structure, king safety, mobility beyond basic checks, passed pawns) can improve strength but increases computational cost and tuning complexity. This is a key area for future refinement. + - PeSTO tables provide a strong positional baseline with zero tuning cost, but lack awareness of pawn structure, king safety, and piece mobility — terms that require per-position computation and would slow evaluation. + - Adding evaluation complexity has diminishing returns without search improvements to reach the positions where it matters. Search depth was prioritized first. ### 4.4. Search Algorithm (`search.go`) -- **Minimax with Alpha-Beta Pruning:** A standard and effective framework for chess search. -- **Iterative Deepening:** Allows the engine to search to a certain depth, then use the information from that search (e.g., principal variation) to order moves for the next, deeper iteration. This is essential for effective time management. 
-- **Quiescence Search:** (Assumed, or a high-priority addition) To mitigate the horizon effect, a quiescence search is typically implemented to evaluate "quiet" positions by extending the search for captures and other tactical moves. +- **Alpha-Beta with Iterative Deepening:** Searches to progressively deeper depths, using soft time limits (stop deepening) and hard time limits (abort in-flight search). Move ordering from previous iterations improves pruning at each new depth. +- **Quiescence Search:** At leaf nodes, extends the search for all captures and promotions until the position is quiet, using stand-pat evaluation and MVV-LVA ordering. Without quiescence, the engine would evaluate positions mid-exchange and make severe tactical blunders. +- **Move Ordering:** TT move first, then MVV-LVA captures, killer moves, and history heuristic. Good move ordering is the single biggest factor in alpha-beta efficiency — the difference between searching 10x more or fewer nodes in the same time. - **Trade-offs:** - - **Search Depth vs. Time:** The deeper the search, generally the stronger the play, but time is a finite resource. Effective move ordering, pruning, and extensions/reductions are key to searching deeper within the allocated time. - - **Selectivity:** Deciding which branches of the search tree to explore deeply (extensions) and which to prune or search shallowly (reductions) is a complex balancing act. - -### 4.5. Testing and Benchmarking Philosophy + - Parallel root search clones the board for each worker, trading memory for thread safety. This avoids lock contention entirely but means interior nodes can't share pruning information across threads — a known limitation that Lazy SMP would address. + - The TT uses a Go `map` with `sync.RWMutex`, which is simple and correct but has GC pressure and cache-unfriendly access patterns compared to a fixed-size array. This is a deliberate simplicity-first choice; profiling shows it's not yet the bottleneck. 
-A robust testing strategy is paramount for engine development. +### 4.5. Testing and Validation -- **Correctness:** `perft` tests validate move generation exhaustively. Unit tests cover individual functions and modules. -- **Strength:** Regular match play against baseline versions of Libra, other engines like Stockfish (at controlled ELO levels), and itself (`test-cutechess`) provides a measure of playing strength and helps identify regressions or improvements from changes. The `Makefile` provides targets for these tests. -- **Debugging:** The `test-debug` target in the `Makefile` facilitates focused debugging sessions with `cutechess-cli`. +- **Correctness:** `perft` tests validate move generation against known node counts at each depth. Unit tests cover evaluation, search, and move generation. +- **Strength Regression:** Every change is validated through head-to-head matches against the previous version (`make test-cutechess`) and against Stockfish at controlled ELO levels (`make test-elo`). A change that doesn't win more than it loses doesn't ship. +- **Methodology:** See [Design Decisions & Measured Impact](#55-design-decisions--measured-impact) for detailed results. --- @@ -179,70 +188,93 @@ A robust testing strategy is paramount for engine development. ### 5.2. Move Generation (`generate.go`) -- **Pseudo-Legal to Legal:** Moves are typically generated as pseudo-legal (ignoring checks to the king) and then validated. -- **Efficiency:** Techniques like pre-calculated attack tables for sliding pieces and knight moves are common. Libra's current approach is direct computation, which can be optimized further. +- **Pseudo-Legal to Legal:** Moves are generated as pseudo-legal (ignoring pins and checks) then filtered through `IsMoveLegal()` which applies the move and checks if the king is attacked. This is simpler than maintaining attack maps but means illegal moves are generated and discarded. 
+- **Precomputed Tables:** `RookRays`, `BishopRays`, `KnightOffsets`, `KingOffsets`, and `SquaresToEdge` are computed once at startup, avoiding repeated calculation during search. +- **Capture Generation:** `GenerateLegalCaptures()` generates only captures and promotions for quiescence search, avoiding the cost of generating quiet moves at leaf nodes. ### 5.3. Transposition Table (`tt.go` & `zobrist.go`) -- **Zobrist Hashing:** Each position is mapped to a unique (with high probability) 64-bit hash key. Keys are updated incrementally when moves are made/unmade. -- **Table Structure:** Typically a hash map or a large array with a simple indexing scheme (e.g., `hash % table_size`). -- **Stored Information:** Each entry stores the hash key (for collision detection), depth of the search, score, score type (exact, lower bound, upper bound), and best move. - - _Trade-off:_ The amount of information stored per entry affects TT size and the utility of hits. More info is better but costs memory. +- **Zobrist Hashing:** Each position is mapped to a 64-bit hash key. Currently computed from scratch each lookup rather than incrementally updated — simpler to implement correctly but slower. Incremental updates are a future optimization. +- **Table Structure:** Go `map[uint64]TTEntry` with `sync.RWMutex`. A fixed-size array (`hash % size`) would be more cache-friendly and avoid GC pressure, but the map approach was chosen for correctness-first development. +- **Bound Types:** Each entry stores the score's relationship to the search window — exact (PV node), lower bound (beta cutoff), or upper bound (failed low). This allows the TT to produce cutoffs even when the stored score isn't exact, which dramatically increases hit rates. +- **Replacement Policy:** Depth-preferred — only overwrites if the new search depth >= stored depth. This preserves the most valuable (deepest) results but can cause the table to fill with stale deep entries over time. 
Age-based replacement would address this. ### 5.4. Concurrency Strategy -- **Parallel Move Evaluation:** The core search algorithm leverages Go's concurrency by evaluating top-level moves in parallel. The `Search` function distributes each legal move at the root to a worker goroutine, allowing multiple positions to be searched simultaneously and efficiently utilizing all available CPU cores. -- **Worker Pool:** The number of worker goroutines is determined by the number of logical CPUs (`runtime.GOMAXPROCS(0)`), ensuring optimal parallelism on the host system. -- **Result Aggregation:** Results from all workers are collected and the best move is selected according to the search score, preserving move ordering for tie-breaking. -- **Thread Safety:** Each worker operates on a cloned board state, ensuring thread safety and correctness. -- **UCI Communication:** UCI command handling can still be performed in a separate goroutine to keep the engine responsive during search. +- **Parallel Root Search:** Root moves are distributed to a worker pool sized to `runtime.GOMAXPROCS(0)`. Each worker clones the board and searches independently — no shared mutable state, no lock contention during search. +- **Trade-off:** Cloning per worker means interior nodes can't share alpha-beta bounds across threads. This is less efficient than Lazy SMP (where threads share the TT and occasionally duplicate work but benefit from different move orderings). However, the clone approach is simpler, correct by construction, and avoids subtle concurrency bugs. +- **Cancellation:** Search goroutines listen on a `Done` channel for timeouts and UCI `stop` commands. The UCI loop runs in a separate goroutine so the engine remains responsive during search. -This approach provides significant speedup for the root search and is a foundation for further parallelism in deeper search layers in the future. +### 5.5. Design Decisions & Measured Impact -### 5.5. 
Move Generation Benchmarks +Each major feature was validated through head-to-head matches (10 games, 30+0 time control) against the previous version using `cutechess-cli` before merging. -The following table shows the results of running the `PerftParallel(N)` benchmark at increasing depths. The "Time per op" column shows the average time (in microseconds) to compute all legal moves at each depth, and the "Legal moves calculated" column shows the total number of legal moves (nodes) at that depth. +| Change | Result vs. Previous | Impact | +| ------ | ------------------- | ------ | +| **Tapered PeSTO Evaluation** (replacing simple material + basic PSTs) | 8-0-4 | Largest single ELO gain. PeSTO tables provide both material and positional scoring with phase-aware interpolation, replacing ~10 lines of material counting with a well-studied lookup scheme. | +| **Quiescence Search** (capture search at leaf nodes) | 6-0-4 | Eliminated horizon effect — the engine no longer evaluates positions mid-exchange. Stand-pat evaluation with MVV-LVA ordered captures. | +| **TT Bound Types** (exact/lower/upper instead of exact-only) | 8-1-1 | Most TT entries are bounds, not exact scores. Without bound types, the vast majority of TT hits were wasted. Search test suite dropped from 9.1s to 6.2s. | +| **UCI Time Management** (soft/hard limits, increment, movestogo) | — | No strength change in self-play, but critical for tournament play — the engine previously used a hardcoded 1s/move regardless of time control. | -| perft(N) | Time per op (microseconds) | Legal moves calculated | -| -------- | -------------------------- | ---------------------- | -| 1 | 2.81 | 20 | -| 2 | 37.85 | 400 | -| 3 | 274.88 | 8,902 | -| 4 | 6512.77 | 197,281 | -| 5 | 142827.80 | 4,865,609 | -| 6 | 3865875.06 | 119,060,324 | +**Methodology:** Each change is tested in isolation against the immediately prior version. 
Self-play results are directional (small sample), so major changes are also validated against Stockfish at fixed ELO levels for absolute strength estimation. -> cpu: AMD Ryzen 7 6800H +### 5.6. Move Generation Benchmarks -This benchmark demonstrates that the engine can process approximately **30 million moves per second**. -This is calculated by dividing the number of legal moves at depth 5 by the time taken per operation (in seconds): +| perft(N) | Time per op (μs) | Legal moves calculated | +| -------- | ----------------- | ---------------------- | +| 1 | 2.81 | 20 | +| 2 | 37.85 | 400 | +| 3 | 274.88 | 8,902 | +| 4 | 6512.77 | 197,281 | +| 5 | 142827.80 | 4,865,609 | +| 6 | 3865875.06 | 119,060,324 | -``` -Moves per second ≈ 4,865,609 moves / 0.1428278 seconds ≈ 34,070,000 moves/second -``` +> cpu: AMD Ryzen 7 6800H — approximately **30 million moves per second**. + +### 5.7. Playing Strength + +| Opponent | Games | Score | Win Rate | Est. ELO Diff | +| ----------------- | ----- | ---------- | -------- | ------------- | +| Stockfish 1500 | 40 | 31-9-0 | 77.5% | +215 | +| Stockfish 1800 | 10 | 7-2-1 | 75.0% | +191 | +| Stockfish 2000 | 10 | 2-8-0 | 20.0% | -241 | + +Estimated playing strength: **~1800 ELO**. --- -## 6. 🛣️ Roadmap & Future Enhancements - -Libra Chess is an actively evolving project. Key areas for future development include: - -- **Search Enhancements:** - - **Principal Variation Search (PVS):** Implement PVS for more efficient search. - - **Late Move Reductions (LMR):** Reduce search depth for moves ordered later. - - **Futility Pruning & Razoring:** More aggressive pruning techniques. - - **Null Move Pruning:** A powerful pruning technique. - - **Improved Quiescence Search:** More robust handling of tactical positions. -- **Evaluation Refinements:** - - **Advanced Positional Terms:** Incorporate pawn structure analysis, king safety, mobility scores, passed pawn evaluation. 
- - **Tapered Evaluation:** Smoothly transition PSTs and other eval terms from middlegame to endgame. - - **Automated Tuning:** Explore techniques like CLOP (Chess ELO Optimizer) for tuning evaluation parameters. -- **Time Management:** More sophisticated algorithms to allocate time effectively across moves. -- **Opening Book:** Develop or integrate a more comprehensive internal opening book format. -- **Endgame Tablebases:** Integrate support for Syzygy or Gaviota tablebases for perfect play in endgames. -- **Concurrency in Search:** Leverage Go's goroutines to implement parallel search algorithms (e.g., Lazy SMP or ABDADA). -- **UCI Options:** Expose more internal parameters (e.g., Hash size, contempt factor) via UCI options. -- **Continuous Integration/Delivery:** Enhance CI pipeline for automated testing and releases. +## 6. 🛣️ Roadmap + +Prioritized by expected ELO impact relative to implementation complexity. Items higher on the list have better strength-to-effort ratios based on results from other engines at similar rating ranges. + +### Phase 1: Search Depth (highest impact) + +These techniques let the engine search deeper in the same time by pruning more of the tree. At ~1800 ELO, search depth is the primary bottleneck. + +- **Null Move Pruning (~50-100 ELO):** In non-zugzwang positions, skip a move and search with reduced depth. If the opponent still can't beat beta, the position is so good that a full search is unnecessary. Cheap to implement, large pruning gains. +- **Late Move Reductions (~50-80 ELO):** Moves ordered late by the move ordering heuristic are unlikely to be best. Search them at reduced depth first and only re-search at full depth if they surprise. Synergizes with good move ordering — which is already in place. +- **Principal Variation Search (~20-40 ELO):** Search the first move (expected best from TT/move ordering) with a full window and all remaining moves with a zero window. Re-search on fail-high. Effective when move ordering is good. 
+ +### Phase 2: Search Efficiency + +- **Aspiration Windows:** Start each iterative deepening iteration with a narrow window around the previous score. Most iterations confirm the score, saving work. Re-search with a wider window on fail. +- **Check Extensions:** Extend search by one ply when in check, since check positions are tactically sharp and shouldn't be cut short by depth limits. +- **SEE (Static Exchange Evaluation):** Evaluate capture sequences without actually searching them. Replaces MVV-LVA for capture ordering and enables pruning of clearly losing captures in quiescence. + +### Phase 3: Evaluation Refinement + +Search improvements plateau without better evaluation to guide the search. + +- **Pawn Structure:** Penalize doubled and isolated pawns, bonus for passed pawns. High impact in endgames where pawn structure determines the outcome. +- **Positional Terms:** Bishop pair bonus, rook on open file, king safety, mobility. +- **Automated Tuning:** Once enough evaluation terms exist, use Texel tuning or similar to optimize weights against a corpus of games. + +### Phase 4: Infrastructure & Correctness + +- **Draw Detection:** Repetition and 50-move rule detection. Currently the engine can't detect draws, which causes it to shuffle pieces in drawn endgames instead of seeking other plans. +- **Fixed-Size TT Array:** Replace `map[uint64]TTEntry` with a fixed-size slice indexed by `hash % size`. Eliminates GC pressure, improves cache locality, and allows memory budget control via UCI `Hash` option. +- **Incremental Zobrist Updates:** Update the hash incrementally on Move/UndoMove instead of recomputing from scratch. Reduces hashing cost from O(pieces) to O(1) per move. +- **Endgame Tablebases:** Syzygy tablebase support for perfect play in positions with ≤6 pieces. --- @@ -262,4 +294,4 @@ If you're unsure where to start, feel free to ask! ## 8. 📄 License -This project is licensed under the [MIT License](./LICENSE) (or specify your chosen license). 
+This project is licensed under the [MIT License](./LICENSE) diff --git a/pkg/search.go b/pkg/search.go index 6a5e1c3..060cc16 100644 --- a/pkg/search.go +++ b/pkg/search.go @@ -304,9 +304,26 @@ func (board *Board) AlphaBetaSearch(depth int, maximizing bool, alpha int, beta } hash := board.ZobristHash() - if score, ok := tt.Get(hash, depth); ok { + if entry, ok := tt.Get(hash, depth); ok { stats.IncTTHit() - return score + switch entry.Bound { + case BoundExact: + return entry.Score + case BoundLower: + if entry.Score >= beta { + return entry.Score + } + if entry.Score > alpha && maximizing { + alpha = entry.Score + } + case BoundUpper: + if entry.Score <= alpha { + return entry.Score + } + if entry.Score < beta && !maximizing { + beta = entry.Score + } + } } moves := board.GenerateLegalMoves() @@ -316,6 +333,8 @@ func (board *Board) AlphaBetaSearch(depth int, maximizing bool, alpha int, beta return board.MateOrStalemateScore(maximizing) } + origAlpha := alpha + origBeta := beta var result int var bestMove Move if maximizing { @@ -338,7 +357,6 @@ func (board *Board) AlphaBetaSearch(depth int, maximizing bool, alpha int, beta stats.IncBetaCutoff() if move.MoveType != MoveCapture && move.MoveType != MovePromotionCapture { ctx.AddKillerMove(move, ply) - // Update history heuristic for quiet moves ctx.HistoryHeuristic[PieceToHistoryIndex[move.Piece]][move.To] += depth * depth } nodesPruned := len(moves) - (i + 1) @@ -369,7 +387,6 @@ func (board *Board) AlphaBetaSearch(depth int, maximizing bool, alpha int, beta stats.IncBetaCutoff() if move.MoveType != MoveCapture && move.MoveType != MovePromotionCapture { ctx.AddKillerMove(move, ply) - // Update history heuristic for quiet moves ctx.HistoryHeuristic[PieceToHistoryIndex[move.Piece]][move.To] += depth * depth } nodesPruned := len(moves) - (i + 1) @@ -382,7 +399,23 @@ func (board *Board) AlphaBetaSearch(depth int, maximizing bool, alpha int, beta result = minEval } + // Determine bound type + var bound byte = BoundExact 
+ if maximizing { + if result <= origAlpha { + bound = BoundUpper + } else if result >= beta { + bound = BoundLower + } + } else { + if result >= origBeta { + bound = BoundLower + } else if result <= alpha { + bound = BoundUpper + } + } + stats.IncTTStore() - tt.Set(hash, depth, result, bestMove) + tt.Set(hash, depth, result, bestMove, bound) return result } diff --git a/pkg/tt.go b/pkg/tt.go index ee4601f..e8430bc 100644 --- a/pkg/tt.go +++ b/pkg/tt.go @@ -2,19 +2,21 @@ package libra import "sync" -type TTKey struct { - Hash uint64 - Depth int -} +const ( + BoundExact = iota + BoundLower // score is a lower bound (beta cutoff) + BoundUpper // score is an upper bound (failed low) +) type TTEntry struct { Score int BestMove Move Depth int + Bound byte } type TranspositionTable struct { - table map[uint64]TTEntry // hash -> entry + table map[uint64]TTEntry mu sync.RWMutex } @@ -24,22 +26,22 @@ func NewTranspositionTable() *TranspositionTable { } } -func (tt *TranspositionTable) Get(hash uint64, depth int) (int, bool) { +func (tt *TranspositionTable) Get(hash uint64, depth int) (TTEntry, bool) { tt.mu.RLock() defer tt.mu.RUnlock() entry, ok := tt.table[hash] if !ok || entry.Depth < depth { - return 0, false + return TTEntry{}, false } - return entry.Score, true + return entry, true } -func (tt *TranspositionTable) Set(hash uint64, depth int, value int, bestMove Move) { +func (tt *TranspositionTable) Set(hash uint64, depth int, value int, bestMove Move, bound byte) { tt.mu.Lock() defer tt.mu.Unlock() entry, ok := tt.table[hash] if !ok || depth >= entry.Depth { - tt.table[hash] = TTEntry{Score: value, BestMove: bestMove, Depth: depth} + tt.table[hash] = TTEntry{Score: value, BestMove: bestMove, Depth: depth, Bound: bound} } }