From 6c8676e9dd5223cb43eeb29a1528b9d9a37f7d39 Mon Sep 17 00:00:00 2001
From: dmgcodevil <dmgcodevil@gmail.com>
Date: Sat, 7 Mar 2026 11:29:43 -0500
Subject: [PATCH 1/4] update operation

---
 CMakeLists.txt              |   2 +-
 antlr/TundraQL.g4           |  16 +-
 docs/tundraql.html          |  93 +++++++-
 include/core.hpp            |  97 ++++++++
 include/node.hpp            |  30 +++
 include/query.hpp           | 174 ++++++++++++++
 src/core.cpp                | 193 +++++++++++++++
 src/tundra_shell.cpp        | 184 ++++++++++++++
 tests/CMakeLists.txt        |  22 ++
 tests/update_query_test.cpp | 461 ++++++++++++++++++++++++++++++++++++
 10 files changed, 1269 insertions(+), 3 deletions(-)
 create mode 100644 tests/update_query_test.cpp
diff --git a/CMakeLists.txt b/CMakeLists.txt
index ec14971..9e1b7c1 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -4,7 +4,7 @@ project(tundradb VERSION 1.0.0 LANGUAGES CXX C)
 set(CMAKE_CXX_STANDARD 23)
 set(CMAKE_CXX_STANDARD_REQUIRED ON)
 
-# Option: build shared or static library (default: static)
+# Option: build shared or static library (default: static)
 option(BUILD_SHARED_LIBS "Build TundraDB as a shared library" OFF)
 option(TUNDRADB_BUILD_SHELL "Build the tundra_shell interactive CLI" ON)
 option(TUNDRADB_BUILD_TESTS "Build tests" ON)
diff --git a/antlr/TundraQL.g4 b/antlr/TundraQL.g4
index 9b9df70..4b4a50e 100644
--- a/antlr/TundraQL.g4
+++ b/antlr/TundraQL.g4
@@ -1,7 +1,7 @@
 grammar TundraQL;
 
 // Entry point for parsing a full command
-statement: createSchemaStatement | createNodeStatement | createEdgeStatement | matchStatement | deleteStatement | commitStatement | showStatement EOF;
+statement: createSchemaStatement | createNodeStatement | createEdgeStatement | matchStatement | deleteStatement | updateStatement | commitStatement | showStatement EOF;
 
 // --- Schema Definition ---
 createSchemaStatement: K_CREATE K_SCHEMA IDENTIFIER LPAREN schemaFieldList RPAREN SEMI;
@@ -47,6 +47,18 @@ edgeDeleteTarget:
     | K_EDGE IDENTIFIER K_TO nodeSelector                 // DELETE EDGE edge_type TO node;
     | K_EDGE IDENTIFIER K_FROM nodeSelector K_TO nodeSelector;  // DELETE EDGE edge_type FROM node TO node;
 
+// --- Update Statement ---
+// UPDATE User(0) SET name = "Bob", age = 31;
+// UPDATE (u:User) SET u.age = 31 WHERE u.name = "Alice";
+updateStatement: K_UPDATE updateTarget K_SET setClause (K_WHERE whereClause)? SEMI;
+
+updateTarget:
+    nodeLocator                    // UPDATE User(0) SET ...;
+    | nodePattern;                 // UPDATE (u:User) SET ... WHERE ...;
+
+setClause: setAssignment (COMMA setAssignment)*;
+setAssignment: IDENTIFIER (DOT IDENTIFIER)? EQ value;
+
 // --- Commit Statement ---
 commitStatement: K_COMMIT SEMI;
 
@@ -106,6 +118,8 @@ K_RIGHT: 'RIGHT';
 K_FULL: 'FULL';
 K_AND: 'AND';
 K_OR: 'OR';
+K_UPDATE: 'UPDATE';
+K_SET: 'SET';
 K_COMMIT: 'COMMIT';
 K_UNIQUE: 'UNIQUE';
 K_SHOW: 'SHOW';
diff --git a/docs/tundraql.html b/docs/tundraql.html
index ff90f06..7701c53 100644
--- a/docs/tundraql.html
+++ b/docs/tundraql.html
@@ -129,6 +129,7 @@ <h4>Statements</h4>
             <a href="#node">CREATE NODE</a>
             <a href="#edge">CREATE EDGE</a>
             <a href="#match">MATCH</a>
+            <a href="#update">UPDATE</a>
             <a href="#delete">DELETE</a>
             <a href="#commit">COMMIT</a>
             <a href="#show">SHOW</a>
@@ -136,6 +137,7 @@ <h4>Statements</h4>
             <h4>Clauses</h4>
             <a href="#where">WHERE</a>
             <a href="#select">SELECT</a>
+            <a href="#set">SET</a>
             <a href="#joins">JOIN Types</a>
             <div class="sep"></div>
             <h4>Reference</h4>
@@ -300,6 +302,58 @@ <h2 id="delete">DELETE</h2>
 <span class="kw">DELETE EDGE</span> <span class="edge-type">works_at</span> <span class="kw">TO</span> <span class="alias">Company</span>(<span class="num">0</span>);</code></pre>
             </div>
 
+            <!-- ========== UPDATE ========== -->
+            <h2 id="update">UPDATE</h2>
+            <p>Modifies field values on existing nodes. Supports two forms: <strong>by ID</strong> (direct) and <strong>by pattern</strong> (with optional <code>WHERE</code> filter).</p>
+
+            <h3>Form 1 — Update by ID</h3>
+            <p>Targets a single node using <code>Schema(id)</code>. Field names are bare (no alias prefix).</p>
+
+            <div class="syntax-block">
+                <div class="label">Syntax <span class="tag">DML</span></div>
+                <pre><code><span class="kw">UPDATE</span> <span class="alias">Schema</span>(<span class="num">id</span>) <span class="kw">SET</span> <span class="alias">field</span> = <span class="str">value</span> [, <span class="alias">field</span> = <span class="str">value</span> ...] ;</code></pre>
+            </div>
+
+            <div class="syntax-block">
+                <div class="label">Examples</div>
+                <pre><code><span class="cmt">// Update a single field</span>
+<span class="kw">UPDATE</span> <span class="alias">User</span>(<span class="num">0</span>) <span class="kw">SET</span> <span class="alias">age</span> = <span class="num">31</span>;
+
+<span class="cmt">// Update multiple fields at once (creates one version)</span>
+<span class="kw">UPDATE</span> <span class="alias">User</span>(<span class="num">0</span>) <span class="kw">SET</span> <span class="alias">name</span> = <span class="str">"Alice B."</span>, <span class="alias">age</span> = <span class="num">31</span>;</code></pre>
+            </div>
+
+            <h3>Form 2 — Update by Pattern</h3>
+            <p>Uses a node pattern <code>(alias:Schema)</code> with an optional <code>WHERE</code> clause to match nodes. Field names must be alias-qualified (<code>alias.field</code>).</p>
+
+            <div class="syntax-block">
+                <div class="label">Syntax <span class="tag">DML</span></div>
+                <pre><code><span class="kw">UPDATE</span> (<span class="alias">alias</span>:<span class="type">Schema</span>) <span class="kw">SET</span> <span class="alias">alias</span>.<span class="alias">field</span> = <span class="str">value</span> [, ...]
+    [<span class="kw">WHERE</span> <span class="alias">alias</span>.<span class="alias">field</span> <span class="op">op</span> <span class="str">value</span>] ;</code></pre>
+            </div>
+
+            <div class="syntax-block">
+                <div class="label">Examples</div>
+                <pre><code><span class="cmt">// Update all users named Alice</span>
+<span class="kw">UPDATE</span> (<span class="alias">u</span>:<span class="type">User</span>) <span class="kw">SET</span> <span class="alias">u</span>.<span class="alias">age</span> = <span class="num">31</span>
+    <span class="kw">WHERE</span> <span class="alias">u</span>.<span class="alias">name</span> <span class="op">=</span> <span class="str">"Alice"</span>;
+
+<span class="cmt">// Update with compound condition</span>
+<span class="kw">UPDATE</span> (<span class="alias">u</span>:<span class="type">User</span>) <span class="kw">SET</span> <span class="alias">u</span>.<span class="alias">name</span> = <span class="str">"Senior"</span>
+    <span class="kw">WHERE</span> <span class="alias">u</span>.<span class="alias">age</span> <span class="op">&gt;</span> <span class="num">30</span> <span class="kw">AND</span> <span class="alias">u</span>.<span class="alias">age</span> <span class="op">&lt;</span> <span class="num">50</span>;
+
+<span class="cmt">// Update all nodes of a schema (no WHERE)</span>
+<span class="kw">UPDATE</span> (<span class="alias">u</span>:<span class="type">User</span>) <span class="kw">SET</span> <span class="alias">u</span>.<span class="alias">age</span> = <span class="num">0</span>;</code></pre>
+            </div>
+
+            <div class="info-box">
+                <strong>Versioning:</strong> When multiple fields are updated in a single <code>SET</code> clause, TundraDB creates <strong>one version</strong> for the entire batch — not one per field.
+            </div>
+
+            <div class="warn-box">
+                <strong>Pattern form requires alias prefix:</strong> <code>SET age = 31</code> is only valid in the by-ID form. In the pattern form you must write <code>SET u.age = 31</code>.
+            </div>
+
             <!-- ========== COMMIT ========== -->
             <h2 id="commit">COMMIT</h2>
             <p>Persists the current database state to disk (Parquet files + JSON metadata).</p>
@@ -360,6 +414,37 @@ <h2 id="select">SELECT Clause</h2>
     <span class="kw">SELECT</span> <span class="alias">u</span>.<span class="alias">name</span> <span class="kw">AS</span> <span class="alias">employee</span>, <span class="alias">c</span>.<span class="alias">name</span> <span class="kw">AS</span> <span class="alias">company</span>, <span class="alias">u</span>.<span class="alias">age</span>;</code></pre>
             </div>
 
+            <!-- ========== SET ========== -->
+            <h2 id="set">SET Clause</h2>
+            <p>Specifies field assignments in an <code>UPDATE</code> statement. Comma-separated list of <code>field = value</code> pairs.</p>
+
+            <div class="syntax-block">
+                <div class="label">Syntax</div>
+                <pre><code><span class="kw">SET</span> <span class="alias">field</span> = <span class="str">value</span> [, <span class="alias">field</span> = <span class="str">value</span> ...]</code></pre>
+            </div>
+
+            <table class="type-table">
+                <tr>
+                    <th>Update Form</th>
+                    <th>Field Name Format</th>
+                    <th>Example</th>
+                </tr>
+                <tr>
+                    <td>By ID</td>
+                    <td>Bare name</td>
+                    <td><code>SET name = "Alice", age = 31</code></td>
+                </tr>
+                <tr>
+                    <td>By Pattern</td>
+                    <td>Alias-qualified</td>
+                    <td><code>SET u.name = "Alice", u.age = 31</code></td>
+                </tr>
+            </table>
+
+            <div class="info-box">
+                <strong>Batch semantics:</strong> All assignments in a single <code>SET</code> clause are applied atomically — one version is created per node, regardless of how many fields are changed.
+            </div>
+
             <!-- ========== JOIN TYPES ========== -->
             <h2 id="joins">JOIN Types</h2>
             <p>Specified inside the edge pattern <code>-[:EDGE_TYPE <em>JOIN</em>]-></code>. Controls how unmatched nodes are handled.</p>
@@ -481,7 +566,13 @@ <h2 id="patterns">Pattern Syntax Reference</h2>
     <span class="kw">WHERE</span> <span class="alias">u</span>.<span class="alias">name</span> <span class="op">=</span> <span class="str">"Alice"</span> <span class="kw">AND</span> <span class="alias">c</span>.<span class="alias">name</span> <span class="op">=</span> <span class="str">"Google"</span>
     <span class="kw">SELECT</span> <span class="alias">u</span>.<span class="alias">name</span> <span class="kw">AS</span> <span class="alias">user</span>, <span class="alias">f</span>.<span class="alias">name</span> <span class="kw">AS</span> <span class="alias">friend</span>, <span class="alias">c</span>.<span class="alias">name</span> <span class="kw">AS</span> <span class="alias">company</span>;
 
-<span class="cmt">// 5. Persist</span>
+<span class="cmt">// 5. Update: Alice turned 31</span>
+<span class="kw">UPDATE</span> <span class="alias">User</span>(<span class="num">0</span>) <span class="kw">SET</span> <span class="alias">age</span> = <span class="num">31</span>;
+
+<span class="cmt">// 6. Bulk update: set all users older than 30 to "Senior"</span>
+<span class="kw">UPDATE</span> (<span class="alias">u</span>:<span class="type">User</span>) <span class="kw">SET</span> <span class="alias">u</span>.<span class="alias">name</span> = <span class="str">"Senior"</span> <span class="kw">WHERE</span> <span class="alias">u</span>.<span class="alias">age</span> <span class="op">&gt;</span> <span class="num">30</span>;
+
+<span class="cmt">// 7. Persist</span>
 <span class="kw">COMMIT</span>;</code></pre>
             </div>
 
diff --git a/include/core.hpp b/include/core.hpp
index 5305c44..caad2da 100644
--- a/include/core.hpp
+++ b/include/core.hpp
@@ -237,6 +237,23 @@ class Shard {
     return nodes_[node_id]->update(field, value, update_type);
   }
 
+  /**
+   * @brief Batch-update multiple fields on one node (creates 1 version).
+   */
+  arrow::Result<bool> update_fields(
+      const int64_t node_id,
+      const std::vector<std::pair<std::shared_ptr<Field>, Value>>
+          &field_updates,
+      const UpdateType update_type) {
+    if (!nodes_.contains(node_id)) {
+      return arrow::Status::KeyError("Node not found: ", node_id);
+    }
+    dirty_ = true;
+    updated_ = true;
+    updated_ts_ = now_millis();
+    return nodes_[node_id]->update_fields(field_updates, update_type);
+  }
+
   arrow::Result<std::shared_ptr<arrow::Table>> get_table(TemporalContext *ctx) {
     // if we have ctx we need to create a new table every time
     if (dirty_ || !table_ || ctx) {
@@ -572,6 +589,29 @@ class ShardManager {
                                    schema_name);
   }
 
+  /**
+   * @brief Batch-update multiple fields on one node (creates 1 version).
+   */
+  arrow::Result<bool> update_node_fields(
+      const std::string &schema_name, const int64_t id,
+      const std::vector<std::pair<std::shared_ptr<Field>, Value>>
+          &field_updates,
+      const UpdateType update_type) {
+    auto schema_it = shards_.find(schema_name);
+    if (schema_it == shards_.end()) {
+      return arrow::Status::KeyError("Schema not found: ", schema_name);
+    }
+
+    for (const auto &shard : schema_it->second) {
+      if (id >= shard->min_id && id <= shard->max_id) {
+        return shard->update_fields(id, field_updates, update_type);
+      }
+    }
+
+    return arrow::Status::KeyError("Node with id ", id, " not found in schema ",
+                                   schema_name);
+  }
+
   arrow::Result<std::vector<std::shared_ptr<Node>>> get_nodes(
       const std::string &schema_name) {
     const auto schema_it = shards_.find(schema_name);
@@ -785,6 +825,18 @@ class Database {
                                        update_type);
   }
 
+  /**
+   * @brief Batch-update multiple fields on one node (creates 1 version).
+   */
+  arrow::Result<bool> update_node_fields(
+      const std::string &schema_name, const int64_t id,
+      const std::vector<std::pair<std::shared_ptr<Field>, Value>>
+          &field_updates,
+      const UpdateType update_type) {
+    return shard_manager_->update_node_fields(schema_name, id, field_updates,
+                                              update_type);
+  }
+
   arrow::Result<bool> remove_node(const std::string &schema_name,
                                   int64_t node_id) {
     if (auto res = node_manager_->remove_node(schema_name, node_id); !res) {
@@ -877,6 +929,51 @@ class Database {
 
   [[nodiscard]] arrow::Result<std::shared_ptr<QueryResult>> query(
       const Query &query) const;
+
+  /**
+   * @brief Execute an UpdateQuery.
+   *
+   * Mode 1 — by ID (bare field names):
+   *   db.update(UpdateQuery::on("User", 0).set("age", Value(31)).build());
+   *
+   * Mode 2 — by MATCH query (alias-qualified SET, multi-schema):
+   *   db.update(UpdateQuery::match(
+   *       Query::from("u:User")
+   *           .traverse("u", "WORKS_AT", "c:Company")
+   *           .where("c.name", CompareOp::Eq, Value("Google"))
+   *           .build()
+   *   ).set("u.status", Value("employed"))
+   *    .set("c.size", Value(int32_t(5001)))
+   *    .build());
+   */
+  [[nodiscard]] arrow::Result<UpdateResult> update(const UpdateQuery &uq);
+
+ private:
+  /** Mode 1: update a single node by schema + ID. */
+  [[nodiscard]] arrow::Result<UpdateResult> update_by_id(const UpdateQuery &uq);
+
+  /** Mode 2: find nodes via MATCH query, then batch-update each. */
+  [[nodiscard]] arrow::Result<UpdateResult> update_by_match(
+      const UpdateQuery &uq);
+
+  /**
+   * Apply field updates to every node whose ID appears in @p id_column.
+   * One call to update_node_fields() per unique node ID (1 version each).
+   */
+  void apply_updates(
+      const std::string &schema_name,
+      const std::shared_ptr<arrow::ChunkedArray> &id_column,
+      const std::vector<std::pair<std::shared_ptr<Field>, Value>> &fields,
+      UpdateType update_type, UpdateResult &result);
+
+  /**
+   * Build an alias→schema mapping from a Query's FROM + TRAVERSE clauses.
+   * Only declarations ("alias:Schema") are recorded; bare references ("alias")
+   * are skipped.  Returns an error if the same alias is bound to two different
+   * schemas.
+   */
+  static arrow::Result<std::unordered_map<std::string, std::string>>
+  resolve_alias_map(const Query &query);
 };
 
 }  // namespace tundradb
diff --git a/include/node.hpp b/include/node.hpp
index 757aa82..b764bb1 100644
--- a/include/node.hpp
+++ b/include/node.hpp
@@ -114,6 +114,36 @@ class Node {
     return true;
   }
 
+  /**
+   * @brief Batch-update multiple fields in a single version.
+   *
+   * When using the arena (versioned storage), this creates exactly ONE
+   * new version for all field updates instead of N versions.
+   */
+  arrow::Result<bool> update_fields(
+      const std::vector<std::pair<std::shared_ptr<Field>, Value>>
+          &field_updates,
+      UpdateType update_type) {
+    if (field_updates.empty()) return true;
+
+    if (arena_ != nullptr) {
+      return arena_->update_fields(*handle_, layout_, field_updates);
+    }
+
+    // Non-arena fallback: write straight into the data_ map. Every name is
+    // checked before the first write, so an unknown field fails the whole
+    // batch instead of leaving the node half-updated.
+    for (const auto &[field, value] : field_updates) {
+      if (data_.find(field->name()) == data_.end()) {
+        return arrow::Status::KeyError("Field not found: ", field->name());
+      }
+    }
+    for (const auto &[field, value] : field_updates) {
+      if (update_type == SET) data_[field->name()] = value;  // else: no-op
+    }
+    return true;
+  }
+
   [[deprecated]]
   arrow::Result<bool> set_value(const std::string &field, const Value &value) {
     log_warn("set_value by string is deprecated");
diff --git a/include/query.hpp b/include/query.hpp
index c4cc92f..18d4e69 100644
--- a/include/query.hpp
+++ b/include/query.hpp
@@ -15,6 +15,7 @@
 #include <sstream>
 #include <string>
 #include <unordered_map>
+#include <utility>
 #include <vector>
 
 #include "node.hpp"
@@ -630,6 +631,179 @@ class QueryResult {
   QueryExecutionStats stats_;
 };
 
+/** @brief A single field assignment: field_name = value. */
+struct SetAssignment {
+  std::string field_name;
+  Value value;
+
+  SetAssignment(std::string field_name, Value value)
+      : field_name(std::move(field_name)), value(std::move(value)) {}
+};
+
+/**
+ * @brief Describes an UPDATE operation.
+ *
+ * Two modes:
+ *
+ *   **Mode 1 — by ID** (no query engine involved, bare field names):
+ *   @code
+ *   UpdateQuery::on("User", 0).set("age", Value(31)).build();
+ *   @endcode
+ *
+ *   **Mode 2 — by MATCH query** (alias-qualified SET fields):
+ *   @code
+ *   // Simple WHERE — update one schema:
+ *   UpdateQuery::match(
+ *       Query::from("u:User")
+ *           .where("u.city", CompareOp::Eq, Value("NYC"))
+ *           .build()
+ *   ).set("u.status", Value("active")).build();
+ *
+ *   // Traversal — update multiple schemas:
+ *   UpdateQuery::match(
+ *       Query::from("u:User")
+ *           .traverse("u", "WORKS_AT", "c:Company")
+ *           .where("c.name", CompareOp::Eq, Value("Google"))
+ *           .build()
+ *   ).set("u.status", Value("employed"))
+ *    .set("c.size", Value(int32_t(5001)))
+ *    .build();
+ *   @endcode
+ *
+ *   In Mode 2, each SET field must be alias-qualified ("alias.field").
+ *   The target aliases are derived from the SET assignments automatically.
+ */
+class UpdateQuery {
+ public:
+  class Builder;
+
+  /** @brief Schema name (Mode 1 only). */
+  [[nodiscard]] const std::string& schema() const { return schema_; }
+
+  [[nodiscard]] const std::vector<SetAssignment>& assignments() const {
+    return assignments_;
+  }
+
+  /** @brief Node ID for Mode 1 (direct update). */
+  [[nodiscard]] const std::optional<int64_t>& node_id() const {
+    return node_id_;
+  }
+
+  /** @brief The MATCH query for Mode 2 (query-based update). */
+  [[nodiscard]] const std::optional<Query>& match_query() const {
+    return match_query_;
+  }
+
+  /** @brief True if this is a Mode 2 (query-based) update. */
+  [[nodiscard]] bool has_match() const { return match_query_.has_value(); }
+
+  [[nodiscard]] UpdateType update_type() const { return update_type_; }
+
+  /**
+   * @brief Extract unique aliases referenced in SET assignments.
+   *
+   * For Mode 2 only.  Parses each "alias.field" to collect the set of
+   * distinct alias prefixes (e.g. {"u", "c"}).
+   */
+  [[nodiscard]] std::vector<std::string> target_aliases() const {
+    std::vector<std::string> aliases;
+    for (const auto& a : assignments_) {
+      auto dot = a.field_name.find('.');
+      if (dot == std::string::npos) continue;
+      std::string alias = a.field_name.substr(0, dot);
+      if (std::find(aliases.begin(), aliases.end(), alias) == aliases.end()) {
+        aliases.push_back(alias);
+      }
+    }
+    return aliases;
+  }
+
+  /** @brief Mode 1 — target a specific node by schema + ID. */
+  static Builder on(const std::string& schema, int64_t node_id) {
+    return {schema, node_id};
+  }
+
+  /** @brief Mode 2 — target nodes found by a MATCH query. */
+  static Builder match(Query query) { return Builder{std::move(query)}; }
+
+  class Builder {
+   public:
+    /** @brief Mode 1 constructor: update a specific node by ID. */
+    Builder(std::string schema, int64_t node_id)
+        : schema_(std::move(schema)), node_id_(node_id) {}
+
+    /** @brief Mode 2 constructor: update nodes found by a MATCH query. */
+    explicit Builder(Query query) : match_query_(std::move(query)) {}
+
+    /**
+     * @brief Add a field assignment.
+     *
+     * - Mode 1: bare name  — set("age", Value(31))
+     * - Mode 2: qualified  — set("u.age", Value(31))
+     */
+    Builder& set(std::string field_name, Value value) {
+      assignments_.emplace_back(std::move(field_name), std::move(value));
+      return *this;
+    }
+
+    /** @brief Override the update type (default: SET). */
+    Builder& type(UpdateType t) {
+      update_type_ = t;
+      return *this;
+    }
+
+    /** @brief Build the immutable UpdateQuery (rvalue). */
+    [[nodiscard]] UpdateQuery build() && {
+      if (assignments_.empty()) {
+        throw std::runtime_error(
+            "UpdateQuery must have at least one SET assignment");
+      }
+      return UpdateQuery(std::move(schema_), std::move(assignments_), node_id_,
+                         std::move(match_query_), update_type_);
+    }
+
+    /** @brief Build the immutable UpdateQuery (lvalue). */
+    [[nodiscard]] UpdateQuery build() & {
+      if (assignments_.empty()) {
+        throw std::runtime_error(
+            "UpdateQuery must have at least one SET assignment");
+      }
+      return UpdateQuery(schema_, assignments_, node_id_, match_query_,
+                         update_type_);
+    }
+
+   private:
+    std::string schema_;
+    std::vector<SetAssignment> assignments_;
+    std::optional<int64_t> node_id_;
+    std::optional<Query> match_query_;
+    UpdateType update_type_ = UpdateType::SET;
+  };
+
+ private:
+  UpdateQuery(std::string schema, std::vector<SetAssignment> assignments,
+              std::optional<int64_t> node_id, std::optional<Query> match_query,
+              UpdateType update_type)
+      : schema_(std::move(schema)),
+        assignments_(std::move(assignments)),
+        node_id_(std::move(node_id)),
+        match_query_(std::move(match_query)),
+        update_type_(update_type) {}
+
+  std::string schema_;
+  std::vector<SetAssignment> assignments_;
+  std::optional<int64_t> node_id_;
+  std::optional<Query> match_query_;
+  UpdateType update_type_;
+};
+
+/** @brief Result of an update operation. */
+struct UpdateResult {
+  int64_t updated_count = 0;        ///< Number of nodes updated.
+  int64_t failed_count = 0;         ///< Number of nodes that failed to update.
+  std::vector<std::string> errors;  ///< Error messages for failed updates.
+};
+
 }  // namespace tundradb
 
 #endif  // QUERY_HPP
diff --git a/src/core.cpp b/src/core.cpp
index 9f8b3e2..f57e240 100644
--- a/src/core.cpp
+++ b/src/core.cpp
@@ -848,4 +848,197 @@ arrow::Result<std::shared_ptr<QueryResult>> Database::query(
   return result;
 }
 
+// ---------------------------------------------------------------------------
+// Database::update  - dispatch to Mode 1 or Mode 2
+// ---------------------------------------------------------------------------
+arrow::Result<UpdateResult> Database::update(const UpdateQuery& uq) {
+  if (uq.node_id().has_value()) {
+    return update_by_id(uq);
+  }
+  if (uq.has_match()) {
+    return update_by_match(uq);
+  }
+  return arrow::Status::Invalid(
+      "UpdateQuery must specify a node ID or a MATCH query");
+}
+
+// ---------------------------------------------------------------------------
+// Mode 1: update a single node by schema + ID
+// ---------------------------------------------------------------------------
+arrow::Result<UpdateResult> Database::update_by_id(const UpdateQuery& uq) {
+  UpdateResult result;
+
+  auto schema_result = schema_registry_->get(uq.schema());
+  if (!schema_result.ok()) {
+    return arrow::Status::KeyError("Schema '", uq.schema(), "' not found");
+  }
+  const auto& schema = schema_result.ValueOrDie();
+
+  // Resolve fields upfront - fail early on bad field names
+  std::vector<std::pair<std::shared_ptr<Field>, Value>> resolved;
+  resolved.reserve(uq.assignments().size());
+  for (const auto& a : uq.assignments()) {
+    auto field = schema->get_field(a.field_name);
+    if (!field) {
+      return arrow::Status::Invalid(
+          "Field '", a.field_name, "' not found in schema '", uq.schema(), "'");
+    }
+    resolved.emplace_back(field, a.value);
+  }
+
+  const int64_t id = uq.node_id().value();
+  if (const auto r =
+          update_node_fields(uq.schema(), id, resolved, uq.update_type());
+      !r.ok()) {
+    result.failed_count++;
+    result.errors.push_back(uq.schema() + "(" + std::to_string(id) +
+                            "): " + r.status().ToString());
+  } else {
+    result.updated_count = 1;
+  }
+  return result;
+}
+
+// ---------------------------------------------------------------------------
+// Mode 2: find nodes via MATCH query, then batch-update each
+// ---------------------------------------------------------------------------
+arrow::Result<UpdateResult> Database::update_by_match(const UpdateQuery& uq) {
+  UpdateResult result;
+  const auto& match_query = uq.match_query().value();
+
+  // 1. Resolve alias -> schema mapping (declarations only, with validation)
+  ARROW_ASSIGN_OR_RAISE(auto alias_to_schema, resolve_alias_map(match_query));
+
+  // 2. Group SET assignments by alias: { alias -> (schema, [(Field,Value)]) }
+  struct AliasUpdate {
+    std::string schema_name;
+    std::vector<std::pair<std::shared_ptr<Field>, Value>> fields;
+  };
+  std::unordered_map<std::string, AliasUpdate> grouped;
+
+  for (const auto& a : uq.assignments()) {
+    auto dot = a.field_name.find('.');
+    if (dot == std::string::npos) {
+      return arrow::Status::Invalid(
+          "SET field '", a.field_name,
+          "' must be alias-qualified (e.g. u.age) in a MATCH-based update");
+    }
+    std::string alias = a.field_name.substr(0, dot);
+    std::string bare_field = a.field_name.substr(dot + 1);
+
+    auto it = alias_to_schema.find(alias);
+    if (it == alias_to_schema.end()) {
+      return arrow::Status::Invalid("Alias '", alias,
+                                    "' not found in MATCH query");
+    }
+
+    auto schema_result = schema_registry_->get(it->second);
+    if (!schema_result.ok()) {
+      return arrow::Status::KeyError("Schema '", it->second, "' not found");
+    }
+    const auto& schema = schema_result.ValueOrDie();
+    auto field = schema->get_field(bare_field);
+    if (!field) {
+      return arrow::Status::Invalid("Field '", bare_field,
+                                    "' not found in schema '", it->second, "'");
+    }
+
+    auto& entry = grouped[alias];
+    if (entry.schema_name.empty()) entry.schema_name = it->second;
+    entry.fields.emplace_back(field, a.value);
+  }
+
+  // 3. Build ID-only SELECT: we only need "u.id", "c.id", etc.
+  std::vector<std::string> id_columns;
+  id_columns.reserve(grouped.size());
+  for (const auto& alias : grouped | std::views::keys) {
+    id_columns.push_back(alias + ".id");
+  }
+  Query id_query(match_query.from(), match_query.clauses(),
+                 std::make_shared<Select>(std::move(id_columns)),
+                 match_query.inline_where(), match_query.execution_config(),
+                 match_query.temporal_snapshot());
+
+  // 4. Run the MATCH query once
+  ARROW_ASSIGN_OR_RAISE(auto query_result, this->query(id_query));
+  auto table = query_result->table();
+  if (!table || table->num_rows() == 0) {
+    return result;
+  }
+
+  // 5. Apply updates per alias group
+  for (const auto& [alias, info] : grouped) {
+    auto id_column = table->GetColumnByName(alias + ".id");
+    if (!id_column) {
+      return arrow::Status::Invalid("Could not find '", alias,
+                                    ".id' column in query results");
+    }
+    apply_updates(info.schema_name, id_column, info.fields, uq.update_type(),
+                  result);
+  }
+
+  return result;
+}
+
+// ---------------------------------------------------------------------------
+// apply_updates - walk an ID column and batch-update each distinct node once
+// ---------------------------------------------------------------------------
+void Database::apply_updates(
+    const std::string& schema_name,
+    const std::shared_ptr<arrow::ChunkedArray>& id_column,
+    const std::vector<std::pair<std::shared_ptr<Field>, Value>>& fields,
+    UpdateType update_type, UpdateResult& result) {
+  std::unordered_map<int64_t, bool> seen;  // ID set (header already in scope)
+  for (const auto& raw : id_column->chunks()) {
+    const auto chunk = std::static_pointer_cast<arrow::Int64Array>(raw);
+    for (int64_t i = 0; i < chunk->length(); i++) {
+      if (chunk->IsNull(i)) continue;
+      const int64_t node_id = chunk->Value(i);
+      if (!seen.try_emplace(node_id).second) continue;  // ID repeated by a row
+      if (auto r =
+              update_node_fields(schema_name, node_id, fields, update_type);
+          !r.ok()) {
+        result.failed_count++;
+        result.errors.push_back(schema_name + "(" + std::to_string(node_id) +
+                                "): " + r.status().ToString());
+      } else {
+        result.updated_count++;
+      }
+    }
+  }
+}
+
+// ---------------------------------------------------------------------------
+// resolve_alias_map - build alias->schema from declarations, reject conflicts
+// ---------------------------------------------------------------------------
+arrow::Result<std::unordered_map<std::string, std::string>>
+Database::resolve_alias_map(const Query& query) {
+  std::unordered_map<std::string, std::string> map;
+
+  auto register_ref = [&](const SchemaRef& ref) -> arrow::Status {
+    if (!ref.is_declaration()) return arrow::Status::OK();
+    const auto& alias = ref.value();
+    const auto& schema = ref.schema();
+    if (auto [it, inserted] = map.emplace(alias, schema);
+        !inserted && it->second != schema) {
+      return arrow::Status::Invalid("Alias '", alias, "' bound to '",
+                                    it->second, "' cannot be re-bound to '",
+                                    schema, "'");
+    }
+    return arrow::Status::OK();
+  };
+
+  ARROW_RETURN_NOT_OK(register_ref(query.from()));
+
+  for (const auto& clause : query.clauses()) {
+    if (clause->type() == Clause::Type::TRAVERSE) {
+      const auto t = std::static_pointer_cast<Traverse>(clause);
+      ARROW_RETURN_NOT_OK(register_ref(t->source()));
+      ARROW_RETURN_NOT_OK(register_ref(t->target()));
+    }
+  }
+
+  return map;
+}
+
 }  // namespace tundradb
diff --git a/src/tundra_shell.cpp b/src/tundra_shell.cpp
index 25e0274..432e08f 100644
--- a/src/tundra_shell.cpp
+++ b/src/tundra_shell.cpp
@@ -908,6 +908,155 @@ class TundraQLVisitorImpl : public tundraql::TundraQLBaseVisitor {
     return deleted_count;
   }
 
+  // Handle UPDATE statements: build an UpdateQuery, run Database::update().
+  // Returns the updated-node count as int. Throws std::runtime_error on an
+  // unknown schema/field or failed update; value-parse errors also propagate.
+  antlrcpp::Any visitUpdateStatement(
+      tundraql::TundraQLParser::UpdateStatementContext* ctx) override {
+    spdlog::info("Executing UPDATE command");
+    auto updateTarget = ctx->updateTarget();
+    auto setClause = ctx->setClause();
+    // --- Determine schema name, alias, and optional node ID ---
+    std::string schema_name;
+    std::string alias;
+    std::optional<int64_t> node_id;
+
+    if (updateTarget->nodeLocator()) {
+      // UPDATE User(0) SET ...  — Mode 1 (by ID)
+      auto loc = updateTarget->nodeLocator();
+      schema_name = loc->IDENTIFIER()->getText();
+      node_id = std::stoll(loc->INTEGER_LITERAL()->getText());
+    } else if (updateTarget->nodePattern()) {
+      // UPDATE (u:User) SET ... WHERE ...  — Mode 2 (by MATCH)
+      auto pat = updateTarget->nodePattern();
+      if (pat->IDENTIFIER().size() > 1) {
+        alias = pat->IDENTIFIER(0)->getText();
+        schema_name = pat->IDENTIFIER(1)->getText();
+      } else {
+        alias = pat->IDENTIFIER(0)->getText();
+        schema_name = alias;
+      }
+    }
+
+    auto schema_registry = db.get_schema_registry();
+    tundradb::UpdateResult update_result;
+
+    if (node_id.has_value()) {
+      // ----- Mode 1: UPDATE User(0) SET age = 31; -----
+      // Bare field names, single schema.
+      auto schema_result = schema_registry->get(schema_name);
+      if (!schema_result.ok()) {
+        throw std::runtime_error("Schema '" + schema_name + "' not found");
+      }
+      auto schema = schema_result.ValueOrDie();
+
+      auto builder = tundradb::UpdateQuery::on(schema_name, *node_id);
+      for (auto assignment : setClause->setAssignment()) {
+        // A qualified "alias.field" is reduced to the bare field name.
+        const size_t name_idx = assignment->IDENTIFIER().size() == 2 ? 1 : 0;
+        std::string field_name = assignment->IDENTIFIER(name_idx)->getText();
+        std::string raw_value = assignment->value()->getText();
+        if (raw_value.size() >= 2 && raw_value.front() == '"' &&
+            raw_value.back() == '"') {
+          raw_value = raw_value.substr(1, raw_value.size() - 2);
+        }
+        auto field = schema->get_field(field_name);
+        if (!field) {
+          throw std::runtime_error("Field '" + field_name +
+                                   "' not found in schema '" + schema_name +
+                                   "'");
+        }
+        builder.set(field_name, parseValueForField(field, raw_value));
+      }
+
+      auto update_query = std::move(builder).build();
+      auto result = db.update(update_query);
+      if (!result.ok()) {
+        throw std::runtime_error("UPDATE failed: " +
+                                 result.status().ToString());
+      }
+      update_result = result.ValueOrDie();
+
+      // update() can succeed with nothing updated (see errors), so check.
+      if (update_result.updated_count > 0) {
+        *g_output_stream << "Updated " << schema_name << "(" << *node_id
+                         << "): " << update_query.assignments().size()
+                         << " field(s)" << std::endl;
+      }
+    } else {
+      // ----- Mode 2: UPDATE (u:User) SET u.age = 31 WHERE u.name = "Alice";
+      // Alias-qualified SET fields (e.g. "u.age").
+      auto query_builder = tundradb::Query::from(alias + ":" + schema_name);
+
+      if (ctx->whereClause()) {
+        processWhereClause(query_builder, ctx->whereClause());
+      }
+
+      auto match_query = query_builder.build();
+      auto builder = tundradb::UpdateQuery::match(std::move(match_query));
+
+      // Parse SET assignments — keep the alias.field format
+      for (auto assignment : setClause->setAssignment()) {
+        std::string qualified_name;
+        if (assignment->IDENTIFIER().size() == 2) {
+          // "u.age" → keep as "u.age"
+          qualified_name = assignment->IDENTIFIER(0)->getText() + "." +
+                           assignment->IDENTIFIER(1)->getText();
+        } else {
+          // bare field — assume the update target alias
+          qualified_name = alias + "." + assignment->IDENTIFIER(0)->getText();
+        }
+
+        // Resolve bare field for type conversion
+        std::string bare_field =
+            qualified_name.substr(qualified_name.find('.') + 1);
+
+        // Determine which schema this alias refers to
+        std::string set_alias =
+            qualified_name.substr(0, qualified_name.find('.'));
+        std::string set_schema = (set_alias == alias) ? schema_name : set_alias;
+
+        auto s_result = schema_registry->get(set_schema);
+        if (!s_result.ok()) {
+          throw std::runtime_error("Schema '" + set_schema + "' not found");
+        }
+        auto s = s_result.ValueOrDie();
+        auto field = s->get_field(bare_field);
+        if (!field) {
+          throw std::runtime_error("Field '" + bare_field +
+                                   "' not found in schema '" + set_schema +
+                                   "'");
+        }
+
+        std::string raw_value = assignment->value()->getText();
+        if (raw_value.size() >= 2 && raw_value.front() == '"' &&
+            raw_value.back() == '"') {
+          raw_value = raw_value.substr(1, raw_value.size() - 2);
+        }
+        builder.set(qualified_name, parseValueForField(field, raw_value));
+      }
+
+      auto update_query = std::move(builder).build();
+      auto result = db.update(update_query);
+      if (!result.ok()) {
+        throw std::runtime_error("UPDATE failed: " +
+                                 result.status().ToString());
+      }
+      update_result = result.ValueOrDie();
+
+      *g_output_stream << "Updated " << update_result.updated_count
+                       << " node(s): " << update_query.assignments().size()
+                       << " field(s)" << std::endl;
+    }
+
+    // Print any errors
+    for (const auto& err : update_result.errors) {
+      *g_output_stream << "Warning: " << err << std::endl;
+    }
+
+    return static_cast<int>(update_result.updated_count);
+  }
+
   // Handle SHOW statements
   antlrcpp::Any visitShowStatement(
       tundraql::TundraQLParser::ShowStatementContext* ctx) override {
@@ -1006,6 +1155,36 @@ class TundraQLVisitorImpl : public tundraql::TundraQLBaseVisitor {
   }
 
  private:
+  // Converts raw SET text to a Value of the field's type; throws on bad input.
+  tundradb::Value parseValueForField(
+      const std::shared_ptr<tundradb::Field>& field,
+      const std::string& raw_value) {
+    const auto ft = field->type();  // declared value type of the target field
+    std::string cleaned = raw_value;
+    cleaned.erase(0, cleaned.find_first_not_of(" \t\n\r"));  // trim left
+    cleaned.erase(cleaned.find_last_not_of(" \t\n\r") + 1);  // trim right
+    size_t used = 0;  // characters consumed by std::sto* (which may throw too)
+    const auto whole = [&](auto v) {  // reject partial parses, e.g. "12abc"
+      if (used == cleaned.size()) return tundradb::Value(v);
+      throw std::runtime_error("Invalid numeric value: " + cleaned);
+    };
+    if (ft == tundradb::ValueType::STRING) return tundradb::Value(cleaned);
+    if (ft == tundradb::ValueType::INT64)
+      return whole(static_cast<int64_t>(std::stoll(cleaned, &used)));
+    if (ft == tundradb::ValueType::INT32)
+      return whole(static_cast<int32_t>(std::stoi(cleaned, &used)));
+    if (ft == tundradb::ValueType::DOUBLE || ft == tundradb::ValueType::FLOAT)
+      return whole(std::stod(cleaned, &used));
+    if (ft != tundradb::ValueType::BOOL)
+      throw std::runtime_error("Unsupported field type for SET: " +
+                               tundradb::to_string(ft));
+    std::string lower = cleaned;  // cast below: tolower(negative char) is UB
+    for (auto& c : lower) c = ::tolower(static_cast<unsigned char>(c));
+    if (lower == "true" || lower == "1") return tundradb::Value(true);
+    if (lower == "false" || lower == "0") return tundradb::Value(false);
+    throw std::runtime_error("Invalid boolean value: " + cleaned);
+  }
+
   // Helper method to extract schema name from a node selector
   std::string getSchemaFromSelector(
       tundraql::TundraQLParser::NodeSelectorContext* selector) {
@@ -1470,6 +1649,7 @@ static void completionCallback(const char* buf, linenoiseCompletions* lc) {
     // Empty buffer, show all top-level commands
     linenoiseAddCompletion(lc, "CREATE ");
     linenoiseAddCompletion(lc, "MATCH ");
+    linenoiseAddCompletion(lc, "UPDATE ");
     linenoiseAddCompletion(lc, "DELETE ");
     linenoiseAddCompletion(lc, "SHOW ");
     linenoiseAddCompletion(lc, "COMMIT");
@@ -1542,6 +1722,10 @@ static char* hintsCallback(const char* buf, int* color, int* bold) {
   if (strcmp(buf, "DELETE EDGE ") == 0) {
     return const_cast<char*>("edge_type [FROM node] [TO node]");
   }
+  if (strcmp(buf, "UPDATE ") == 0) {
+    return const_cast<char*>(
+        "User(0) SET field = value | (u:User) SET u.field = value WHERE ...");
+  }
   if (strcmp(buf, "SHOW ") == 0) {
     return const_cast<char*>("EDGES edge_type | EDGE TYPES");
   }
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 709e4d5..921d7eb 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -146,6 +146,10 @@ add_executable(node_view_test
 add_executable(temporal_query_test
         temporal_query_test.cpp)
 
+# Update query test
+add_executable(update_query_test
+        update_query_test.cpp)
+
 # Link against Arrow and GTest
 target_link_libraries(sharding_test
     PRIVATE
@@ -369,6 +373,19 @@ target_link_libraries(temporal_query_test
         LLVMSupport LLVMCore
 )
 
+target_link_libraries(update_query_test
+        PRIVATE
+        core
+        Arrow::arrow_shared
+        Parquet::parquet_shared
+        GTest::GTest
+        GTest::Main
+        pthread
+        TBB::tbb
+        spdlog::spdlog
+        LLVMSupport LLVMCore
+)
+
 # Apply sanitizer flags to all test targets if enabled
 if(ENABLE_SANITIZERS)
     target_compile_options(sharding_test PRIVATE ${SANITIZER_COMPILE_FLAGS})
@@ -409,6 +426,9 @@ if(ENABLE_SANITIZERS)
     
     target_compile_options(temporal_query_test PRIVATE ${SANITIZER_COMPILE_FLAGS})
     target_link_options(temporal_query_test PRIVATE ${SANITIZER_LINK_FLAGS})
+
+    target_compile_options(update_query_test PRIVATE ${SANITIZER_COMPILE_FLAGS})
+    target_link_options(update_query_test PRIVATE ${SANITIZER_LINK_FLAGS})
 endif()
 
 # Simple test registration
@@ -430,6 +450,7 @@ add_test(NAME StringRefConcurrentTest COMMAND string_ref_concurrent_test)
 add_test(NAME NodeVersionTest COMMAND node_version_test)
 add_test(NAME NodeViewTest COMMAND node_view_test)
 add_test(NAME TemporalQueryTest COMMAND temporal_query_test)
+add_test(NAME UpdateQueryTest COMMAND update_query_test)
 
 # Set TSan options for tests after they've been registered
 if(ENABLE_SANITIZERS AND SANITIZER_TYPE STREQUAL "thread" AND EXISTS ${TSAN_SUPPRESSIONS_FILE})
@@ -453,6 +474,7 @@ set_tests_properties(
         JoinTest
         WhereExpressionTest
         FreeListArenaTest
+        UpdateQueryTest
         PROPERTIES
         ISOLATED TRUE  # This ensures tests run in isolation
 )
\ No newline at end of file
diff --git a/tests/update_query_test.cpp b/tests/update_query_test.cpp
new file mode 100644
index 0000000..99fd21d
--- /dev/null
+++ b/tests/update_query_test.cpp
@@ -0,0 +1,461 @@
+#include <gtest/gtest.h>
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "../include/core.hpp"
+#include "../include/query.hpp"
+#include "../include/utils.hpp"
+
+#define ASSERT_OK(expr) ASSERT_TRUE((expr).ok())
+
+using namespace std::string_literals;
+using namespace tundradb;
+
+namespace tundradb {
+// Fixture: fresh Database per test with "User"/"Company" schemas and seed data.
+class UpdateQueryTest : public ::testing::Test {
+ protected:
+  void SetUp() override {
+    auto user_schema = arrow::schema({
+        arrow::field("name", arrow::utf8()),
+        arrow::field("age", arrow::int32()),
+        arrow::field("city", arrow::utf8()),
+        arrow::field("salary", arrow::int32()),
+    });
+
+    auto company_schema = arrow::schema({
+        arrow::field("name", arrow::utf8()),
+        arrow::field("size", arrow::int32()),
+    });
+    // Path carries a now_millis() suffix — presumably to keep runs apart.
+    auto db_path = "update_query_test_db_" + std::to_string(now_millis());
+    auto config = make_config()
+                      .with_db_path(db_path)
+                      .with_shard_capacity(1000)
+                      .with_chunk_size(1000)
+                      .build();
+
+    db_ = std::make_shared<Database>(config);
+    db_->get_schema_registry()->create("User", user_schema).ValueOrDie();
+    db_->get_schema_registry()->create("Company", company_schema).ValueOrDie();
+
+    create_test_data();
+  }
+  // Seeds five users, one company, and two WORKS_AT edges.
+  void create_test_data() {
+    struct UserData {
+      std::string name;
+      int32_t age;
+      std::string city;
+      int32_t salary;
+    };
+
+    std::vector<UserData> users = {
+        {"Alice", 25, "NYC", 80000},    // id: 0
+        {"Bob", 35, "NYC", 120000},     // id: 1
+        {"Charlie", 45, "SF", 150000},  // id: 2
+        {"Diana", 30, "LA", 60000},     // id: 3
+        {"Eve", 55, "NYC", 200000},     // id: 4
+    };
+    // Insertion order matters: tests address these users by the ids above.
+    for (const auto& u : users) {
+      std::unordered_map<std::string, Value> data = {
+          {"name", Value{u.name}},
+          {"age", Value{u.age}},
+          {"city", Value{u.city}},
+          {"salary", Value{u.salary}},
+      };
+      db_->create_node("User", data).ValueOrDie();
+    }
+
+    std::unordered_map<std::string, Value> company_data = {
+        {"name", Value{"TechCorp"s}},
+        {"size", Value{int32_t(5000)}},
+    };
+    db_->create_node("Company", company_data).ValueOrDie();
+
+    // Create work relationships: Alice and Bob work at TechCorp
+    db_->connect(0, "WORKS_AT", 0).ValueOrDie();  // Alice → TechCorp
+    db_->connect(1, "WORKS_AT", 0).ValueOrDie();  // Bob   → TechCorp
+  }
+  /// Returns `field_name` of node `id` by running an unfiltered query over
+  /// `schema` and scanning its rows; throws std::runtime_error if not found.
+  template <typename T>
+  T get_field(const std::string& schema, int64_t id,
+              const std::string& field_name) {
+    const std::string alias = "_";  // throwaway alias for the FROM clause
+    auto query = Query::from(alias + ":" + schema).build();
+    auto result = db_->query(query).ValueOrDie();
+    auto table = result->table();
+    auto ids = get_column_values<int64_t>(table, alias + ".id").ValueOrDie();
+    auto vals =
+        get_column_values<T>(table, alias + "." + field_name).ValueOrDie();
+    for (size_t i = 0; i < ids.size(); ++i) {
+      if (ids[i] == id) return vals[i];
+    }
+    throw std::runtime_error("Node not found: " + schema + "(" +
+                             std::to_string(id) + ")");
+  }
+
+  std::shared_ptr<Database> db_;  // database under test, created in SetUp()
+};
+
+// =========================================================================
+// Builder API tests — Mode 1 (by ID)
+// =========================================================================
+
+TEST_F(UpdateQueryTest, BuilderOnRequiresAtLeastOneSet) {
+  EXPECT_THROW(UpdateQuery::on("User", 0).build(), std::runtime_error);
+}
+
+TEST_F(UpdateQueryTest, BuilderStoresSchema) {
+  auto by_id = UpdateQuery::on("User", 0).set("age", Value(31)).build();
+  EXPECT_EQ(by_id.schema(), "User");  // schema given to on() is kept
+}
+
+TEST_F(UpdateQueryTest, BuilderStoresNodeId) {
+  auto by_id = UpdateQuery::on("User", 42).set("age", Value(31)).build();
+  ASSERT_TRUE(by_id.node_id().has_value());  // guards the dereference below
+  EXPECT_EQ(*by_id.node_id(), 42);
+}
+
+TEST_F(UpdateQueryTest, BuilderStoresMultipleAssignments) {
+  auto uq = UpdateQuery::on("User", 0)
+                .set("age", Value(31))
+                .set("name", Value("Bob"s))
+                .build();
+  ASSERT_EQ(uq.assignments().size(), 2u);  // fatal: guards [0]/[1] below
+  EXPECT_EQ(uq.assignments()[0].field_name, "age");
+  EXPECT_EQ(uq.assignments()[1].field_name, "name");
+}
+
+TEST_F(UpdateQueryTest, BuilderDefaultUpdateTypeIsSET) {
+  auto by_id = UpdateQuery::on("User", 0).set("age", Value(31)).build();
+  EXPECT_EQ(by_id.update_type(), UpdateType::SET);  // no type was requested
+}
+
+// =========================================================================
+// Builder API tests — Mode 2 (by MATCH)
+// =========================================================================
+
+TEST_F(UpdateQueryTest, MatchRequiresAtLeastOneSet) {
+  auto all_users = Query::from("u:User").build();
+  EXPECT_THROW(UpdateQuery::match(all_users).build(), std::runtime_error);
+}
+
+TEST_F(UpdateQueryTest, MatchStoresQuery) {
+  auto nyc = Query::from("u:User")
+                 .where("u.city", CompareOp::Eq, Value("NYC"s))
+                 .build();
+  auto by_match = UpdateQuery::match(nyc).set("u.age", Value(31)).build();
+  EXPECT_TRUE(by_match.has_match());
+  EXPECT_FALSE(by_match.node_id().has_value());  // MATCH mode carries no id
+}
+
+TEST_F(UpdateQueryTest, MatchTargetAliasesFromSetFields) {
+  auto q = Query::from("u:User").traverse("u", "WORKS_AT", "c:Company").build();
+  auto op = UpdateQuery::match(q)
+                .set("u.salary", Value(int32_t(0)))
+                .set("c.size", Value(int32_t(9)))
+                .build();
+  auto targets = op.target_aliases();  // aliases named on the SET side
+  EXPECT_EQ(targets.size(), 2);
+  EXPECT_NE(std::find(targets.begin(), targets.end(), "u"), targets.end());
+  EXPECT_NE(std::find(targets.begin(), targets.end(), "c"), targets.end());
+}
+
+// =========================================================================
+// Database::update() — Mode 1 (by ID)
+// =========================================================================
+
+TEST_F(UpdateQueryTest, UpdateByIdSingleField) {
+  auto op = UpdateQuery::on("User", 0).set("age", Value(int32_t(99))).build();
+  auto outcome = db_->update(op);
+  ASSERT_TRUE(outcome.ok());
+  // One node touched, nothing failed, no error strings recorded.
+  const auto stats = outcome.ValueOrDie();
+  EXPECT_EQ(stats.updated_count, 1);
+  EXPECT_EQ(stats.failed_count, 0);
+  EXPECT_TRUE(stats.errors.empty());
+
+  // The new value is visible to a subsequent query.
+  EXPECT_EQ(get_field<int32_t>("User", 0, "age"), 99);
+}
+
+TEST_F(UpdateQueryTest, UpdateByIdMultipleFields) {
+  // Two assignments of different types on the same node, in one query.
+  auto op = UpdateQuery::on("User", 1)
+                .set("age", Value(int32_t(40)))
+                .set("city", Value("LA"s))
+                .build();
+  auto outcome = db_->update(op);
+  ASSERT_TRUE(outcome.ok());
+
+  const auto stats = outcome.ValueOrDie();
+  EXPECT_EQ(stats.updated_count, 1);
+  EXPECT_EQ(stats.failed_count, 0);
+  // Both fields must reflect the new values.
+  EXPECT_EQ(get_field<int32_t>("User", 1, "age"), 40);
+  EXPECT_EQ(get_field<std::string>("User", 1, "city"), "LA");
+}
+
+TEST_F(UpdateQueryTest, UpdateByIdStringField) {
+  auto op = UpdateQuery::on("User", 0).set("name", Value("Alicia"s)).build();
+  auto outcome = db_->update(op);
+  ASSERT_TRUE(outcome.ok());
+  EXPECT_EQ(outcome.ValueOrDie().updated_count, 1);
+  // Read the string back through a query to confirm it was stored.
+  const auto name = get_field<std::string>("User", 0, "name");
+  EXPECT_EQ(name, "Alicia");
+}
+
+TEST_F(UpdateQueryTest, UpdateByIdNonexistentNode) {
+  auto op = UpdateQuery::on("User", 999).set("age", Value(int32_t(99))).build();
+  auto outcome = db_->update(op);
+  // A missing node is not a hard error: the call succeeds and the failure is
+  // reported through the counters and the error list instead.
+  ASSERT_TRUE(outcome.ok());
+  const auto stats = outcome.ValueOrDie();
+  EXPECT_EQ(stats.updated_count, 0);
+  EXPECT_EQ(stats.failed_count, 1);
+  EXPECT_FALSE(stats.errors.empty());
+}
+
+TEST_F(UpdateQueryTest, UpdateByIdInvalidField) {
+  // "nonexistent_field" is not part of the User schema built in SetUp().
+  auto op = UpdateQuery::on("User", 0)
+                .set("nonexistent_field", Value(int32_t(1)))
+                .build();
+  auto outcome = db_->update(op);
+  EXPECT_FALSE(outcome.ok());
+}
+
+TEST_F(UpdateQueryTest, UpdateByIdInvalidSchema) {
+  // No schema with this name was registered, so the update must be rejected.
+  auto op =
+      UpdateQuery::on("NoSuchSchema", 0).set("age", Value(int32_t(1))).build();
+  auto outcome = db_->update(op);
+  EXPECT_FALSE(outcome.ok());
+}
+
+// =========================================================================
+// Database::update() — Mode 2 (single alias)
+// =========================================================================
+
+TEST_F(UpdateQueryTest, UpdateByMatchSimpleWhere) {
+  // Users in NYC per the fixture data: Alice(0), Bob(1), Eve(4).
+  auto nyc = Query::from("u:User")
+                 .where("u.city", CompareOp::Eq, Value("NYC"s))
+                 .build();
+  auto op =
+      UpdateQuery::match(nyc).set("u.salary", Value(int32_t(999999))).build();
+  auto outcome = db_->update(op);
+  ASSERT_TRUE(outcome.ok());
+
+  const auto stats = outcome.ValueOrDie();
+  EXPECT_EQ(stats.updated_count, 3);
+  EXPECT_EQ(stats.failed_count, 0);
+
+  // Every matched user carries the new salary...
+  EXPECT_EQ(get_field<int32_t>("User", 0, "salary"), 999999);  // Alice
+  EXPECT_EQ(get_field<int32_t>("User", 1, "salary"), 999999);  // Bob
+  EXPECT_EQ(get_field<int32_t>("User", 4, "salary"), 999999);  // Eve
+
+  // ...while users outside NYC keep their original one.
+  EXPECT_EQ(get_field<int32_t>("User", 2, "salary"), 150000);  // Charlie
+  EXPECT_EQ(get_field<int32_t>("User", 3, "salary"), 60000);   // Diana
+}
+
+TEST_F(UpdateQueryTest, UpdateByMatchSingleResult) {
+  // The name filter is expected to select exactly one node: Alice(0).
+  auto alice = Query::from("u:User")
+                   .where("u.name", CompareOp::Eq, Value("Alice"s))
+                   .build();
+  auto op = UpdateQuery::match(alice).set("u.age", Value(int32_t(26))).build();
+  auto outcome = db_->update(op);
+  ASSERT_TRUE(outcome.ok());
+  EXPECT_EQ(outcome.ValueOrDie().updated_count, 1);
+
+  EXPECT_EQ(get_field<int32_t>("User", 0, "age"), 26);
+}
+
+TEST_F(UpdateQueryTest, UpdateByMatchNoResults) {
+  // No fixture user is called "Nobody", so the MATCH is empty.
+  auto nobody = Query::from("u:User")
+                    .where("u.name", CompareOp::Eq, Value("Nobody"s))
+                    .build();
+  auto op = UpdateQuery::match(nobody).set("u.age", Value(int32_t(0))).build();
+  auto outcome = db_->update(op);
+  ASSERT_TRUE(outcome.ok());
+  EXPECT_EQ(outcome.ValueOrDie().updated_count, 0);
+}
+
+TEST_F(UpdateQueryTest, UpdateByMatchCompoundAnd) {
+  // age > 30 AND city == "NYC" selects Bob(35, NYC) and Eve(55, NYC).
+  auto mq = Query::from("u:User")
+                .where("u.age", CompareOp::Gt, Value(int32_t(30)))
+                .and_where("u.city", CompareOp::Eq, Value("NYC"s))
+                .build();
+  auto op = UpdateQuery::match(mq).set("u.salary", Value(int32_t(0))).build();
+  auto outcome = db_->update(op);
+  ASSERT_TRUE(outcome.ok());
+  EXPECT_EQ(outcome.ValueOrDie().updated_count, 2);
+
+  // Matched rows are zeroed; Alice is in NYC but too young to match.
+  EXPECT_EQ(get_field<int32_t>("User", 1, "salary"), 0);      // Bob
+  EXPECT_EQ(get_field<int32_t>("User", 4, "salary"), 0);      // Eve
+  EXPECT_EQ(get_field<int32_t>("User", 0, "salary"), 80000);  // Alice unchanged
+}
+
+TEST_F(UpdateQueryTest, UpdateByMatchMultipleSetFields) {
+  auto alice = Query::from("u:User")
+                   .where("u.name", CompareOp::Eq, Value("Alice"s))
+                   .build();
+  // Both assignments target the same alias, so they hit the same node.
+  auto op = UpdateQuery::match(alice)
+                .set("u.age", Value(int32_t(26)))
+                .set("u.city", Value("SF"s))
+                .build();
+  auto outcome = db_->update(op);
+  ASSERT_TRUE(outcome.ok());
+  EXPECT_EQ(outcome.ValueOrDie().updated_count, 1);
+
+  EXPECT_EQ(get_field<int32_t>("User", 0, "age"), 26);
+  EXPECT_EQ(get_field<std::string>("User", 0, "city"), "SF");
+}
+
+// =========================================================================
+// Database::update() — Mode 2 with traversal
+// =========================================================================
+
+TEST_F(UpdateQueryTest, UpdateByMatchWithTraversal) {
+  // Only users with a WORKS_AT edge to TechCorp qualify: Alice(0), Bob(1).
+  auto mq = Query::from("u:User")
+                .traverse("u", "WORKS_AT", "c:Company")
+                .where("c.name", CompareOp::Eq, Value("TechCorp"s))
+                .build();
+  auto op = UpdateQuery::match(mq).set("u.salary", Value(int32_t(777))).build();
+  auto outcome = db_->update(op);
+  ASSERT_TRUE(outcome.ok());
+
+  const auto stats = outcome.ValueOrDie();
+  EXPECT_EQ(stats.updated_count, 2);
+
+  // Charlie has no WORKS_AT edge in the fixture and must stay untouched.
+  EXPECT_EQ(get_field<int32_t>("User", 0, "salary"), 777);  // Alice
+  EXPECT_EQ(get_field<int32_t>("User", 1, "salary"), 777);  // Bob
+  EXPECT_EQ(get_field<int32_t>("User", 2, "salary"),
+            150000);  // Charlie unchanged
+}
+
+// =========================================================================
+// Database::update() — Mode 2 multi-schema (update both sides)
+// =========================================================================
+
+TEST_F(UpdateQueryTest, UpdateMultiSchemaViaTraversal) {
+  // One statement updates both ends of the WORKS_AT traversal.
+  auto mq = Query::from("u:User")
+                .traverse("u", "WORKS_AT", "c:Company")
+                .where("c.name", CompareOp::Eq, Value("TechCorp"s))
+                .build();
+  auto op = UpdateQuery::match(mq)
+                .set("u.salary", Value(int32_t(111)))
+                .set("c.size", Value(int32_t(9999)))
+                .build();
+  auto outcome = db_->update(op);
+  ASSERT_TRUE(outcome.ok());
+
+  const auto stats = outcome.ValueOrDie();
+  // Two users plus the company (same company id in each matched row), hence
+  // only a lower bound on the count is asserted here.
+  EXPECT_GE(stats.updated_count, 2);  // at least u(0) and u(1)
+  EXPECT_EQ(stats.failed_count, 0);
+
+  EXPECT_EQ(get_field<int32_t>("User", 0, "salary"), 111);    // Alice
+  EXPECT_EQ(get_field<int32_t>("User", 1, "salary"), 111);    // Bob
+  EXPECT_EQ(get_field<int32_t>("Company", 0, "size"), 9999);  // TechCorp
+}
+
+// =========================================================================
+// Database::update() — Mode 2 bad alias in SET
+// =========================================================================
+
+TEST_F(UpdateQueryTest, UpdateByMatchBadAliasInSet) {
+  auto users = Query::from("u:User").build();
+  // The MATCH only declares "u"; "x" is unknown and must be rejected.
+  auto op = UpdateQuery::match(users)
+                .set("x.salary", Value(int32_t(0)))
+                .build();
+  auto outcome = db_->update(op);
+  EXPECT_FALSE(outcome.ok());
+}
+
+TEST_F(UpdateQueryTest, UpdateByMatchUnqualifiedFieldFails) {
+  auto users = Query::from("u:User").build();
+  // In MATCH mode a SET field without an "alias." prefix is an error.
+  auto op = UpdateQuery::match(users)
+                .set("salary", Value(int32_t(0)))
+                .build();
+  auto outcome = db_->update(op);
+  EXPECT_FALSE(outcome.ok());
+}
+
+// =========================================================================
+// Different schema — Mode 1
+// =========================================================================
+
+TEST_F(UpdateQueryTest, UpdateCompanyById) {
+  // By-ID updates are not User-specific: target the single Company node.
+  auto op =
+      UpdateQuery::on("Company", 0).set("size", Value(int32_t(9999))).build();
+  auto outcome = db_->update(op);
+  ASSERT_TRUE(outcome.ok());
+  EXPECT_EQ(outcome.ValueOrDie().updated_count, 1);
+
+  EXPECT_EQ(get_field<int32_t>("Company", 0, "size"), 9999);
+}
+
+// =========================================================================
+// Verify original values are untouched
+// =========================================================================
+
+TEST_F(UpdateQueryTest, UpdateDoesNotAffectOtherFields) {
+  auto op = UpdateQuery::on("User", 0).set("age", Value(int32_t(99))).build();
+  auto outcome = db_->update(op);
+  ASSERT_TRUE(outcome.ok());
+
+  // Only "age" changes; the remaining fields keep their fixture values.
+  EXPECT_EQ(get_field<int32_t>("User", 0, "age"), 99);
+  EXPECT_EQ(get_field<std::string>("User", 0, "name"), "Alice");
+  EXPECT_EQ(get_field<std::string>("User", 0, "city"), "NYC");
+  EXPECT_EQ(get_field<int32_t>("User", 0, "salary"), 80000);
+}
+
+TEST_F(UpdateQueryTest, UpdateDoesNotAffectOtherNodes) {
+  auto op = UpdateQuery::on("User", 0).set("age", Value(int32_t(99))).build();
+  auto outcome = db_->update(op);
+  ASSERT_TRUE(outcome.ok());
+
+  // Users 1-4 were not addressed and keep their original ages.
+  EXPECT_EQ(get_field<int32_t>("User", 1, "age"), 35);
+  EXPECT_EQ(get_field<int32_t>("User", 2, "age"), 45);
+  EXPECT_EQ(get_field<int32_t>("User", 3, "age"), 30);
+  EXPECT_EQ(get_field<int32_t>("User", 4, "age"), 55);
+}
+
+// =========================================================================
+// Sequential updates
+// =========================================================================
+
+TEST_F(UpdateQueryTest, SequentialUpdatesAccumulate) {
+  // Apply two updates to the same field in order; the later one must win.
+  for (const int32_t age : {int32_t(50), int32_t(60)}) {
+    auto op = UpdateQuery::on("User", 0).set("age", Value(age)).build();
+    ASSERT_TRUE(db_->update(op).ok());
+  }
+  EXPECT_EQ(get_field<int32_t>("User", 0, "age"), 60);
+}
+
+}  // namespace tundradb

From 92b232f9d8802b815cb049aae54f35688315f953 Mon Sep 17 00:00:00 2001
From: dmgcodevil <dmgcodevil@gmail.com>
Date: Sat, 7 Mar 2026 13:42:57 -0500
Subject: [PATCH 2/4] update operation tests

---
 CMakeLists.txt                   |   2 +-
 tests/CMakeLists.txt             |  22 ++
 tests/update_query_join_test.cpp | 349 +++++++++++++++++++++++++++++++
 3 files changed, 372 insertions(+), 1 deletion(-)
 create mode 100644 tests/update_query_join_test.cpp

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 9e1b7c1..ec14971 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -4,7 +4,7 @@ project(tundradb VERSION 1.0.0 LANGUAGES CXX C)
 set(CMAKE_CXX_STANDARD 23)
 set(CMAKE_CXX_STANDARD_REQUIRED ON)
 
-# Option: bui   ld shared or static library (default: static)
+# Option: build shared or static library (default: static)
 option(BUILD_SHARED_LIBS "Build TundraDB as a shared library" OFF)
 option(TUNDRADB_BUILD_SHELL "Build the tundra_shell interactive CLI" ON)
 option(TUNDRADB_BUILD_TESTS "Build tests" ON)
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 921d7eb..5d11d49 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -150,6 +150,10 @@ add_executable(temporal_query_test
 add_executable(update_query_test
         update_query_test.cpp)
 
+# Update query join test
+add_executable(update_query_join_test
+        update_query_join_test.cpp)
+
 # Link against Arrow and GTest
 target_link_libraries(sharding_test
     PRIVATE
@@ -386,6 +390,19 @@ target_link_libraries(update_query_test
         LLVMSupport LLVMCore
 )
 
+target_link_libraries(update_query_join_test
+        PRIVATE
+        core
+        Arrow::arrow_shared
+        Parquet::parquet_shared
+        GTest::GTest
+        GTest::Main
+        pthread
+        TBB::tbb
+        spdlog::spdlog
+        LLVMSupport LLVMCore
+)
+
 # Apply sanitizer flags to all test targets if enabled
 if(ENABLE_SANITIZERS)
     target_compile_options(sharding_test PRIVATE ${SANITIZER_COMPILE_FLAGS})
@@ -429,6 +446,9 @@ if(ENABLE_SANITIZERS)
 
     target_compile_options(update_query_test PRIVATE ${SANITIZER_COMPILE_FLAGS})
     target_link_options(update_query_test PRIVATE ${SANITIZER_LINK_FLAGS})
+
+    target_compile_options(update_query_join_test PRIVATE ${SANITIZER_COMPILE_FLAGS})
+    target_link_options(update_query_join_test PRIVATE ${SANITIZER_LINK_FLAGS})
 endif()
 
 # Simple test registration
@@ -451,6 +471,7 @@ add_test(NAME NodeVersionTest COMMAND node_version_test)
 add_test(NAME NodeViewTest COMMAND node_view_test)
 add_test(NAME TemporalQueryTest COMMAND temporal_query_test)
 add_test(NAME UpdateQueryTest COMMAND update_query_test)
+add_test(NAME UpdateQueryJoinTest COMMAND update_query_join_test)
 
 # Set TSan options for tests after they've been registered
 if(ENABLE_SANITIZERS AND SANITIZER_TYPE STREQUAL "thread" AND EXISTS ${TSAN_SUPPRESSIONS_FILE})
@@ -475,6 +496,7 @@ set_tests_properties(
         WhereExpressionTest
         FreeListArenaTest
         UpdateQueryTest
+        UpdateQueryJoinTest
         PROPERTIES
         ISOLATED TRUE  # This ensures tests run in isolation
 )
\ No newline at end of file
diff --git a/tests/update_query_join_test.cpp b/tests/update_query_join_test.cpp
new file mode 100644
index 0000000..3702369
--- /dev/null
+++ b/tests/update_query_join_test.cpp
@@ -0,0 +1,349 @@
+#include <gtest/gtest.h>
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "../include/core.hpp"
+#include "../include/query.hpp"
+#include "../include/utils.hpp"
+
+#define ASSERT_OK(expr) ASSERT_TRUE((expr).ok())
+
+using namespace std::string_literals;
+using namespace tundradb;
+
+namespace tundradb {
+
+// =========================================================================
+// Fixture: cross-schema graph  (User --WORKS_AT--> Company)
+// =========================================================================
+
+class UpdateJoinCrossSchemaTest : public ::testing::Test {
+ protected:
+  void SetUp() override {
+    auto user_schema = arrow::schema({
+        arrow::field("name", arrow::utf8()),
+        arrow::field("age", arrow::int32()),
+        arrow::field("employed", arrow::boolean()),
+    });
+
+    auto company_schema = arrow::schema({
+        arrow::field("name", arrow::utf8()),
+        arrow::field("size", arrow::int32()),
+    });
+    // Path is suffixed with now_millis(); this fixture never removes it.
+    auto db_path = "update_join_cross_test_db_" + std::to_string(now_millis());
+    auto config = make_config()
+                      .with_db_path(db_path)
+                      .with_shard_capacity(1000)
+                      .with_chunk_size(1000)
+                      .build();
+
+    db_ = std::make_shared<Database>(config);
+    db_->get_schema_registry()->create("User", user_schema).ValueOrDie();
+    db_->get_schema_registry()->create("Company", company_schema).ValueOrDie();
+
+    // Users: Alice(0), Bob(1), Charlie(2) — ids assumed to follow creation
+    // order (the connect() calls below rely on it). All start employed = false
+    for (const auto& [name, age] : std::vector<std::pair<std::string, int32_t>>{
+             {"Alice", 30}, {"Bob", 25}, {"Charlie", 40}}) {
+      std::unordered_map<std::string, Value> data = {
+          {"name", Value{name}},
+          {"age", Value{age}},
+          {"employed", Value{false}},
+      };
+      db_->create_node("User", data).ValueOrDie();
+    }
+
+    // Companies: Acme(0), Globex(1), both size=0; ids presumably per schema
+    for (const auto& name : {"Acme"s, "Globex"s}) {
+      std::unordered_map<std::string, Value> data = {
+          {"name", Value{name}},
+          {"size", Value{int32_t(0)}},
+      };
+      db_->create_node("Company", data).ValueOrDie();
+    }
+
+    // Edges: Alice(0) --WORKS_AT--> Acme(0)
+    //        Bob(1)   --WORKS_AT--> Acme(0)
+    db_->connect(0, "WORKS_AT", 0).ValueOrDie();
+    db_->connect(1, "WORKS_AT", 0).ValueOrDie();
+  }
+  // Returns `field_name` of `schema` node `id` (full scan); throws if missing.
+  template <typename T>
+  T get_field(const std::string& schema, int64_t id,
+              const std::string& field_name) {
+    auto query = Query::from("_:" + schema).build();
+    auto result = db_->query(query).ValueOrDie();
+    auto table = result->table();
+    auto ids = get_column_values<int64_t>(table, "_.id").ValueOrDie();
+    auto vals = get_column_values<T>(table, "_." + field_name).ValueOrDie();
+    for (size_t i = 0; i < ids.size(); ++i) {
+      if (ids[i] == id) return vals[i];
+    }
+    throw std::runtime_error("Node not found: " + schema + "(" +
+                             std::to_string(id) + ")");
+  }
+
+  std::shared_ptr<Database> db_;
+};
+
+// -------------------------------------------------------------------------
+// Cross-schema: set User.employed=true AND Company.size=1
+// -------------------------------------------------------------------------
+
+TEST_F(UpdateJoinCrossSchemaTest, UpdateBothSidesOfTraversal) {
+  // Preconditions: the fixture leaves every field at its initial value
+  EXPECT_EQ(get_field<bool>("User", 0, "employed"), false);  // Alice
+  EXPECT_EQ(get_field<bool>("User", 1, "employed"), false);  // Bob
+  EXPECT_EQ(get_field<int32_t>("Company", 0, "size"), 0);    // Acme
+
+  // MATCH (u:User)-[:WORKS_AT]->(c:Company)
+  //   WHERE c.name = "Acme"
+  //   SET u.employed = true, c.size = 1
+  auto q = Query::from("u:User")
+               .traverse("u", "WORKS_AT", "c:Company")
+               .where("c.name", CompareOp::Eq, Value("Acme"s))
+               .build();
+  auto uq = UpdateQuery::match(q)
+                .set("u.employed", Value(true))
+                .set("c.size", Value(int32_t(1)))
+                .build();
+
+  auto result = db_->update(uq);
+  ASSERT_OK(result);
+
+  auto ur = result.ValueOrDie();
+  EXPECT_EQ(ur.failed_count, 0);
+  EXPECT_TRUE(ur.errors.empty());
+
+  // Users who work at Acme are now employed
+  EXPECT_EQ(get_field<bool>("User", 0, "employed"), true);  // Alice
+  EXPECT_EQ(get_field<bool>("User", 1, "employed"), true);  // Bob
+
+  // Charlie has no WORKS_AT edge, so he never matched — unchanged
+  EXPECT_EQ(get_field<bool>("User", 2, "employed"), false);
+
+  // Acme is reached from both Alice and Bob, yet ends at the SET value 1
+  EXPECT_EQ(get_field<int32_t>("Company", 0, "size"), 1);
+
+  // Globex was not part of the traversal — unchanged
+  EXPECT_EQ(get_field<int32_t>("Company", 1, "size"), 0);
+}
+
+TEST_F(UpdateJoinCrossSchemaTest, UpdateOnlyUserSide) {
+  // No WHERE clause; SET names only alias u, so Company must stay untouched
+  auto q = Query::from("u:User").traverse("u", "WORKS_AT", "c:Company").build();
+  auto uq = UpdateQuery::match(q).set("u.employed", Value(true)).build();
+
+  auto result = db_->update(uq);
+  ASSERT_OK(result);
+  EXPECT_EQ(result.ValueOrDie().failed_count, 0);
+  // Alice and Bob are the only users with a WORKS_AT edge
+  EXPECT_EQ(get_field<bool>("User", 0, "employed"), true);
+  EXPECT_EQ(get_field<bool>("User", 1, "employed"), true);
+  EXPECT_EQ(get_field<bool>("User", 2, "employed"), false);  // Charlie: no edge
+
+  // Acme was traversed but never named in SET — unchanged
+  EXPECT_EQ(get_field<int32_t>("Company", 0, "size"), 0);
+}
+
+TEST_F(UpdateJoinCrossSchemaTest, UpdateOnlyCompanySide) {
+  // SET names only alias c: Company.size changes, matched users do not
+  auto q = Query::from("u:User")
+               .traverse("u", "WORKS_AT", "c:Company")
+               .where("c.name", CompareOp::Eq, Value("Acme"s))
+               .build();
+  auto uq = UpdateQuery::match(q).set("c.size", Value(int32_t(42))).build();
+
+  auto result = db_->update(uq);
+  ASSERT_OK(result);
+  EXPECT_EQ(result.ValueOrDie().failed_count, 0);
+
+  // Alice and Bob matched the traversal but were not named in SET
+  EXPECT_EQ(get_field<bool>("User", 0, "employed"), false);
+  EXPECT_EQ(get_field<bool>("User", 1, "employed"), false);
+
+  // Acme updated. NOTE(review): Globex(1) staying at 0 is not asserted here
+  EXPECT_EQ(get_field<int32_t>("Company", 0, "size"), 42);
+}
+
+TEST_F(UpdateJoinCrossSchemaTest, TraversalWithNoMatchUpdatesNothing) {
+  // WHERE c.name = "NonExistent" matches no row, so neither SET may apply
+  auto q = Query::from("u:User")
+               .traverse("u", "WORKS_AT", "c:Company")
+               .where("c.name", CompareOp::Eq, Value("NonExistent"s))
+               .build();
+  auto uq = UpdateQuery::match(q)
+                .set("u.employed", Value(true))
+                .set("c.size", Value(int32_t(999)))
+                .build();
+
+  auto result = db_->update(uq);
+  ASSERT_OK(result);
+  EXPECT_EQ(result.ValueOrDie().updated_count, 0);
+  EXPECT_EQ(result.ValueOrDie().failed_count, 0);  // empty match != failure
+  // Everything unchanged (spot-checked on Alice and Acme)
+  EXPECT_EQ(get_field<bool>("User", 0, "employed"), false);
+  EXPECT_EQ(get_field<int32_t>("Company", 0, "size"), 0);
+}
+
+// =========================================================================
+// Fixture: same-schema graph  (User --FRIEND--> User)
+// =========================================================================
+
+class UpdateJoinSameSchemaTest : public ::testing::Test {
+ protected:
+  void SetUp() override {
+    auto user_schema = arrow::schema({
+        arrow::field("name", arrow::utf8()),
+        arrow::field("has_friend", arrow::boolean()),
+    });
+    // Path is suffixed with now_millis(); this fixture never removes it.
+    auto db_path = "update_join_same_test_db_" + std::to_string(now_millis());
+    auto config = make_config()
+                      .with_db_path(db_path)
+                      .with_shard_capacity(1000)
+                      .with_chunk_size(1000)
+                      .build();
+
+    db_ = std::make_shared<Database>(config);
+    db_->get_schema_registry()->create("User", user_schema).ValueOrDie();
+
+    // Users: Alice(0), Bob(1), Charlie(2), Diana(3) — ids assumed to follow
+    // creation order (connect() below relies on it). All start has_friend=false
+    for (const auto& name : {"Alice"s, "Bob"s, "Charlie"s, "Diana"s}) {
+      std::unordered_map<std::string, Value> data = {
+          {"name", Value{name}},
+          {"has_friend", Value{false}},
+      };
+      db_->create_node("User", data).ValueOrDie();
+    }
+
+    // Edges (directed):
+    //   Alice(0)  --FRIEND--> Bob(1)
+    //   Alice(0)  --FRIEND--> Charlie(2)
+    db_->connect(0, "FRIEND", 1).ValueOrDie();
+    db_->connect(0, "FRIEND", 2).ValueOrDie();
+    // Diana(3) gets no edge in either direction
+  }
+  // Returns `field_name` of `schema` node `id` (full scan); throws if missing.
+  template <typename T>
+  T get_field(const std::string& schema, int64_t id,
+              const std::string& field_name) {
+    auto query = Query::from("_:" + schema).build();
+    auto result = db_->query(query).ValueOrDie();
+    auto table = result->table();
+    auto ids = get_column_values<int64_t>(table, "_.id").ValueOrDie();
+    auto vals = get_column_values<T>(table, "_." + field_name).ValueOrDie();
+    for (size_t i = 0; i < ids.size(); ++i) {
+      if (ids[i] == id) return vals[i];
+    }
+    throw std::runtime_error("Node not found: " + schema + "(" +
+                             std::to_string(id) + ")");
+  }
+
+  std::shared_ptr<Database> db_;
+};
+
+// -------------------------------------------------------------------------
+// Same-schema: set has_friend=true on both sides of the friendship
+// -------------------------------------------------------------------------
+
+TEST_F(UpdateJoinSameSchemaTest, UpdateBothSidesOfFriendship) {
+  // Preconditions: nobody carries the flag before the update
+  EXPECT_EQ(get_field<bool>("User", 0, "has_friend"), false);  // Alice
+  EXPECT_EQ(get_field<bool>("User", 1, "has_friend"), false);  // Bob
+  EXPECT_EQ(get_field<bool>("User", 2, "has_friend"), false);  // Charlie
+  EXPECT_EQ(get_field<bool>("User", 3, "has_friend"), false);  // Diana
+
+  // MATCH (u:User)-[:FRIEND]->(f:User)
+  //   SET u.has_friend = true, f.has_friend = true
+  auto q = Query::from("u:User").traverse("u", "FRIEND", "f:User").build();
+  auto uq = UpdateQuery::match(q)
+                .set("u.has_friend", Value(true))
+                .set("f.has_friend", Value(true))
+                .build();
+
+  auto result = db_->update(uq);
+  ASSERT_OK(result);
+
+  auto ur = result.ValueOrDie();
+  EXPECT_EQ(ur.failed_count, 0);
+
+  // Alice is the source of both edges, so she is updated via alias "u"
+  EXPECT_EQ(get_field<bool>("User", 0, "has_friend"), true);
+
+  // Bob and Charlie are the edge targets, so they are updated via alias "f"
+  EXPECT_EQ(get_field<bool>("User", 1, "has_friend"), true);
+  EXPECT_EQ(get_field<bool>("User", 2, "has_friend"), true);
+
+  // Diana has no edges — unchanged
+  EXPECT_EQ(get_field<bool>("User", 3, "has_friend"), false);
+}
+
+TEST_F(UpdateJoinSameSchemaTest, UpdateOnlySourceSide) {
+  // Both aliases bind the User schema; SET names only the source alias "u"
+  auto q = Query::from("u:User").traverse("u", "FRIEND", "f:User").build();
+  auto uq = UpdateQuery::match(q).set("u.has_friend", Value(true)).build();
+
+  auto result = db_->update(uq);
+  ASSERT_OK(result);
+  EXPECT_EQ(result.ValueOrDie().failed_count, 0);
+
+  // Alice is the source of both FRIEND edges
+  EXPECT_EQ(get_field<bool>("User", 0, "has_friend"), true);
+
+  // Bob and Charlie appear only as targets — not updated (Diana not checked)
+  EXPECT_EQ(get_field<bool>("User", 1, "has_friend"), false);
+  EXPECT_EQ(get_field<bool>("User", 2, "has_friend"), false);
+}
+
+TEST_F(UpdateJoinSameSchemaTest, UpdateOnlyTargetSide) {
+  // Both aliases bind the User schema; SET names only the target alias "f"
+  auto q = Query::from("u:User").traverse("u", "FRIEND", "f:User").build();
+  auto uq = UpdateQuery::match(q).set("f.has_friend", Value(true)).build();
+
+  auto result = db_->update(uq);
+  ASSERT_OK(result);
+  EXPECT_EQ(result.ValueOrDie().failed_count, 0);
+
+  // Alice appears only as a source — must not be updated through "f"
+  EXPECT_EQ(get_field<bool>("User", 0, "has_friend"), false);
+
+  // Bob and Charlie are the targets of Alice's two edges
+  EXPECT_EQ(get_field<bool>("User", 1, "has_friend"), true);
+  EXPECT_EQ(get_field<bool>("User", 2, "has_friend"), true);
+
+  // Diana has no edges — untouched
+  EXPECT_EQ(get_field<bool>("User", 3, "has_friend"), false);
+}
+
+TEST_F(UpdateJoinSameSchemaTest, UpdateWithWhereOnTarget) {
+  // WHERE filters on the target alias, so only friends named "Bob" qualify
+  auto q = Query::from("u:User")
+               .traverse("u", "FRIEND", "f:User")
+               .where("f.name", CompareOp::Eq, Value("Bob"s))
+               .build();
+  auto uq = UpdateQuery::match(q)
+                .set("u.has_friend", Value(true))
+                .set("f.has_friend", Value(true))
+                .build();
+
+  auto result = db_->update(uq);
+  ASSERT_OK(result);
+  EXPECT_EQ(result.ValueOrDie().failed_count, 0);
+
+  // Alice -> Bob is the one row that satisfies the WHERE clause
+  EXPECT_EQ(get_field<bool>("User", 0, "has_friend"), true);  // Alice (source)
+  EXPECT_EQ(get_field<bool>("User", 1, "has_friend"), true);  // Bob   (target)
+
+  // Alice -> Charlie did NOT match (WHERE f.name = "Bob")
+  EXPECT_EQ(get_field<bool>("User", 2, "has_friend"), false);  // Charlie
+
+  // Diana has no edges — untouched
+  EXPECT_EQ(get_field<bool>("User", 3, "has_friend"), false);
+}
+
+}  // namespace tundradb

From e507ed7964923282ede1e5dae8be7bc67b32c9e2 Mon Sep 17 00:00:00 2001
From: dmgcodevil <dmgcodevil@gmail.com>
Date: Sat, 7 Mar 2026 19:25:06 -0500
Subject: [PATCH 3/4] move shard, shard manager and snapshot_manager to
 individual files.

---
 CMakeLists.txt                             |   3 +-
 include/core.hpp                           | 697 +--------------------
 include/shard.hpp                          | 170 +++++
 include/snapshot_manager.hpp               |  44 ++
 include/storage.hpp                        |   4 +-
 src/shard.cpp                              | 601 ++++++++++++++++++
 src/{snapshot.cpp => snapshot_manager.cpp} |  30 +-
 src/storage.cpp                            |   2 +
 8 files changed, 845 insertions(+), 706 deletions(-)
 create mode 100644 include/shard.hpp
 create mode 100644 include/snapshot_manager.hpp
 create mode 100644 src/shard.cpp
 rename src/{snapshot.cpp => snapshot_manager.cpp} (93%)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index ec14971..728a1fa 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -223,10 +223,11 @@ add_library(core
         src/query_execution.cpp
         src/join.cpp
         src/row.cpp
+        src/shard.cpp
         src/storage.cpp
         src/metadata.cpp
         src/file_utils.cpp
-        src/snapshot.cpp
+        src/snapshot_manager.cpp
         src/edge_store.cpp
         src/table_info.cpp
         src/utils.cpp
diff --git a/include/core.hpp b/include/core.hpp
index caad2da..030d090 100644
--- a/include/core.hpp
+++ b/include/core.hpp
@@ -1,19 +1,10 @@
 #pragma once
 
 #include <arrow/api.h>
-#include <arrow/compute/api.h>
 #include <arrow/result.h>
 #include <arrow/table.h>
-#include <arrow/type.h>
-#include <parquet/arrow/reader.h>
-#include <spdlog/spdlog.h>
-#include <tbb/concurrent_map.h>
-#include <tbb/concurrent_vector.h>
 
-#include <atomic>
 #include <memory>
-#include <memory_resource>
-#include <shared_mutex>
 #include <string>
 #include <unordered_map>
 #include <vector>
@@ -21,701 +12,19 @@
 #include "arrow_utils.hpp"
 #include "config.hpp"
 #include "edge_store.hpp"
-#include "file_utils.hpp"
-#include "json.hpp"
 #include "logger.hpp"
 #include "metadata.hpp"
 #include "node.hpp"
 #include "query.hpp"
 #include "query_execution.hpp"
 #include "schema.hpp"
+#include "shard.hpp"
+#include "snapshot_manager.hpp"
 #include "storage.hpp"
 #include "utils.hpp"
 
 namespace tundradb {
 
-class Database;
-class Node;
-class Shard;
-class ShardManager;
-class MetadataManager;
-class Storage;
-class NodeManager;
-
-class SnapshotManager {
- public:
-  explicit SnapshotManager(std::shared_ptr<MetadataManager> metadata_manager,
-                           std::shared_ptr<Storage> storage,
-                           std::shared_ptr<ShardManager> shard_manager,
-                           std::shared_ptr<EdgeStore> edge_store,
-                           std::shared_ptr<NodeManager> node_manager,
-                           std::shared_ptr<SchemaRegistry> schema_registry)
-      : metadata_manager_(std::move(metadata_manager)),
-        storage_(std::move(storage)),
-        shard_manager_(std::move(shard_manager)),
-        edge_store_(std::move(edge_store)),
-        node_manager_(std::move(node_manager)),
-        schema_registry_(std::move(schema_registry)) {}
-
-  arrow::Result<bool> initialize();
-  arrow::Result<Snapshot> commit();
-  Snapshot *current_snapshot();
-  std::shared_ptr<Manifest> get_manifest();
-
- private:
-  std::shared_ptr<MetadataManager> metadata_manager_;
-  std::shared_ptr<Storage> storage_;
-  std::shared_ptr<ShardManager> shard_manager_;
-  std::shared_ptr<SchemaRegistry> schema_registry_;
-  std::shared_ptr<EdgeStore> edge_store_;
-  std::shared_ptr<NodeManager> node_manager_;
-  Metadata metadata_;
-  std::shared_ptr<Manifest> manifest_;
-  std::shared_ptr<EdgeMetadata> edge_metadata_;
-};
-
-class Shard {
- private:
-  std::pmr::monotonic_buffer_resource memory_pool_;
-  std::pmr::unordered_map<int64_t, std::shared_ptr<Node>> nodes_;
-  std::set<int64_t> nodes_ids_;
-  std::atomic<bool> dirty_{false};
-  std::shared_ptr<arrow::Table> table_;
-  std::shared_ptr<SchemaRegistry> schema_registry_;
-  int64_t updated_ts_ = now_millis();
-  bool updated_ = true;  // todo should be false when we read from snapshot and
-                         // after commit
-
- public:
-  const int64_t id;         // Unique shard identifier
-  const int64_t index;      // index of the shard in the shard manager
-  int64_t min_id;           // Minimum node ID in this shard
-  int64_t max_id;           // Maximum node ID in this shard
-  const size_t capacity;    // Maximum number of nodes
-  const size_t chunk_size;  // Size of chunks for table creation
-  std::string schema_name;  // Name of the schema this shard holds
-
-  Shard(int64_t id, int64_t index, size_t capacity, int64_t min_id,
-        int64_t max_id, size_t chunk_size, const std::string &schema_name,
-        std::shared_ptr<SchemaRegistry> schema_registry,
-        size_t buffer_size = 10 * 1024 * 1024)
-      : id(id),
-        index(index),
-        capacity(capacity),
-        min_id(min_id),
-        max_id(max_id),
-        chunk_size(chunk_size),
-        memory_pool_(buffer_size),
-        nodes_(&memory_pool_),
-        schema_registry_(std::move(schema_registry)),
-        schema_name(schema_name) {}
-
-  Shard(int64_t id, int64_t index, const DatabaseConfig &config, int64_t min_id,
-        int64_t max_id, std::string schema_name,
-        std::shared_ptr<SchemaRegistry> schema_registry)
-      : memory_pool_(config.get_shard_memory_pool_size()),
-        nodes_(&memory_pool_),
-        schema_registry_(std::move(schema_registry)),
-        id(id),
-        index(index),
-        min_id(min_id),
-        max_id(max_id),
-        capacity(config.get_shard_capacity()),
-        chunk_size(config.get_chunk_size()),
-        schema_name(std::move(schema_name)) {}
-
-  ~Shard() {
-    nodes_.clear();
-    nodes_ids_.clear();
-    table_.reset();
-
-    // The memory_pool will be automatically destroyed
-    // The schema_registry is a shared_ptr, so it will be handled by reference
-    // counting
-  }
-
-  bool is_updated() const { return updated_; }
-
-  bool set_updated(bool v) {
-    updated_ = v;
-    return updated_;
-  }
-
-  int64_t get_updated_ts() const { return updated_ts_; }
-
-  std::string compound_id() const {
-    return this->schema_name + "-" + std::to_string(this->id);
-  }
-
-  arrow::Result<bool> add(const std::shared_ptr<Node> &node) {
-    if (node->id < min_id || node->id > max_id) {
-      return arrow::Status::Invalid("Node id is out of range");
-    }
-    if (nodes_.contains(node->id)) {
-      return arrow::Status::KeyError("Node already exists: ", node->id);
-    }
-    if (nodes_.size() >= capacity) {
-      return arrow::Status::KeyError("Shard is full");
-    }
-    nodes_.insert(std::make_pair(node->id, node));
-    nodes_ids_.insert(node->id);
-    dirty_ = true;
-    updated_ = true;
-    return true;
-  }
-
-  arrow::Result<bool> extend(const std::shared_ptr<Node> &node) {
-    if (nodes_.contains(node->id)) {
-      return arrow::Status::KeyError("Node already exists: ", node->id);
-    }
-    if (nodes_.size() >= capacity) {
-      return arrow::Status::KeyError("Shard is full");
-    }
-
-    if (empty()) {
-      min_id = node->id;
-      max_id = node->id;
-    } else {
-      if (node->id < min_id) {
-        return arrow::Status::Invalid("Node id is below the minimum range");
-      }
-      max_id = std::max(max_id, node->id);
-    }
-
-    nodes_.insert(std::make_pair(node->id, node));
-    nodes_ids_.insert(node->id);
-
-    dirty_ = true;
-    updated_ = true;
-    updated_ts_ = now_millis();
-    return true;
-  }
-
-  arrow::Result<std::shared_ptr<Node>> remove(int64_t id) {
-    const auto it = nodes_.find(id);
-    if (it == nodes_.end()) {
-      return arrow::Status::Invalid("Node not found: ", id);
-    }
-    auto node = it->second;
-    nodes_.erase(id);
-    nodes_ids_.erase(id);
-    dirty_ = true;
-    updated_ = true;
-    return node;
-  }
-
-  arrow::Result<std::shared_ptr<Node>> poll_first() {
-    if (nodes_ids_.empty()) {
-      return arrow::Status::Invalid("Shard is empty");
-    }
-    auto first = nodes_ids_.begin();
-    auto node_id = *first;
-    nodes_ids_.erase(first);
-    auto node = nodes_[node_id];
-    nodes_.erase(node_id);
-
-    if (!nodes_ids_.empty()) {
-      min_id = *nodes_ids_.begin();
-    }
-
-    dirty_ = true;
-    updated_ = true;
-    updated_ts_ = now_millis();
-    return node;
-  }
-
-  arrow::Result<bool> update(const int64_t node_id,
-                             const std::shared_ptr<Field> field,
-                             const Value &value, const UpdateType update_type) {
-    updated_ = true;
-    if (!nodes_.contains(node_id)) {
-      return arrow::Status::KeyError("Node not found: ", node_id);
-    }
-    dirty_ = true;
-    updated_ = true;
-    updated_ts_ = now_millis();
-    return nodes_[node_id]->update(field, value, update_type);
-  }
-
-  /**
-   * @brief Batch-update multiple fields on one node (creates 1 version).
-   */
-  arrow::Result<bool> update_fields(
-      const int64_t node_id,
-      const std::vector<std::pair<std::shared_ptr<Field>, Value>>
-          &field_updates,
-      const UpdateType update_type) {
-    if (!nodes_.contains(node_id)) {
-      return arrow::Status::KeyError("Node not found: ", node_id);
-    }
-    dirty_ = true;
-    updated_ = true;
-    updated_ts_ = now_millis();
-    return nodes_[node_id]->update_fields(field_updates, update_type);
-  }
-
-  arrow::Result<std::shared_ptr<arrow::Table>> get_table(TemporalContext *ctx) {
-    // if we have ctx we need to create a new table every time
-    if (dirty_ || !table_ || ctx) {
-      ARROW_ASSIGN_OR_RAISE(const auto schema,
-                            schema_registry_->get(schema_name));
-      auto arrow_schema = schema->arrow();
-
-      std::vector<std::shared_ptr<Node>> result;
-      std::ranges::transform(nodes_, std::back_inserter(result),
-                             [](const auto &pair) { return pair.second; });
-
-      std::ranges::sort(
-          result, [](const std::shared_ptr<Node> &a,
-                     const std::shared_ptr<Node> &b) { return a->id < b->id; });
-
-      ARROW_ASSIGN_OR_RAISE(auto table_res,
-                            create_table(schema, result, chunk_size, ctx));
-
-      if (!ctx) {
-        // Non-temporal query: cache the table for reuse
-        table_ = table_res;
-        dirty_ = false;
-      }
-
-      // Return the newly created table (temporal or non-temporal)
-      return table_res;
-    }
-
-    // Reuse cached table (only for non-temporal queries)
-    return table_;
-  }
-
-  size_t size() const { return nodes_.size(); }
-
-  bool has_space() const { return nodes_.size() < capacity; }
-
-  bool empty() const { return nodes_.empty(); }
-
-  std::vector<std::shared_ptr<Node>> get_nodes() const {
-    std::vector<std::shared_ptr<Node>> result;
-    result.reserve(nodes_.size());
-    for (const auto &node : nodes_ | std::views::values) {
-      result.push_back(node);
-    }
-    return result;
-  }
-};
-
-class ShardManager {
- private:
-  std::pmr::monotonic_buffer_resource memory_pool_;
-  std::pmr::unordered_map<std::string, std::vector<std::shared_ptr<Shard>>>
-      shards_;
-  std::shared_ptr<SchemaRegistry> schema_registry_;
-  const size_t shard_capacity_;
-  const size_t chunk_size_;
-  const DatabaseConfig config_;
-  std::atomic<int64_t> id_counter_{
-      0};  // Global unique ID counter for all shards
-  std::unordered_map<std::string, std::atomic<int64_t>>
-      index_counters_;                      // Per-schema index/position counter
-  mutable std::mutex index_counter_mutex_;  // todo use tbb map instead
-
-  void create_new_shard(const std::shared_ptr<Node> &node) {
-    auto new_min_id = node->id;
-    auto new_max_id = node->id + shard_capacity_ - 1;
-
-    int64_t shard_index;
-    {
-      std::lock_guard lock(index_counter_mutex_);
-      shard_index = index_counters_[node->schema_name]++;
-    }
-
-    auto shard = std::make_shared<Shard>(id_counter_.fetch_add(1), shard_index,
-                                         config_, new_min_id, new_max_id,
-                                         node->schema_name, schema_registry_);
-    auto result = shard->add(node);
-    if (!result.ok()) {
-      log_error("Error adding node to new shard: {}",
-                result.status().ToString());
-    }
-
-    shards_[node->schema_name].push_back(shard);
-  }
-
- public:
-  explicit ShardManager(std::shared_ptr<SchemaRegistry> schema_registry,
-                        const DatabaseConfig &config)
-      : memory_pool_(config.get_manager_memory_pool_size()),
-        shards_(&memory_pool_),
-        schema_registry_(std::move(schema_registry)),
-        shard_capacity_(config.get_shard_capacity()),
-        chunk_size_(config.get_chunk_size()),
-        config_(config) {}
-
-  void set_id_counter(const int64_t value) { id_counter_.store(value); }
-  int64_t get_id_counter() const { return id_counter_.load(); }
-
-  void set_index_counter(const std::string &schema_name, const int64_t value) {
-    std::lock_guard lock(index_counter_mutex_);
-    index_counters_[schema_name].store(value);
-  }
-
-  arrow::Result<std::shared_ptr<Shard>> get_shard(
-      const std::string &schema_name, const int64_t id) {
-    return shards_[schema_name][id];
-  }
-
-  int64_t get_index_counter(const std::string &schema_name) const {
-    std::lock_guard lock(index_counter_mutex_);
-    const auto it = index_counters_.find(schema_name);
-    return it != index_counters_.end() ? it->second.load() : 0;
-  }
-
-  std::vector<std::string> get_schema_names() const {
-    std::vector<std::string> schema_names;
-    schema_names.reserve(shards_.size());
-    for (const auto &schema_name : shards_ | std::views::keys) {
-      schema_names.push_back(schema_name);
-    }
-    return schema_names;
-  }
-
-  arrow::Result<std::vector<std::shared_ptr<Shard>>> get_shards(
-      const std::string &schema_name) const {
-    const auto it = shards_.find(schema_name);
-    if (it == shards_.end()) {
-      return arrow::Status::KeyError("Schema '", schema_name,
-                                     "' not found in shards");
-    }
-    return it->second;
-  }
-
-  arrow::Result<bool> is_shard_clean(std::string s, int64_t id) {
-    return !shards_[s][id]->is_updated();
-  }
-
-  arrow::Result<bool> compact(const std::string &schema_name) {
-    const auto it = shards_.find(schema_name);
-    if (it == shards_.end()) {
-      return arrow::Status::Invalid("Shard not found for the given schema: ",
-                                    schema_name);
-    }
-
-    auto &shard_list = it->second;
-    if (shard_list.size() <= 1) {
-      // nothing to compact
-      return true;
-    }
-
-    for (size_t i = 1; i < shard_list.size(); i++) {
-      const auto &prev = shard_list[i - 1];
-      const auto &curr = shard_list[i];
-
-      while (prev->has_space() && !curr->empty()) {
-        auto node = curr->poll_first().ValueOrDie();
-        prev->extend(node).ValueOrDie();
-        if (Logger::get_instance().get_level() == LogLevel::DEBUG) {
-          log_debug("node id: " + std::to_string(node->id) +
-                    " moved from shard: " + std::to_string(i) +
-                    " to shard: " + std::to_string(i - 1));
-          log_debug("prev shard id: " + std::to_string(i - 1) +
-                    " min_id=" + std::to_string(prev->min_id) +
-                    " max_id=" + std::to_string(prev->max_id));
-
-          log_debug("curr shard id: " + std::to_string(i) +
-                    " min_id=" + std::to_string(curr->min_id) +
-                    " max_id=" + std::to_string(curr->max_id));
-        }
-      }
-    }
-
-    // second pass: remove empty shards
-    auto it_shard = shard_list.begin();
-    while (it_shard != shard_list.end()) {
-      if ((*it_shard)->empty()) {
-        it_shard = shard_list.erase(it_shard);
-      } else {
-        ++it_shard;
-      }
-    }
-
-    return true;
-  }
-
-  // сompact all schemas in the database
-  arrow::Result<bool> compact_all() {
-    const std::vector<std::string> schema_names =
-        schema_registry_->get_schema_names();
-    bool success = true;
-
-    for (const auto &schema_name : schema_names) {
-      if (auto result = compact(schema_name); !result.ok()) {
-        log_error("Error compacting schema '{}':{}", schema_name,
-                  result.status().ToString());
-        success = false;
-      }
-    }
-
-    return success;
-  }
-
-  arrow::Result<bool> insert_node(const std::shared_ptr<Node> &node) {
-    if (Logger::get_instance().get_level() == LogLevel::DEBUG) {
-      log_debug("inserting node id " + std::to_string(node->id));
-    }
-    const auto it = shards_.find(node->schema_name);
-    if (it == shards_.end()) {
-      shards_[node->schema_name] = std::vector<std::shared_ptr<Shard>>();
-      create_new_shard(node);
-      return true;
-    }
-
-    const auto &shard_list = it->second;
-    if (shard_list.empty()) {
-      create_new_shard(node);
-      return true;
-    }
-
-    // first try to find shards that can directly add the node (ID is in range)
-    for (auto &shard : shard_list) {
-      if (node->id >= shard->min_id && node->id <= shard->max_id &&
-          shard->has_space()) {
-        if (auto result = shard->add(node); result.ok()) {
-          if (Logger::get_instance().get_level() == LogLevel::DEBUG) {
-            log_debug("node id: '" + std::to_string(node->id) +
-                      "' inserted to shard id: " + std::to_string(shard->id));
-          }
-
-          return true;
-        }
-        // if there was an error, we'll try the next shard
-      }
-    }
-
-    for (auto &shard : shard_list) {
-      if (shard->has_space()) {
-        if (node->id > shard->max_id) {
-          if (auto result = shard->extend(node); result.ok()) {
-            return true;
-          }
-        }
-      }
-    }
-    create_new_shard(node);
-    return true;
-  }
-
-  arrow::Result<std::shared_ptr<Node>> get_node(const std::string &schema_name,
-                                                int64_t node_id) {
-    const auto schema_it = shards_.find(schema_name);
-    if (schema_it == shards_.end()) {
-      return arrow::Status::KeyError("Schema '", schema_name,
-                                     "' not found in shards");
-    }
-
-    for (const auto &shard : schema_it->second) {
-      if (node_id >= shard->min_id && node_id <= shard->max_id) {
-        try {
-          if (auto node_result = shard->remove(node_id); node_result.ok()) {
-            return node_result.ValueOrDie();
-          }
-        } catch (...) {
-          // node wasn't in this shard, continue to next shard
-        }
-      }
-    }
-
-    return arrow::Status::KeyError("Node with id ", node_id,
-                                   " not found in schema '", schema_name, "'");
-  }
-
-  arrow::Result<bool> remove_node(const std::string &schema_name,
-                                  int64_t node_id) {
-    if (!shards_.contains(schema_name)) {
-      return arrow::Status::KeyError("Schema '", schema_name,
-                                     "' not found in shards");
-    }
-
-    for (const auto &shard : shards_[schema_name]) {
-      if (node_id >= shard->min_id && node_id <= shard->max_id) {
-        if (auto remove_result = shard->remove(node_id); remove_result.ok()) {
-          return true;
-        }
-      }
-    }
-
-    return arrow::Status::KeyError("Node with id ", node_id,
-                                   " not found in schema '", schema_name, "'");
-  }
-
-  arrow::Result<bool> update_node(const std::string &schema_name,
-                                  const int64_t id,
-                                  const std::shared_ptr<Field> &field,
-                                  const Value &value,
-                                  const UpdateType update_type) {
-    auto schema_it = shards_.find(schema_name);
-    if (schema_it == shards_.end()) {
-      return arrow::Status::KeyError("Schema not found: ", schema_name);
-    }
-
-    for (const auto &shard : schema_it->second) {
-      if (id >= shard->min_id && id <= shard->max_id) {
-        return shard->update(id, field, value, update_type);
-      }
-    }
-
-    return arrow::Status::KeyError("Node with id ", id, " not found in schema ",
-                                   schema_name);
-  }
-
-  arrow::Result<bool> update_node(const std::string &schema_name,
-                                  const int64_t id,
-                                  const std::string &field_name,
-                                  const Value &value,
-                                  const UpdateType update_type) {
-    auto schema_it = shards_.find(schema_name);
-    if (schema_it == shards_.end()) {
-      return arrow::Status::KeyError("Schema not found: ", schema_name,
-                                     " in shards");
-    }
-
-    auto field =
-        schema_registry_->get(schema_name).ValueOrDie()->get_field(field_name);
-
-    for (const auto &shard : schema_it->second) {
-      if (id >= shard->min_id && id <= shard->max_id) {
-        return shard->update(id, field, value, update_type);
-      }
-    }
-
-    return arrow::Status::KeyError("Node with id ", id, " not found in schema ",
-                                   schema_name);
-  }
-
-  /**
-   * @brief Batch-update multiple fields on one node (creates 1 version).
-   */
-  arrow::Result<bool> update_node_fields(
-      const std::string &schema_name, const int64_t id,
-      const std::vector<std::pair<std::shared_ptr<Field>, Value>>
-          &field_updates,
-      const UpdateType update_type) {
-    auto schema_it = shards_.find(schema_name);
-    if (schema_it == shards_.end()) {
-      return arrow::Status::KeyError("Schema not found: ", schema_name);
-    }
-
-    for (const auto &shard : schema_it->second) {
-      if (id >= shard->min_id && id <= shard->max_id) {
-        return shard->update_fields(id, field_updates, update_type);
-      }
-    }
-
-    return arrow::Status::KeyError("Node with id ", id, " not found in schema ",
-                                   schema_name);
-  }
-
-  arrow::Result<std::vector<std::shared_ptr<Node>>> get_nodes(
-      const std::string &schema_name) {
-    const auto schema_it = shards_.find(schema_name);
-    if (schema_it == shards_.end()) {
-      return arrow::Status::KeyError("Schema '", schema_name,
-                                     "' not found in shards");
-    }
-
-    std::vector<std::shared_ptr<Node>> result;
-    size_t total_estimated_nodes = 0;
-    for (const auto &shard : schema_it->second) {
-      total_estimated_nodes += shard->size();
-    }
-    result.reserve(total_estimated_nodes);
-
-    for (const auto &shard : schema_it->second) {
-      auto nodes = shard->get_nodes();
-      result.insert(result.end(), nodes.begin(), nodes.end());
-    }
-
-    return result;
-  }
-
-  arrow::Result<std::vector<std::shared_ptr<arrow::Table>>> get_tables(
-      const std::string &schema_name, TemporalContext *temporal_context) {
-    const auto schema_it = shards_.find(schema_name);
-    if (schema_it == shards_.end()) {
-      return std::vector<std::shared_ptr<arrow::Table>>{};
-    }
-
-    std::vector<std::shared_ptr<Shard>> sorted_shards = schema_it->second;
-
-    std::ranges::sort(sorted_shards, [](const std::shared_ptr<Shard> &a,
-                                        const std::shared_ptr<Shard> &b) {
-      return a->min_id < b->min_id;
-    });
-
-    std::vector<std::shared_ptr<arrow::Table>> tables;
-    for (const auto &shard : sorted_shards) {
-      ARROW_ASSIGN_OR_RAISE(auto table, shard->get_table(temporal_context));
-      if (table->num_rows() > 0) {
-        tables.push_back(table);
-      }
-    }
-
-    return tables;
-  }
-
-  bool has_shards(const std::string &schema_name) const {
-    const auto it = shards_.find(schema_name);
-    return it != shards_.end() && !it->second.empty();
-  }
-
-  arrow::Result<size_t> get_shard_count(const std::string &schema_name) const {
-    if (!has_shards(schema_name)) {
-      return arrow::Status::Invalid("Schema '", schema_name, "' not found");
-    }
-    return shards_.find(schema_name)->second.size();
-  }
-
-  arrow::Result<std::vector<size_t>> get_shard_sizes(
-      const std::string &schema_name) const {
-    if (!has_shards(schema_name)) {
-      return arrow::Status::Invalid("Schema '", schema_name, "' not found");
-    }
-    std::vector<size_t> sizes;
-    for (const auto &shard : shards_.find(schema_name)->second) {
-      sizes.push_back(shard->size());
-    }
-    return sizes;
-  }
-
-  arrow::Result<std::vector<std::pair<int64_t, int64_t>>> get_shard_ranges(
-      const std::string &schema_name) const {
-    if (!has_shards(schema_name)) {
-      return arrow::Status::Invalid("Schema '", schema_name, "' not found");
-    }
-    std::vector<std::pair<int64_t, int64_t>> ranges;
-    for (const auto &shard : shards_.find(schema_name)->second) {
-      ranges.emplace_back(shard->min_id, shard->max_id);
-    }
-    return ranges;
-  }
-
-  arrow::Result<bool> add_shard(const std::shared_ptr<Shard> &shard) {
-    if (!shard) {
-      return arrow::Status::Invalid("Cannot add null shard");
-    }
-
-    shards_[shard->schema_name].push_back(shard);
-    return true;
-  }
-
-  arrow::Result<bool> reset_all_updated() {
-    log_debug("Resetting 'updated' flag for all shards");
-    for (auto &schema_shards : shards_ | std::views::values) {
-      for (auto &shard : schema_shards) {
-        shard->set_updated(false);
-      }
-    }
-    return true;
-  }
-};
-
 class Database {
  private:
   std::shared_ptr<SchemaRegistry> schema_registry_;
@@ -797,7 +106,7 @@ class Database {
 
   arrow::Result<std::shared_ptr<Node>> create_node(
       const std::string &schema_name,
-      std::unordered_map<std::string, Value> &data) {
+      const std::unordered_map<std::string, Value> &data) {
     if (schema_name.empty()) {
       return arrow::Status::Invalid("Schema name cannot be empty");
     }
diff --git a/include/shard.hpp b/include/shard.hpp
new file mode 100644
index 0000000..6309c6d
--- /dev/null
+++ b/include/shard.hpp
@@ -0,0 +1,170 @@
+#pragma once
+
+#include <arrow/result.h>
+#include <arrow/table.h>
+
+#include <atomic>
+#include <memory>
+#include <memory_resource>
+#include <mutex>
+#include <ranges>
+#include <set>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+#include "config.hpp"
+#include "node.hpp"
+#include "schema.hpp"
+
+namespace tundradb {
+
+class TemporalContext;
+
+// =========================================================================
+// Shard — a contiguous range of nodes for one schema
+// =========================================================================
+
+class Shard {
+ private:
+  std::pmr::monotonic_buffer_resource memory_pool_;
+  std::pmr::unordered_map<int64_t, std::shared_ptr<Node>> nodes_;
+  std::set<int64_t> nodes_ids_;
+  std::atomic<bool> dirty_{false};
+  std::shared_ptr<arrow::Table> table_;
+  std::shared_ptr<SchemaRegistry> schema_registry_;
+  int64_t updated_ts_;
+  bool updated_ = true;
+
+ public:
+  const int64_t id;
+  const int64_t index;
+  int64_t min_id;
+  int64_t max_id;
+  const size_t capacity;
+  const size_t chunk_size;
+  std::string schema_name;
+
+  Shard(int64_t id, int64_t index, size_t capacity, int64_t min_id,
+        int64_t max_id, size_t chunk_size, std::string schema_name,
+        std::shared_ptr<SchemaRegistry> schema_registry,
+        size_t buffer_size = 10 * 1024 * 1024);
+
+  Shard(int64_t id, int64_t index, const DatabaseConfig &config, int64_t min_id,
+        int64_t max_id, std::string schema_name,
+        std::shared_ptr<SchemaRegistry> schema_registry);
+
+  ~Shard();
+
+  [[nodiscard]] bool is_updated() const;
+  bool set_updated(bool v);
+  [[nodiscard]] int64_t get_updated_ts() const;
+  [[nodiscard]] std::string compound_id() const;
+
+  arrow::Result<bool> add(const std::shared_ptr<Node> &node);
+  arrow::Result<bool> extend(const std::shared_ptr<Node> &node);
+  arrow::Result<std::shared_ptr<Node>> remove(int64_t id);
+  arrow::Result<std::shared_ptr<Node>> poll_first();
+
+  arrow::Result<bool> update(int64_t node_id, std::shared_ptr<Field> field,
+                             const Value &value, UpdateType update_type);
+
+  arrow::Result<bool> update_fields(
+      int64_t node_id,
+      const std::vector<std::pair<std::shared_ptr<Field>, Value>>
+          &field_updates,
+      UpdateType update_type);
+
+  arrow::Result<std::shared_ptr<arrow::Table>> get_table(TemporalContext *ctx);
+
+  [[nodiscard]] size_t size() const;
+  [[nodiscard]] bool has_space() const;
+  [[nodiscard]] bool empty() const;
+  [[nodiscard]] std::vector<std::shared_ptr<Node>> get_nodes() const;
+};
+
+// =========================================================================
+// ShardManager — manages per-schema shard collections
+// =========================================================================
+
+class ShardManager {
+ private:
+  std::pmr::monotonic_buffer_resource memory_pool_;
+  std::pmr::unordered_map<std::string, std::vector<std::shared_ptr<Shard>>>
+      shards_;
+  std::shared_ptr<SchemaRegistry> schema_registry_;
+  const size_t shard_capacity_;
+  const size_t chunk_size_;
+  const DatabaseConfig config_;
+  std::atomic<int64_t> id_counter_{0};
+  std::unordered_map<std::string, std::atomic<int64_t>> index_counters_;
+  mutable std::mutex index_counter_mutex_;
+
+  void create_new_shard(const std::shared_ptr<Node> &node);
+
+ public:
+  explicit ShardManager(std::shared_ptr<SchemaRegistry> schema_registry,
+                        const DatabaseConfig &config);
+
+  void set_id_counter(int64_t value);
+  [[nodiscard]] int64_t get_id_counter() const;
+  void set_index_counter(const std::string &schema_name, int64_t value);
+  [[nodiscard]] int64_t get_index_counter(const std::string &schema_name) const;
+
+  arrow::Result<std::shared_ptr<Shard>> get_shard(
+      const std::string &schema_name, int64_t id);
+
+  [[nodiscard]] std::vector<std::string> get_schema_names() const;
+
+  [[nodiscard]] arrow::Result<std::vector<std::shared_ptr<Shard>>> get_shards(
+      const std::string &schema_name) const;
+
+  arrow::Result<bool> is_shard_clean(std::string s, int64_t id);
+
+  arrow::Result<bool> compact(const std::string &schema_name);
+  arrow::Result<bool> compact_all();
+
+  arrow::Result<bool> insert_node(const std::shared_ptr<Node> &node);
+
+  arrow::Result<std::shared_ptr<Node>> get_node(const std::string &schema_name,
+                                                int64_t node_id);
+
+  arrow::Result<bool> remove_node(const std::string &schema_name,
+                                  int64_t node_id);
+
+  arrow::Result<bool> update_node(const std::string &schema_name, int64_t id,
+                                  const std::shared_ptr<Field> &field,
+                                  const Value &value, UpdateType update_type);
+
+  arrow::Result<bool> update_node(const std::string &schema_name, int64_t id,
+                                  const std::string &field_name,
+                                  const Value &value, UpdateType update_type);
+
+  arrow::Result<bool> update_node_fields(
+      const std::string &schema_name, int64_t id,
+      const std::vector<std::pair<std::shared_ptr<Field>, Value>>
+          &field_updates,
+      UpdateType update_type);
+
+  arrow::Result<std::vector<std::shared_ptr<Node>>> get_nodes(
+      const std::string &schema_name);
+
+  arrow::Result<std::vector<std::shared_ptr<arrow::Table>>> get_tables(
+      const std::string &schema_name, TemporalContext *temporal_context);
+
+  [[nodiscard]] bool has_shards(const std::string &schema_name) const;
+
+  [[nodiscard]] arrow::Result<size_t> get_shard_count(
+      const std::string &schema_name) const;
+
+  [[nodiscard]] arrow::Result<std::vector<size_t>> get_shard_sizes(
+      const std::string &schema_name) const;
+
+  [[nodiscard]] arrow::Result<std::vector<std::pair<int64_t, int64_t>>>
+  get_shard_ranges(const std::string &schema_name) const;
+
+  arrow::Result<bool> add_shard(const std::shared_ptr<Shard> &shard);
+  arrow::Result<bool> reset_all_updated();
+};
+
+}  // namespace tundradb
diff --git a/include/snapshot_manager.hpp b/include/snapshot_manager.hpp
new file mode 100644
index 0000000..5a10edf
--- /dev/null
+++ b/include/snapshot_manager.hpp
@@ -0,0 +1,44 @@
+#pragma once
+
+#include <arrow/result.h>
+
+#include <memory>
+
+#include "edge_store.hpp"
+#include "metadata.hpp"
+#include "schema.hpp"
+
+namespace tundradb {
+
+// Forward declarations
+class ShardManager;
+class Storage;
+class NodeManager;
+
+class SnapshotManager {
+ public:
+  explicit SnapshotManager(std::shared_ptr<MetadataManager> metadata_manager,
+                           std::shared_ptr<Storage> storage,
+                           std::shared_ptr<ShardManager> shard_manager,
+                           std::shared_ptr<EdgeStore> edge_store,
+                           std::shared_ptr<NodeManager> node_manager,
+                           std::shared_ptr<SchemaRegistry> schema_registry);
+
+  arrow::Result<bool> initialize();
+  arrow::Result<Snapshot> commit();
+  Snapshot *current_snapshot();
+  std::shared_ptr<Manifest> get_manifest();
+
+ private:
+  std::shared_ptr<MetadataManager> metadata_manager_;
+  std::shared_ptr<Storage> storage_;
+  std::shared_ptr<ShardManager> shard_manager_;
+  std::shared_ptr<SchemaRegistry> schema_registry_;
+  std::shared_ptr<EdgeStore> edge_store_;
+  std::shared_ptr<NodeManager> node_manager_;
+  Metadata metadata_;
+  std::shared_ptr<Manifest> manifest_;
+  std::shared_ptr<EdgeMetadata> edge_metadata_;
+};
+
+}  // namespace tundradb
diff --git a/include/storage.hpp b/include/storage.hpp
index ff6e42e..3114faf 100644
--- a/include/storage.hpp
+++ b/include/storage.hpp
@@ -13,6 +13,7 @@
 namespace tundradb {
 
 class SchemaRegistry;
+class NodeManager;
 class Shard;
 
 class Storage {
@@ -48,7 +49,4 @@ class Storage {
 
 }  // namespace tundradb
 
-// Include core.hpp after our declarations to prevent circular dependencies
-#include "core.hpp"
-
 #endif  // STORAGE_HPP
\ No newline at end of file
diff --git a/src/shard.cpp b/src/shard.cpp
new file mode 100644
index 0000000..543d790
--- /dev/null
+++ b/src/shard.cpp
@@ -0,0 +1,601 @@
+#include "shard.hpp"
+
+#include "logger.hpp"
+#include "utils.hpp"
+
+namespace tundradb {
+
+Shard::Shard(int64_t id, int64_t index, size_t capacity, int64_t min_id,
+             int64_t max_id, size_t chunk_size, std::string schema_name,
+             std::shared_ptr<SchemaRegistry> schema_registry,
+             size_t buffer_size)
+    : memory_pool_(buffer_size),  // initial size of the monotonic buffer
+      nodes_(&memory_pool_),      // node map allocates from that buffer
+      schema_registry_(std::move(schema_registry)),
+      updated_ts_(now_millis()),  // stamped at construction
+      id(id),
+      index(index),
+      min_id(min_id),
+      max_id(max_id),
+      capacity(capacity),
+      chunk_size(chunk_size),
+      schema_name(std::move(schema_name)) {}  // qualified: no ADL reliance
+
+Shard::Shard(int64_t id, int64_t index, const DatabaseConfig &config,
+             int64_t min_id, int64_t max_id, std::string schema_name,
+             std::shared_ptr<SchemaRegistry> schema_registry)
+    : memory_pool_(config.get_shard_memory_pool_size()),
+      nodes_(&memory_pool_),
+      schema_registry_(std::move(schema_registry)),
+      updated_ts_(now_millis()),
+      id(id),
+      index(index),
+      min_id(min_id),
+      max_id(max_id),
+      capacity(config.get_shard_capacity()),
+      chunk_size(config.get_chunk_size()),
+      schema_name(std::move(schema_name)) {}
+
+Shard::~Shard() {
+  nodes_.clear();
+  nodes_ids_.clear();
+  table_.reset();
+}
+
+bool Shard::is_updated() const { return updated_; }
+
+bool Shard::set_updated(bool v) {
+  updated_ = v;
+  return updated_;
+}
+
+int64_t Shard::get_updated_ts() const { return updated_ts_; }
+
+std::string Shard::compound_id() const {
+  return schema_name + "-" + std::to_string(id);
+}
+
+arrow::Result<bool> Shard::add(const std::shared_ptr<Node> &node) {
+  if (node->id < min_id || node->id > max_id) {
+    return arrow::Status::Invalid("Node id is out of range");
+  }
+  if (nodes_.contains(node->id)) {
+    return arrow::Status::KeyError("Node already exists: ", node->id);
+  }
+  if (nodes_.size() >= capacity) {
+    return arrow::Status::KeyError("Shard is full");
+  }
+  nodes_.insert(std::make_pair(node->id, node));
+  nodes_ids_.insert(node->id);
+  dirty_ = true;
+  updated_ = true;
+  return true;
+}
+
+arrow::Result<bool> Shard::extend(const std::shared_ptr<Node> &node) {
+  if (nodes_.contains(node->id)) {
+    return arrow::Status::KeyError("Node already exists: ", node->id);
+  }
+  if (nodes_.size() >= capacity) {
+    return arrow::Status::KeyError("Shard is full");
+  }
+
+  if (empty()) {
+    min_id = node->id;
+    max_id = node->id;
+  } else {
+    if (node->id < min_id) {
+      return arrow::Status::Invalid("Node id is below the minimum range");
+    }
+    max_id = std::max(max_id, node->id);
+  }
+
+  nodes_.insert(std::make_pair(node->id, node));
+  nodes_ids_.insert(node->id);
+
+  dirty_ = true;
+  updated_ = true;
+  updated_ts_ = now_millis();
+  return true;
+}
+
+arrow::Result<std::shared_ptr<Node>> Shard::remove(int64_t id) {
+  const auto it = nodes_.find(id);
+  if (it == nodes_.end()) {
+    return arrow::Status::Invalid("Node not found: ", id);
+  }
+  auto node = it->second;
+  nodes_.erase(id);
+  nodes_ids_.erase(id);
+  dirty_ = true;
+  updated_ = true;
+  return node;
+}
+
+arrow::Result<std::shared_ptr<Node>> Shard::poll_first() {
+  if (nodes_ids_.empty()) {
+    return arrow::Status::Invalid("Shard is empty");
+  }
+  const auto first = nodes_ids_.begin();
+  const auto node_id = *first;
+  nodes_ids_.erase(first);
+  auto node = nodes_[node_id];
+  nodes_.erase(node_id);
+
+  if (!nodes_ids_.empty()) {
+    min_id = *nodes_ids_.begin();
+  }
+
+  dirty_ = true;
+  updated_ = true;
+  updated_ts_ = now_millis();
+  return node;
+}
+
+arrow::Result<bool> Shard::update(const int64_t node_id,
+                                  std::shared_ptr<Field> field,
+                                  const Value &value,
+                                  const UpdateType update_type) {
+  // Validate first: a lookup miss must leave the shard's flags untouched.
+  if (!nodes_.contains(node_id)) {
+    return arrow::Status::KeyError("Node not found: ", node_id);
+  }
+  dirty_ = true;               // cached table_ is rebuilt by get_table()
+  updated_ = true;
+  updated_ts_ = now_millis();
+  return nodes_[node_id]->update(field, value, update_type);
+}
+
+arrow::Result<bool> Shard::update_fields(
+    const int64_t node_id,
+    const std::vector<std::pair<std::shared_ptr<Field>, Value>> &field_updates,
+    const UpdateType update_type) {
+  if (!nodes_.contains(node_id)) {
+    return arrow::Status::KeyError("Node not found: ", node_id);
+  }
+  dirty_ = true;
+  updated_ = true;
+  updated_ts_ = now_millis();
+  return nodes_[node_id]->update_fields(field_updates, update_type);
+}
+
+arrow::Result<std::shared_ptr<arrow::Table>> Shard::get_table(
+    TemporalContext *ctx) {
+  if (dirty_ || !table_ || ctx) {
+    ARROW_ASSIGN_OR_RAISE(const auto schema,
+                          schema_registry_->get(schema_name));
+    auto arrow_schema = schema->arrow();
+
+    std::vector<std::shared_ptr<Node>> result;
+    std::ranges::transform(nodes_, std::back_inserter(result),
+                           [](const auto &pair) { return pair.second; });
+
+    std::ranges::sort(
+        result, [](const std::shared_ptr<Node> &a,
+                   const std::shared_ptr<Node> &b) { return a->id < b->id; });
+
+    ARROW_ASSIGN_OR_RAISE(auto table_res,
+                          create_table(schema, result, chunk_size, ctx));
+
+    if (!ctx) {
+      table_ = table_res;
+      dirty_ = false;
+    }
+
+    return table_res;
+  }
+
+  return table_;
+}
+
+size_t Shard::size() const { return nodes_.size(); }
+
+bool Shard::has_space() const { return nodes_.size() < capacity; }
+
+bool Shard::empty() const { return nodes_.empty(); }
+
+std::vector<std::shared_ptr<Node>> Shard::get_nodes() const {
+  std::vector<std::shared_ptr<Node>> result;
+  result.reserve(nodes_.size());
+  for (const auto &node : nodes_ | std::views::values) {
+    result.push_back(node);
+  }
+  return result;
+}
+
+// =========================================================================
+// ShardManager
+// =========================================================================
+
+ShardManager::ShardManager(std::shared_ptr<SchemaRegistry> schema_registry,
+                           const DatabaseConfig &config)
+    : memory_pool_(config.get_manager_memory_pool_size()),
+      shards_(&memory_pool_),
+      schema_registry_(std::move(schema_registry)),
+      shard_capacity_(config.get_shard_capacity()),
+      chunk_size_(config.get_chunk_size()),
+      config_(config) {}
+
+void ShardManager::set_id_counter(const int64_t value) {
+  id_counter_.store(value);
+}
+
+int64_t ShardManager::get_id_counter() const { return id_counter_.load(); }
+
+void ShardManager::set_index_counter(const std::string &schema_name,
+                                     const int64_t value) {
+  std::lock_guard lock(index_counter_mutex_);
+  index_counters_[schema_name].store(value);
+}
+
+int64_t ShardManager::get_index_counter(const std::string &schema_name) const {
+  std::lock_guard lock(index_counter_mutex_);
+  const auto it = index_counters_.find(schema_name);
+  return it != index_counters_.end() ? it->second.load() : 0;
+}
+
+arrow::Result<std::shared_ptr<Shard>> ShardManager::get_shard(
+    const std::string &schema_name, const int64_t id) {  // id: list position
+  return shards_[schema_name][id];  // NOTE(review): no key or bounds check
+}
+
+std::vector<std::string> ShardManager::get_schema_names() const {
+  std::vector<std::string> schema_names;
+  schema_names.reserve(shards_.size());
+  for (const auto &schema_name : shards_ | std::views::keys) {
+    schema_names.push_back(schema_name);
+  }
+  return schema_names;
+}
+
+arrow::Result<std::vector<std::shared_ptr<Shard>>> ShardManager::get_shards(
+    const std::string &schema_name) const {
+  const auto it = shards_.find(schema_name);
+  if (it == shards_.end()) {
+    return arrow::Status::KeyError("Schema '", schema_name,
+                                   "' not found in shards");
+  }
+  return it->second;
+}
+
+arrow::Result<bool> ShardManager::is_shard_clean(std::string s, int64_t id) {
+  return !shards_[s][id]->is_updated();  // NOTE(review): unchecked lookup
+}
+
+// Packs the shards of `schema_name` toward the front of the list: every shard
+// with free space is topped up with the lowest-id nodes of its successor, and
+// shards drained to empty are dropped afterwards. Returns Invalid when the
+// schema has no shard entry; errors from the node moves are propagated.
+arrow::Result<bool> ShardManager::compact(const std::string &schema_name) {
+  const auto it = shards_.find(schema_name);
+  if (it == shards_.end()) {
+    return arrow::Status::Invalid("Shard not found for the given schema: ",
+                                  schema_name);
+  }
+
+  auto &shard_list = it->second;
+  if (shard_list.size() <= 1) {
+    return true;
+  }
+
+  for (size_t i = 1; i < shard_list.size(); i++) {
+    const auto &prev = shard_list[i - 1];
+    const auto &curr = shard_list[i];
+
+    while (prev->has_space() && !curr->empty()) {
+      // Return failures instead of aborting via ValueOrDie(). Note the node is
+      // already detached from `curr` by the time extend() can fail.
+      ARROW_ASSIGN_OR_RAISE(auto node, curr->poll_first());
+      ARROW_RETURN_NOT_OK(prev->extend(node).status());
+      if (Logger::get_instance().get_level() == LogLevel::DEBUG) {
+        log_debug("node id: " + std::to_string(node->id) +
+                  " moved from shard: " + std::to_string(i) +
+                  " to shard: " + std::to_string(i - 1));
+        log_debug("prev shard id: " + std::to_string(i - 1) +
+                  " min_id=" + std::to_string(prev->min_id) +
+                  " max_id=" + std::to_string(prev->max_id));
+        log_debug("curr shard id: " + std::to_string(i) +
+                  " min_id=" + std::to_string(curr->min_id) +
+                  " max_id=" + std::to_string(curr->max_id));
+      }
+    }
+  }
+
+  // Remove every shard that the loop above emptied.
+  std::erase_if(shard_list, [](const auto &s) { return s->empty(); });
+
+  return true;
+}
+
+arrow::Result<bool> ShardManager::compact_all() {
+  const std::vector<std::string> schema_names =
+      schema_registry_->get_schema_names();
+  bool success = true;
+
+  for (const auto &schema_name : schema_names) {
+    if (auto result = compact(schema_name); !result.ok()) {
+      log_error("Error compacting schema '{}':{}", schema_name,
+                result.status().ToString());
+      success = false;
+    }
+  }
+
+  return success;
+}
+
+arrow::Result<bool> ShardManager::insert_node(
+    const std::shared_ptr<Node> &node) {
+  if (Logger::get_instance().get_level() == LogLevel::DEBUG) {
+    log_debug("inserting node id " + std::to_string(node->id));
+  }
+  const auto it = shards_.find(node->schema_name);
+  if (it == shards_.end()) {
+    shards_[node->schema_name] = std::vector<std::shared_ptr<Shard>>();
+    create_new_shard(node);
+    return true;
+  }
+
+  const auto &shard_list = it->second;
+  if (shard_list.empty()) {
+    create_new_shard(node);
+    return true;
+  }
+
+  for (auto &shard : shard_list) {
+    if (node->id >= shard->min_id && node->id <= shard->max_id &&
+        shard->has_space()) {
+      if (auto result = shard->add(node); result.ok()) {
+        if (Logger::get_instance().get_level() == LogLevel::DEBUG) {
+          log_debug("node id: '" + std::to_string(node->id) +
+                    "' inserted to shard id: " + std::to_string(shard->id));
+        }
+        return true;
+      }
+    }
+  }
+
+  for (auto &shard : shard_list) {
+    if (shard->has_space()) {
+      if (node->id > shard->max_id) {
+        if (auto result = shard->extend(node); result.ok()) {
+          return true;
+        }
+      }
+    }
+  }
+  create_new_shard(node);
+  return true;
+}
+
+void ShardManager::create_new_shard(const std::shared_ptr<Node> &node) {
+  auto new_min_id = node->id;
+  auto new_max_id = node->id + static_cast<int64_t>(shard_capacity_) - 1;
+
+  int64_t shard_index;
+  {
+    std::lock_guard lock(index_counter_mutex_);
+    shard_index = index_counters_[node->schema_name]++;
+  }
+
+  auto shard = std::make_shared<Shard>(id_counter_.fetch_add(1), shard_index,
+                                       config_, new_min_id, new_max_id,
+                                       node->schema_name, schema_registry_);
+  auto result = shard->add(node);
+  if (!result.ok()) {
+    log_error("Error adding node to new shard: {}", result.status().ToString());
+  }
+
+  shards_[node->schema_name].push_back(shard);
+}
+
+arrow::Result<std::shared_ptr<Node>> ShardManager::get_node(
+    const std::string &schema_name, int64_t node_id) {
+  const auto schema_it = shards_.find(schema_name);
+  if (schema_it == shards_.end()) {
+    return arrow::Status::KeyError("Schema '", schema_name,
+                                   "' not found in shards");
+  }
+
+  for (const auto &shard : schema_it->second) {
+    if (node_id >= shard->min_id && node_id <= shard->max_id) {
+      try {  // NOTE(review): remove() below also erases the node from the shard
+        if (auto node_result = shard->remove(node_id); node_result.ok()) {
+          return node_result.ValueOrDie();
+        }
+      } catch (...) {
+        // any exception is swallowed so the scan moves on to the next shard
+      }
+    }
+  }
+
+  return arrow::Status::KeyError("Node with id ", node_id,
+                                 " not found in schema '", schema_name, "'");
+}
+
+arrow::Result<bool> ShardManager::remove_node(const std::string &schema_name,
+                                              int64_t node_id) {
+  if (!shards_.contains(schema_name)) {
+    return arrow::Status::KeyError("Schema '", schema_name,
+                                   "' not found in shards");
+  }
+
+  for (const auto &shard : shards_[schema_name]) {
+    if (node_id >= shard->min_id && node_id <= shard->max_id) {
+      if (auto remove_result = shard->remove(node_id); remove_result.ok()) {
+        return true;
+      }
+    }
+  }
+
+  return arrow::Status::KeyError("Node with id ", node_id,
+                                 " not found in schema '", schema_name, "'");
+}
+
+arrow::Result<bool> ShardManager::update_node(
+    const std::string &schema_name, const int64_t id,
+    const std::shared_ptr<Field> &field, const Value &value,
+    const UpdateType update_type) {
+  const auto schema_it = shards_.find(schema_name);
+  if (schema_it == shards_.end()) {
+    return arrow::Status::KeyError("Schema not found: ", schema_name);
+  }
+
+  for (const auto &shard : schema_it->second) {
+    if (id >= shard->min_id && id <= shard->max_id) {
+      return shard->update(id, field, value, update_type);
+    }
+  }
+
+  return arrow::Status::KeyError("Node with id ", id, " not found in schema ",
+                                 schema_name);
+}
+
+arrow::Result<bool> ShardManager::update_node(const std::string &schema_name,
+                                              const int64_t id,
+                                              const std::string &field_name,
+                                              const Value &value,
+                                              const UpdateType update_type) {
+  const auto schema_it = shards_.find(schema_name);
+  if (schema_it == shards_.end()) {
+    return arrow::Status::KeyError("Schema not found: ", schema_name,
+                                   " in shards");
+  }
+  // Resolve the field by name; a registry miss is returned, not fatal.
+  ARROW_ASSIGN_OR_RAISE(const auto schema, schema_registry_->get(schema_name));
+  const auto field = schema->get_field(field_name);
+
+  for (const auto &shard : schema_it->second) {
+    if (id >= shard->min_id && id <= shard->max_id) {
+      return shard->update(id, field, value, update_type);
+    }
+  }
+
+  return arrow::Status::KeyError("Node with id ", id, " not found in schema ",
+                                 schema_name);
+}
+
+arrow::Result<bool> ShardManager::update_node_fields(
+    const std::string &schema_name, const int64_t id,
+    const std::vector<std::pair<std::shared_ptr<Field>, Value>> &field_updates,
+    const UpdateType update_type) {
+  const auto schema_it = shards_.find(schema_name);
+  if (schema_it == shards_.end()) {
+    return arrow::Status::KeyError("Schema not found: ", schema_name);
+  }
+
+  for (const auto &shard : schema_it->second) {
+    if (id >= shard->min_id && id <= shard->max_id) {
+      return shard->update_fields(id, field_updates, update_type);
+    }
+  }
+
+  return arrow::Status::KeyError("Node with id ", id, " not found in schema ",
+                                 schema_name);
+}
+
+arrow::Result<std::vector<std::shared_ptr<Node>>> ShardManager::get_nodes(
+    const std::string &schema_name) {
+  const auto schema_it = shards_.find(schema_name);
+  if (schema_it == shards_.end()) {
+    return arrow::Status::KeyError("Schema '", schema_name,
+                                   "' not found in shards");
+  }
+
+  std::vector<std::shared_ptr<Node>> result;
+  size_t total_estimated_nodes = 0;
+  for (const auto &shard : schema_it->second) {
+    total_estimated_nodes += shard->size();
+  }
+  result.reserve(total_estimated_nodes);
+
+  for (const auto &shard : schema_it->second) {
+    auto nodes = shard->get_nodes();
+    result.insert(result.end(), nodes.begin(), nodes.end());
+  }
+
+  return result;
+}
+
+arrow::Result<std::vector<std::shared_ptr<arrow::Table>>>
+ShardManager::get_tables(const std::string &schema_name,
+                         TemporalContext *temporal_context) {
+  const auto schema_it = shards_.find(schema_name);
+  if (schema_it == shards_.end()) {
+    return std::vector<std::shared_ptr<arrow::Table>>{};
+  }
+
+  std::vector<std::shared_ptr<Shard>> sorted_shards = schema_it->second;
+
+  std::ranges::sort(sorted_shards, [](const std::shared_ptr<Shard> &a,
+                                      const std::shared_ptr<Shard> &b) {
+    return a->min_id < b->min_id;
+  });
+
+  std::vector<std::shared_ptr<arrow::Table>> tables;
+  for (const auto &shard : sorted_shards) {
+    ARROW_ASSIGN_OR_RAISE(auto table, shard->get_table(temporal_context));
+    if (table->num_rows() > 0) {
+      tables.push_back(table);
+    }
+  }
+
+  return tables;
+}
+
+bool ShardManager::has_shards(const std::string &schema_name) const {
+  const auto it = shards_.find(schema_name);
+  return it != shards_.end() && !it->second.empty();
+}
+
+arrow::Result<size_t> ShardManager::get_shard_count(
+    const std::string &schema_name) const {
+  if (!has_shards(schema_name)) {
+    return arrow::Status::Invalid("Schema '", schema_name, "' not found");
+  }
+  return shards_.find(schema_name)->second.size();
+}
+
+arrow::Result<std::vector<size_t>> ShardManager::get_shard_sizes(
+    const std::string &schema_name) const {
+  if (!has_shards(schema_name)) {
+    return arrow::Status::Invalid("Schema '", schema_name, "' not found");
+  }
+  std::vector<size_t> sizes;
+  for (const auto &shard : shards_.find(schema_name)->second) {
+    sizes.push_back(shard->size());
+  }
+  return sizes;
+}
+
+arrow::Result<std::vector<std::pair<int64_t, int64_t>>>
+ShardManager::get_shard_ranges(const std::string &schema_name) const {
+  if (!has_shards(schema_name)) {
+    return arrow::Status::Invalid("Schema '", schema_name, "' not found");
+  }
+  std::vector<std::pair<int64_t, int64_t>> ranges;
+  for (const auto &shard : shards_.find(schema_name)->second) {
+    ranges.emplace_back(shard->min_id, shard->max_id);
+  }
+  return ranges;
+}
+
+arrow::Result<bool> ShardManager::add_shard(
+    const std::shared_ptr<Shard> &shard) {
+  if (!shard) {
+    return arrow::Status::Invalid("Cannot add null shard");
+  }
+  shards_[shard->schema_name].push_back(shard);
+  return true;
+}
+
+arrow::Result<bool> ShardManager::reset_all_updated() {
+  log_debug("Resetting 'updated' flag for all shards");
+  for (auto &schema_shards : shards_ | std::views::values) {
+    for (auto &shard : schema_shards) {
+      shard->set_updated(false);
+    }
+  }
+  return true;
+}
+
+}  // namespace tundradb
diff --git a/src/snapshot.cpp b/src/snapshot_manager.cpp
similarity index 93%
rename from src/snapshot.cpp
rename to src/snapshot_manager.cpp
index 203234a..f0a9431 100644
--- a/src/snapshot.cpp
+++ b/src/snapshot_manager.cpp
@@ -1,10 +1,28 @@
-#include "core.hpp"
+#include "snapshot_manager.hpp"
+
+#include "edge.hpp"
 #include "logger.hpp"
-#include "metadata.hpp"
+#include "node.hpp"
+#include "shard.hpp"
+#include "storage.hpp"
 #include "utils.hpp"
 
 namespace tundradb {
 
+SnapshotManager::SnapshotManager(  // stores the six collaborators below
+    std::shared_ptr<MetadataManager> metadata_manager,
+    std::shared_ptr<Storage> storage,
+    std::shared_ptr<ShardManager> shard_manager,
+    std::shared_ptr<EdgeStore> edge_store,
+    std::shared_ptr<NodeManager> node_manager,
+    std::shared_ptr<SchemaRegistry> schema_registry)
+    : metadata_manager_(std::move(metadata_manager)),  // by-value args moved in
+      storage_(std::move(storage)),
+      shard_manager_(std::move(shard_manager)),
+      edge_store_(std::move(edge_store)),
+      node_manager_(std::move(node_manager)),
+      schema_registry_(std::move(schema_registry)) {}  // no other work done here
+
 arrow::Result<bool> SnapshotManager::initialize() {
   log_info("Initializing snapshot manager...");
   try {
@@ -93,8 +111,7 @@ arrow::Result<bool> SnapshotManager::initialize() {
           if (!shard_result.ok()) {
             log_error("Failed to load shard: " +
                       shard_result.status().ToString());
-            return shard_result
-                .status();  // Return the error instead of continuing
+            return shard_result.status();
           }
 
           const auto &shard = shard_result.ValueOrDie();
@@ -163,8 +180,6 @@ arrow::Result<Snapshot> SnapshotManager::commit() {
   for (const auto &schema_name : this->shard_manager_->get_schema_names()) {
     for (const auto &shard :
          this->shard_manager_->get_shards(schema_name).ValueOrDie()) {
-      // If the shard existed before compaction and wasn't marked as updated,
-      // restore that status
       if (original_update_states.contains(schema_name) &&
           original_update_states[schema_name].contains(shard->id) &&
           !original_update_states[schema_name][shard->id]) {
@@ -174,7 +189,7 @@ arrow::Result<Snapshot> SnapshotManager::commit() {
   }
 
   Snapshot new_snapshot;
-  new_snapshot.id = generate_unique_snapshot_id();  // timestamp_ms;
+  new_snapshot.id = generate_unique_snapshot_id();
   new_snapshot.timestamp_ms = timestamp_ms;
 
   if (this->metadata_.get_current_snapshot() != nullptr) {
@@ -244,7 +259,6 @@ arrow::Result<Snapshot> SnapshotManager::commit() {
       log_debug("Snapshotting shard id: " + std::to_string(shard->id));
       log_debug("Snapshotting shard size: " + std::to_string(shard->size()));
 
-      // Only write updated shards, reuse unchanged ones
       if (shard->is_updated()) {
         ShardMetadata shard_metadata;
         shard_metadata.id = shard->id;
diff --git a/src/storage.cpp b/src/storage.cpp
index e631abe..2ce2d77 100644
--- a/src/storage.cpp
+++ b/src/storage.cpp
@@ -19,6 +19,8 @@
 #include "json.hpp"
 #include "logger.hpp"
 #include "metadata.hpp"
+#include "node.hpp"
+#include "shard.hpp"
 #include "table_info.hpp"
 
 namespace tundradb {

From 8c6fb61d4d12a5201ae4bde995955f931471021d Mon Sep 17 00:00:00 2001
From: dmgcodevil <dmgcodevil@gmail.com>
Date: Sat, 7 Mar 2026 19:59:26 -0500
Subject: [PATCH 4/4] update with match

---
 antlr/TundraQL.g4    |  2 ++
 docs/tundraql.html   | 38 +++++++++++++++++++++++---
 src/tundra_shell.cpp | 63 +++++++++++++++++++++++++++++++++-----------
 3 files changed, 85 insertions(+), 18 deletions(-)

diff --git a/antlr/TundraQL.g4 b/antlr/TundraQL.g4
index 4b4a50e..3e06a7e 100644
--- a/antlr/TundraQL.g4
+++ b/antlr/TundraQL.g4
@@ -50,10 +50,12 @@ edgeDeleteTarget:
 // --- Update Statement ---
 // UPDATE User(0) SET name = "Bob", age = 31;
 // UPDATE (u:User) SET u.age = 31 WHERE u.name = "Alice";
+// UPDATE MATCH (u:User)-[:WORKS_AT]->(c:Company) SET u.employed = true, c.size = 1 WHERE c.name = "Acme";
 updateStatement: K_UPDATE updateTarget K_SET setClause (K_WHERE whereClause)? SEMI;
 
 updateTarget:
     nodeLocator                    // UPDATE User(0) SET ...;
+    | K_MATCH patternList          // UPDATE MATCH (u:User)-[:WORKS_AT]->(c:Company) SET ...;
     | nodePattern;                 // UPDATE (u:User) SET ... WHERE ...;
 
 setClause: setAssignment (COMMA setAssignment)*;
diff --git a/docs/tundraql.html b/docs/tundraql.html
index 7701c53..e8fa4eb 100644
--- a/docs/tundraql.html
+++ b/docs/tundraql.html
@@ -304,7 +304,7 @@ <h2 id="delete">DELETE</h2>
 
             <!-- ========== UPDATE ========== -->
             <h2 id="update">UPDATE</h2>
-            <p>Modifies field values on existing nodes. Supports two forms: <strong>by ID</strong> (direct) and <strong>by pattern</strong> (with optional <code>WHERE</code> filter).</p>
+            <p>Modifies field values on existing nodes. Supports three forms: <strong>by ID</strong> (direct), <strong>by pattern</strong> (single schema + optional <code>WHERE</code>), and <strong>by MATCH</strong> (traversals / joins + optional <code>WHERE</code>).</p>
 
             <h3>Form 1 — Update by ID</h3>
             <p>Targets a single node using <code>Schema(id)</code>. Field names are bare (no alias prefix).</p>
@@ -346,12 +346,39 @@ <h3>Form 2 — Update by Pattern</h3>
 <span class="kw">UPDATE</span> (<span class="alias">u</span>:<span class="type">User</span>) <span class="kw">SET</span> <span class="alias">u</span>.<span class="alias">age</span> = <span class="num">0</span>;</code></pre>
             </div>
 
+            <h3>Form 3 — Update by MATCH (traversals / joins)</h3>
+            <p>Uses a full <code>MATCH</code> pattern with traversals to find nodes across multiple schemas, then applies <code>SET</code> assignments. Field names must be alias-qualified. Multiple schemas can be updated in a single statement.</p>
+
+            <div class="syntax-block">
+                <div class="label">Syntax <span class="tag">DML</span></div>
+                <pre><code><span class="kw">UPDATE MATCH</span> (<span class="alias">a</span>:<span class="type">Schema1</span>)-[:<span class="edge-type">EDGE_TYPE</span>]-&gt;(<span class="alias">b</span>:<span class="type">Schema2</span>)
+    <span class="kw">SET</span> <span class="alias">a</span>.<span class="alias">field</span> = <span class="str">value</span> [, <span class="alias">b</span>.<span class="alias">field</span> = <span class="str">value</span> ...]
+    [<span class="kw">WHERE</span> <span class="alias">alias</span>.<span class="alias">field</span> <span class="op">op</span> <span class="str">value</span>] ;</code></pre>
+            </div>
+
+            <div class="syntax-block">
+                <div class="label">Examples</div>
+                <pre><code><span class="cmt">// Update both user and company for a traversal</span>
+<span class="kw">UPDATE MATCH</span> (<span class="alias">u</span>:<span class="type">User</span>)-[:<span class="edge-type">WORKS_AT</span>]-&gt;(<span class="alias">c</span>:<span class="type">Company</span>)
+    <span class="kw">SET</span> <span class="alias">u</span>.<span class="alias">employed</span> = <span class="num">true</span>, <span class="alias">c</span>.<span class="alias">size</span> = <span class="num">1</span>
+    <span class="kw">WHERE</span> <span class="alias">c</span>.<span class="alias">name</span> <span class="op">=</span> <span class="str">"Acme Corp"</span>;
+
+<span class="cmt">// Update only one side of the relationship</span>
+<span class="kw">UPDATE MATCH</span> (<span class="alias">u</span>:<span class="type">User</span>)-[:<span class="edge-type">WORKS_AT</span>]-&gt;(<span class="alias">c</span>:<span class="type">Company</span>)
+    <span class="kw">SET</span> <span class="alias">u</span>.<span class="alias">status</span> = <span class="str">"employed"</span>
+    <span class="kw">WHERE</span> <span class="alias">c</span>.<span class="alias">name</span> <span class="op">=</span> <span class="str">"Google"</span>;
+
+<span class="cmt">// Same-schema traversal (e.g. friends)</span>
+<span class="kw">UPDATE MATCH</span> (<span class="alias">a</span>:<span class="type">User</span>)-[:<span class="edge-type">FRIEND</span>]-&gt;(<span class="alias">b</span>:<span class="type">User</span>)
+    <span class="kw">SET</span> <span class="alias">a</span>.<span class="alias">has_friend</span> = <span class="num">true</span>, <span class="alias">b</span>.<span class="alias">has_friend</span> = <span class="num">true</span>;</code></pre>
+            </div>
+
             <div class="info-box">
                 <strong>Versioning:</strong> When multiple fields are updated in a single <code>SET</code> clause, TundraDB creates <strong>one version</strong> for the entire batch — not one per field.
             </div>
 
             <div class="warn-box">
-                <strong>Pattern form requires alias prefix:</strong> <code>SET age = 31</code> is only valid in the by-ID form. In the pattern form you must write <code>SET u.age = 31</code>.
+                <strong>Pattern/MATCH forms require alias prefix:</strong> <code>SET age = 31</code> is only valid in the by-ID form. In pattern and MATCH forms you must write <code>SET u.age = 31</code>.
             </div>
 
             <!-- ========== COMMIT ========== -->
@@ -572,7 +599,12 @@ <h2 id="patterns">Pattern Syntax Reference</h2>
 <span class="cmt">// 6. Bulk update: set all users older than 30 to "Senior"</span>
 <span class="kw">UPDATE</span> (<span class="alias">u</span>:<span class="type">User</span>) <span class="kw">SET</span> <span class="alias">u</span>.<span class="alias">name</span> = <span class="str">"Senior"</span> <span class="kw">WHERE</span> <span class="alias">u</span>.<span class="alias">age</span> <span class="op">&gt;</span> <span class="num">30</span>;
 
-<span class="cmt">// 7. Persist</span>
+<span class="cmt">// 7. Update with MATCH (traversal) — set name to "Employed" for Google employees</span>
+<span class="kw">UPDATE MATCH</span> (<span class="alias">u</span>:<span class="type">User</span>)-[:<span class="edge-type">works_at</span>]-&gt;(<span class="alias">c</span>:<span class="type">Company</span>)
+    <span class="kw">SET</span> <span class="alias">u</span>.<span class="alias">name</span> = <span class="str">"Employed"</span>
+    <span class="kw">WHERE</span> <span class="alias">c</span>.<span class="alias">name</span> <span class="op">=</span> <span class="str">"Google"</span>;
+
+<span class="cmt">// 8. Persist</span>
 <span class="kw">COMMIT</span>;</code></pre>
             </div>
 
diff --git a/src/tundra_shell.cpp b/src/tundra_shell.cpp
index 432e08f..9ba74bc 100644
--- a/src/tundra_shell.cpp
+++ b/src/tundra_shell.cpp
@@ -943,7 +943,6 @@ class TundraQLVisitorImpl : public tundraql::TundraQLBaseVisitor {
 
     if (node_id.has_value()) {
       // ----- Mode 1: UPDATE User(0) SET age = 31; -----
-      // Bare field names, single schema.
       auto schema_result = schema_registry->get(schema_name);
       if (!schema_result.ok()) {
         throw std::runtime_error("Schema '" + schema_name + "' not found");
@@ -954,7 +953,7 @@ class TundraQLVisitorImpl : public tundraql::TundraQLBaseVisitor {
       for (auto assignment : setClause->setAssignment()) {
         std::string field_name;
         if (assignment->IDENTIFIER().size() == 2) {
-          field_name = assignment->IDENTIFIER(1)->getText();  // strip alias
+          field_name = assignment->IDENTIFIER(1)->getText();
         } else {
           field_name = assignment->IDENTIFIER(0)->getText();
         }
@@ -984,9 +983,43 @@ class TundraQLVisitorImpl : public tundraql::TundraQLBaseVisitor {
                        << "): " << update_query.assignments().size()
                        << " field(s)" << std::endl;
     } else {
-      // ----- Mode 2: UPDATE (u:User) SET u.age = 31 WHERE u.name = "Alice";
-      // Alias-qualified SET fields (e.g. "u.age").
-      auto query_builder = tundradb::Query::from(alias + ":" + schema_name);
+      // ----- Mode 2 & 3: pattern-based or MATCH-based UPDATE -----
+      // Build a Query from the pattern(s).
+      // Mode 2: UPDATE (u:User) SET ... WHERE ...        → single nodePattern
+      // Mode 3: UPDATE MATCH (u:User)-[:E]->(c:C) SET ... WHERE ... → full
+      // MATCH
+
+      tundradb::Query::Builder query_builder = [&]() {
+        if (updateTarget->patternList()) {
+          // Mode 3: full MATCH patterns
+          auto patterns = updateTarget->patternList()->pathPattern();
+          auto qb = processPathPattern(patterns[0]);
+          for (size_t p = 1; p < patterns.size(); p++) {
+            processAdditionalPattern(qb, patterns[p]);
+          }
+          return qb;
+        }
+        // Mode 2: single nodePattern → build a trivial query
+        return tundradb::Query::from(alias + ":" + schema_name);
+      }();
+
+      // Build alias→schema map by walking the parsed node patterns
+      std::unordered_map<std::string, std::string> alias_to_schema;
+      if (updateTarget->patternList()) {
+        for (auto pathPat : updateTarget->patternList()->pathPattern()) {
+          for (auto nodePat : pathPat->nodePattern()) {
+            if (nodePat->IDENTIFIER().size() > 1) {
+              alias_to_schema[nodePat->IDENTIFIER(0)->getText()] =
+                  nodePat->IDENTIFIER(1)->getText();
+            } else {
+              auto name = nodePat->IDENTIFIER(0)->getText();
+              alias_to_schema[name] = name;
+            }
+          }
+        }
+      } else {
+        alias_to_schema[alias] = schema_name;
+      }
 
       if (ctx->whereClause()) {
         processWhereClause(query_builder, ctx->whereClause());
@@ -995,33 +1028,33 @@ class TundraQLVisitorImpl : public tundraql::TundraQLBaseVisitor {
       auto match_query = query_builder.build();
       auto builder = tundradb::UpdateQuery::match(std::move(match_query));
 
-      // Parse SET assignments — keep the alias.field format
+      // Parse SET assignments — keep alias.field format
       for (auto assignment : setClause->setAssignment()) {
         std::string qualified_name;
         if (assignment->IDENTIFIER().size() == 2) {
-          // "u.age" → keep as "u.age"
           qualified_name = assignment->IDENTIFIER(0)->getText() + "." +
                            assignment->IDENTIFIER(1)->getText();
         } else {
-          // bare field — assume the update target alias
           qualified_name = alias + "." + assignment->IDENTIFIER(0)->getText();
         }
 
-        // Resolve bare field for type conversion
+        std::string set_alias =
+            qualified_name.substr(0, qualified_name.find('.'));
         std::string bare_field =
             qualified_name.substr(qualified_name.find('.') + 1);
 
-        // Determine which schema this alias refers to
-        std::string set_alias =
-            qualified_name.substr(0, qualified_name.find('.'));
-        std::string set_schema = (set_alias == alias) ? schema_name : set_alias;
+        auto it = alias_to_schema.find(set_alias);
+        if (it == alias_to_schema.end()) {
+          throw std::runtime_error("Unknown alias '" + set_alias +
+                                   "' in SET clause");
+        }
+        const std::string& set_schema = it->second;
 
         auto s_result = schema_registry->get(set_schema);
         if (!s_result.ok()) {
           throw std::runtime_error("Schema '" + set_schema + "' not found");
         }
-        auto s = s_result.ValueOrDie();
-        auto field = s->get_field(bare_field);
+        auto field = s_result.ValueOrDie()->get_field(bare_field);
         if (!field) {
           throw std::runtime_error("Field '" + bare_field +
                                    "' not found in schema '" + set_schema +

Update Form	Field Name Format	Example
By ID	Bare name	`SET name = "Alice", age = 31`
By Pattern	Alias-qualified	`SET u.name = "Alice", u.age = 31`