From 6c8676e9dd5223cb43eeb29a1528b9d9a37f7d39 Mon Sep 17 00:00:00 2001 From: dmgcodevil Date: Sat, 7 Mar 2026 11:29:43 -0500 Subject: [PATCH 1/4] update operation --- CMakeLists.txt | 2 +- antlr/TundraQL.g4 | 16 +- docs/tundraql.html | 93 +++++++- include/core.hpp | 97 ++++++++ include/node.hpp | 30 +++ include/query.hpp | 174 ++++++++++++++ src/core.cpp | 193 +++++++++++++++ src/tundra_shell.cpp | 184 ++++++++++++++ tests/CMakeLists.txt | 22 ++ tests/update_query_test.cpp | 461 ++++++++++++++++++++++++++++++++++++ 10 files changed, 1269 insertions(+), 3 deletions(-) create mode 100644 tests/update_query_test.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index ec14971..9e1b7c1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -4,7 +4,7 @@ project(tundradb VERSION 1.0.0 LANGUAGES CXX C) set(CMAKE_CXX_STANDARD 23) set(CMAKE_CXX_STANDARD_REQUIRED ON) -# Option: build shared or static library (default: static) +# Option: build shared or static library (default: static) option(BUILD_SHARED_LIBS "Build TundraDB as a shared library" OFF) option(TUNDRADB_BUILD_SHELL "Build the tundra_shell interactive CLI" ON) option(TUNDRADB_BUILD_TESTS "Build tests" ON) diff --git a/antlr/TundraQL.g4 b/antlr/TundraQL.g4 index 9b9df70..4b4a50e 100644 --- a/antlr/TundraQL.g4 +++ b/antlr/TundraQL.g4 @@ -1,7 +1,7 @@ grammar TundraQL; // Entry point for parsing a full command -statement: createSchemaStatement | createNodeStatement | createEdgeStatement | matchStatement | deleteStatement | commitStatement | showStatement EOF; +statement: createSchemaStatement | createNodeStatement | createEdgeStatement | matchStatement | deleteStatement | updateStatement | commitStatement | showStatement EOF; // --- Schema Definition --- createSchemaStatement: K_CREATE K_SCHEMA IDENTIFIER LPAREN schemaFieldList RPAREN SEMI; @@ -47,6 +47,18 @@ edgeDeleteTarget: | K_EDGE IDENTIFIER K_TO nodeSelector // DELETE EDGE edge_type TO node; | K_EDGE IDENTIFIER K_FROM nodeSelector K_TO nodeSelector; // DELETE EDGE 
edge_type FROM node TO node; +// --- Update Statement --- +// UPDATE User(0) SET name = "Bob", age = 31; +// UPDATE (u:User) SET u.age = 31 WHERE u.name = "Alice"; +updateStatement: K_UPDATE updateTarget K_SET setClause (K_WHERE whereClause)? SEMI; + +updateTarget: + nodeLocator // UPDATE User(0) SET ...; + | nodePattern; // UPDATE (u:User) SET ... WHERE ...; + +setClause: setAssignment (COMMA setAssignment)*; +setAssignment: IDENTIFIER (DOT IDENTIFIER)? EQ value; + // --- Commit Statement --- commitStatement: K_COMMIT SEMI; @@ -106,6 +118,8 @@ K_RIGHT: 'RIGHT'; K_FULL: 'FULL'; K_AND: 'AND'; K_OR: 'OR'; +K_UPDATE: 'UPDATE'; +K_SET: 'SET'; K_COMMIT: 'COMMIT'; K_UNIQUE: 'UNIQUE'; K_SHOW: 'SHOW'; diff --git a/docs/tundraql.html b/docs/tundraql.html index ff90f06..7701c53 100644 --- a/docs/tundraql.html +++ b/docs/tundraql.html @@ -129,6 +129,7 @@

Statements

CREATE NODE CREATE EDGE MATCH + UPDATE DELETE COMMIT SHOW @@ -136,6 +137,7 @@

Statements

Clauses

WHERE SELECT + SET JOIN Types

Reference

@@ -300,6 +302,58 @@

DELETE

DELETE EDGE works_at TO Company(0); + +

UPDATE

+

Modifies field values on existing nodes. Supports two forms: by ID (direct) and by pattern (with optional WHERE filter).

+ +

Form 1 — Update by ID

+

Targets a single node using Schema(id). Field names are bare (no alias prefix).

+ +
+
Syntax DML
+
UPDATE Schema(id) SET field = value [, field = value ...] ;
+
+ +
+
Examples
+
// Update a single field
+UPDATE User(0) SET age = 31;
+
+// Update multiple fields at once (creates one version)
+UPDATE User(0) SET name = "Alice B.", age = 31;
+
+ +

Form 2 — Update by Pattern

+

Uses a node pattern (alias:Schema) with an optional WHERE clause to match nodes. Field names must be alias-qualified (alias.field).

+ +
+
Syntax DML
+
UPDATE (alias:Schema) SET alias.field = value [, ...]
+    [WHERE alias.field op value] ;
+
+ +
+
Examples
+
// Update all users named Alice
+UPDATE (u:User) SET u.age = 31
+    WHERE u.name = "Alice";
+
+// Update with compound condition
+UPDATE (u:User) SET u.name = "Senior"
+    WHERE u.age > 30 AND u.age < 50;
+
+// Update all nodes of a schema (no WHERE)
+UPDATE (u:User) SET u.age = 0;
+
+ +
+ Versioning: When multiple fields are updated in a single SET clause, TundraDB creates one version for the entire batch — not one per field. +
+ +
+ Pattern form requires alias prefix: SET age = 31 is only valid in the by-ID form. In the pattern form you must write SET u.age = 31. +
+

COMMIT

Persists the current database state to disk (Parquet files + JSON metadata).

@@ -360,6 +414,37 @@

SELECT Clause

SELECT u.name AS employee, c.name AS company, u.age; + +

SET Clause

+

Specifies field assignments in an UPDATE statement. Comma-separated list of field = value pairs.

+ +
+
Syntax
+
SET field = value [, field = value ...]
+
+ + + + + + + + + + + + + + + + + +
Update Form | Field Name Format | Example
By ID | Bare name | SET name = "Alice", age = 31
By Pattern | Alias-qualified | SET u.name = "Alice", u.age = 31
+ +
+ Batch semantics: All assignments in a single SET clause are applied atomically — one version is created per node, regardless of how many fields are changed. +
+

JOIN Types

Specified inside the edge pattern -[:EDGE_TYPE JOIN]->. Controls how unmatched nodes are handled.

@@ -481,7 +566,13 @@

Pattern Syntax Reference

WHERE u.name = "Alice" AND c.name = "Google" SELECT u.name AS user, f.name AS friend, c.name AS company; -// 5. Persist +// 5. Update: Alice turned 31 +UPDATE User(0) SET age = 31; + +// 6. Bulk update: set all users older than 30 to "Senior" +UPDATE (u:User) SET u.name = "Senior" WHERE u.age > 30; + +// 7. Persist COMMIT; diff --git a/include/core.hpp b/include/core.hpp index 5305c44..caad2da 100644 --- a/include/core.hpp +++ b/include/core.hpp @@ -237,6 +237,23 @@ class Shard { return nodes_[node_id]->update(field, value, update_type); } + /** + * @brief Batch-update multiple fields on one node (creates 1 version). + */ + arrow::Result update_fields( + const int64_t node_id, + const std::vector, Value>> + &field_updates, + const UpdateType update_type) { + if (!nodes_.contains(node_id)) { + return arrow::Status::KeyError("Node not found: ", node_id); + } + dirty_ = true; + updated_ = true; + updated_ts_ = now_millis(); + return nodes_[node_id]->update_fields(field_updates, update_type); + } + arrow::Result> get_table(TemporalContext *ctx) { // if we have ctx we need to create a new table every time if (dirty_ || !table_ || ctx) { @@ -572,6 +589,29 @@ class ShardManager { schema_name); } + /** + * @brief Batch-update multiple fields on one node (creates 1 version). 
+ */ + arrow::Result update_node_fields( + const std::string &schema_name, const int64_t id, + const std::vector, Value>> + &field_updates, + const UpdateType update_type) { + auto schema_it = shards_.find(schema_name); + if (schema_it == shards_.end()) { + return arrow::Status::KeyError("Schema not found: ", schema_name); + } + + for (const auto &shard : schema_it->second) { + if (id >= shard->min_id && id <= shard->max_id) { + return shard->update_fields(id, field_updates, update_type); + } + } + + return arrow::Status::KeyError("Node with id ", id, " not found in schema ", + schema_name); + } + arrow::Result>> get_nodes( const std::string &schema_name) { const auto schema_it = shards_.find(schema_name); @@ -785,6 +825,18 @@ class Database { update_type); } + /** + * @brief Batch-update multiple fields on one node (creates 1 version). + */ + arrow::Result update_node_fields( + const std::string &schema_name, const int64_t id, + const std::vector, Value>> + &field_updates, + const UpdateType update_type) { + return shard_manager_->update_node_fields(schema_name, id, field_updates, + update_type); + } + arrow::Result remove_node(const std::string &schema_name, int64_t node_id) { if (auto res = node_manager_->remove_node(schema_name, node_id); !res) { @@ -877,6 +929,51 @@ class Database { [[nodiscard]] arrow::Result> query( const Query &query) const; + + /** + * @brief Execute an UpdateQuery. + * + * Mode 1 — by ID (bare field names): + * db.update(UpdateQuery::on("User", 0).set("age", Value(31)).build()); + * + * Mode 2 — by MATCH query (alias-qualified SET, multi-schema): + * db.update(UpdateQuery::match( + * Query::from("u:User") + * .traverse("u", "WORKS_AT", "c:Company") + * .where("c.name", CompareOp::Eq, Value("Google")) + * .build() + * ).set("u.status", Value("employed")) + * .set("c.size", Value(int32_t(5001))) + * .build()); + */ + [[nodiscard]] arrow::Result update(const UpdateQuery &uq); + + private: + /** Mode 1: update a single node by schema + ID. 
*/ + [[nodiscard]] arrow::Result update_by_id(const UpdateQuery &uq); + + /** Mode 2: find nodes via MATCH query, then batch-update each. */ + [[nodiscard]] arrow::Result update_by_match( + const UpdateQuery &uq); + + /** + * Apply field updates to every node whose ID appears in @p id_column. + * One call to update_node_fields() per unique node ID (1 version each). + */ + void apply_updates( + const std::string &schema_name, + const std::shared_ptr &id_column, + const std::vector, Value>> &fields, + UpdateType update_type, UpdateResult &result); + + /** + * Build an alias→schema mapping from a Query's FROM + TRAVERSE clauses. + * Only declarations ("alias:Schema") are recorded; bare references ("alias") + * are skipped. Returns an error if the same alias is bound to two different + * schemas. + */ + static arrow::Result> + resolve_alias_map(const Query &query); }; } // namespace tundradb diff --git a/include/node.hpp b/include/node.hpp index 757aa82..b764bb1 100644 --- a/include/node.hpp +++ b/include/node.hpp @@ -114,6 +114,36 @@ class Node { return true; } + /** + * @brief Batch-update multiple fields in a single version. + * + * When using the arena (versioned storage), this creates exactly ONE + * new version for all field updates instead of N versions. 
+ */ + arrow::Result update_fields( + const std::vector, Value>> + &field_updates, + UpdateType update_type) { + if (field_updates.empty()) return true; + + if (arena_ != nullptr) { + return arena_->update_fields(*handle_, layout_, field_updates); + } + + // Non-arena fallback: update data_ map directly + for (const auto &[field, value] : field_updates) { + if (data_.find(field->name()) == data_.end()) { + return arrow::Status::KeyError("Field not found: ", field->name()); + } + switch (update_type) { + case SET: + data_[field->name()] = value; + break; + } + } + return true; + } + [[deprecated]] arrow::Result set_value(const std::string &field, const Value &value) { log_warn("set_value by string is deprecated"); diff --git a/include/query.hpp b/include/query.hpp index c4cc92f..18d4e69 100644 --- a/include/query.hpp +++ b/include/query.hpp @@ -15,6 +15,7 @@ #include #include #include +#include #include #include "node.hpp" @@ -630,6 +631,179 @@ class QueryResult { QueryExecutionStats stats_; }; +/** @brief A single field assignment: field_name = value. */ +struct SetAssignment { + std::string field_name; + Value value; + + SetAssignment(std::string field_name, Value value) + : field_name(std::move(field_name)), value(std::move(value)) {} +}; + +/** + * @brief Describes an UPDATE operation. 
+ * + * Two modes: + * + * **Mode 1 — by ID** (no query engine involved, bare field names): + * @code + * UpdateQuery::on("User", 0).set("age", Value(31)).build(); + * @endcode + * + * **Mode 2 — by MATCH query** (alias-qualified SET fields): + * @code + * // Simple WHERE — update one schema: + * UpdateQuery::match( + * Query::from("u:User") + * .where("u.city", CompareOp::Eq, Value("NYC")) + * .build() + * ).set("u.status", Value("active")).build(); + * + * // Traversal — update multiple schemas: + * UpdateQuery::match( + * Query::from("u:User") + * .traverse("u", "WORKS_AT", "c:Company") + * .where("c.name", CompareOp::Eq, Value("Google")) + * .build() + * ).set("u.status", Value("employed")) + * .set("c.size", Value(int32_t(5001))) + * .build(); + * @endcode + * + * In Mode 2, each SET field must be alias-qualified ("alias.field"). + * The target aliases are derived from the SET assignments automatically. + */ +class UpdateQuery { + public: + class Builder; + + /** @brief Schema name (Mode 1 only). */ + [[nodiscard]] const std::string& schema() const { return schema_; } + + [[nodiscard]] const std::vector& assignments() const { + return assignments_; + } + + /** @brief Node ID for Mode 1 (direct update). */ + [[nodiscard]] const std::optional& node_id() const { + return node_id_; + } + + /** @brief The MATCH query for Mode 2 (query-based update). */ + [[nodiscard]] const std::optional& match_query() const { + return match_query_; + } + + /** @brief True if this is a Mode 2 (query-based) update. */ + [[nodiscard]] bool has_match() const { return match_query_.has_value(); } + + [[nodiscard]] UpdateType update_type() const { return update_type_; } + + /** + * @brief Extract unique aliases referenced in SET assignments. + * + * For Mode 2 only. Parses each "alias.field" to collect the set of + * distinct alias prefixes (e.g. {"u", "c"}). 
+ */ + [[nodiscard]] std::vector target_aliases() const { + std::vector aliases; + for (const auto& a : assignments_) { + auto dot = a.field_name.find('.'); + if (dot == std::string::npos) continue; + std::string alias = a.field_name.substr(0, dot); + if (std::find(aliases.begin(), aliases.end(), alias) == aliases.end()) { + aliases.push_back(alias); + } + } + return aliases; + } + + /** @brief Mode 1 — target a specific node by schema + ID. */ + static Builder on(const std::string& schema, int64_t node_id) { + return {schema, node_id}; + } + + /** @brief Mode 2 — target nodes found by a MATCH query. */ + static Builder match(Query query) { return Builder{std::move(query)}; } + + class Builder { + public: + /** @brief Mode 1 constructor: update a specific node by ID. */ + Builder(std::string schema, int64_t node_id) + : schema_(std::move(schema)), node_id_(node_id) {} + + /** @brief Mode 2 constructor: update nodes found by a MATCH query. */ + explicit Builder(Query query) : match_query_(std::move(query)) {} + + /** + * @brief Add a field assignment. + * + * - Mode 1: bare name — set("age", Value(31)) + * - Mode 2: qualified — set("u.age", Value(31)) + */ + Builder& set(std::string field_name, Value value) { + assignments_.emplace_back(std::move(field_name), std::move(value)); + return *this; + } + + /** @brief Override the update type (default: SET). */ + Builder& type(UpdateType t) { + update_type_ = t; + return *this; + } + + /** @brief Build the immutable UpdateQuery (rvalue). */ + [[nodiscard]] UpdateQuery build() && { + if (assignments_.empty()) { + throw std::runtime_error( + "UpdateQuery must have at least one SET assignment"); + } + return UpdateQuery(std::move(schema_), std::move(assignments_), node_id_, + std::move(match_query_), update_type_); + } + + /** @brief Build the immutable UpdateQuery (lvalue). 
*/ + [[nodiscard]] UpdateQuery build() & { + if (assignments_.empty()) { + throw std::runtime_error( + "UpdateQuery must have at least one SET assignment"); + } + return UpdateQuery(schema_, assignments_, node_id_, match_query_, + update_type_); + } + + private: + std::string schema_; + std::vector assignments_; + std::optional node_id_; + std::optional match_query_; + UpdateType update_type_ = UpdateType::SET; + }; + + private: + UpdateQuery(std::string schema, std::vector assignments, + std::optional node_id, std::optional match_query, + UpdateType update_type) + : schema_(std::move(schema)), + assignments_(std::move(assignments)), + node_id_(std::move(node_id)), + match_query_(std::move(match_query)), + update_type_(update_type) {} + + std::string schema_; + std::vector assignments_; + std::optional node_id_; + std::optional match_query_; + UpdateType update_type_; +}; + +/** @brief Result of an update operation. */ +struct UpdateResult { + int64_t updated_count = 0; ///< Number of nodes updated. + int64_t failed_count = 0; ///< Number of nodes that failed to update. + std::vector errors; ///< Error messages for failed updates. 
+}; + } // namespace tundradb #endif // QUERY_HPP diff --git a/src/core.cpp b/src/core.cpp index 9f8b3e2..f57e240 100644 --- a/src/core.cpp +++ b/src/core.cpp @@ -848,4 +848,197 @@ arrow::Result> Database::query( return result; } +// --------------------------------------------------------------------------- +// Database::update - dispatch to Mode 1 or Mode 2 +// --------------------------------------------------------------------------- +arrow::Result Database::update(const UpdateQuery& uq) { + if (uq.node_id().has_value()) { + return update_by_id(uq); + } + if (uq.has_match()) { + return update_by_match(uq); + } + return arrow::Status::Invalid( + "UpdateQuery must specify a node ID or a MATCH query"); +} + +// --------------------------------------------------------------------------- +// Mode 1: update a single node by schema + ID +// --------------------------------------------------------------------------- +arrow::Result Database::update_by_id(const UpdateQuery& uq) { + UpdateResult result; + + auto schema_result = schema_registry_->get(uq.schema()); + if (!schema_result.ok()) { + return arrow::Status::KeyError("Schema '", uq.schema(), "' not found"); + } + const auto& schema = schema_result.ValueOrDie(); + + // Resolve fields upfront - fail early on bad field names + std::vector, Value>> resolved; + resolved.reserve(uq.assignments().size()); + for (const auto& a : uq.assignments()) { + auto field = schema->get_field(a.field_name); + if (!field) { + return arrow::Status::Invalid( + "Field '", a.field_name, "' not found in schema '", uq.schema(), "'"); + } + resolved.emplace_back(field, a.value); + } + + const int64_t id = uq.node_id().value(); + if (const auto r = + update_node_fields(uq.schema(), id, resolved, uq.update_type()); + !r.ok()) { + result.failed_count++; + result.errors.push_back(uq.schema() + "(" + std::to_string(id) + + "): " + r.status().ToString()); + } else { + result.updated_count = 1; + } + return result; +} + +// 
--------------------------------------------------------------------------- +// Mode 2: find nodes via MATCH query, then batch-update each +// --------------------------------------------------------------------------- +arrow::Result Database::update_by_match(const UpdateQuery& uq) { + UpdateResult result; + const auto& match_query = uq.match_query().value(); + + // 1. Resolve alias -> schema mapping (declarations only, with validation) + ARROW_ASSIGN_OR_RAISE(auto alias_to_schema, resolve_alias_map(match_query)); + + // 2. Group SET assignments by alias: { alias -> (schema, [(Field,Value)]) } + struct AliasUpdate { + std::string schema_name; + std::vector, Value>> fields; + }; + std::unordered_map grouped; + + for (const auto& a : uq.assignments()) { + auto dot = a.field_name.find('.'); + if (dot == std::string::npos) { + return arrow::Status::Invalid( + "SET field '", a.field_name, + "' must be alias-qualified (e.g. u.age) in a MATCH-based update"); + } + std::string alias = a.field_name.substr(0, dot); + std::string bare_field = a.field_name.substr(dot + 1); + + auto it = alias_to_schema.find(alias); + if (it == alias_to_schema.end()) { + return arrow::Status::Invalid("Alias '", alias, + "' not found in MATCH query"); + } + + auto schema_result = schema_registry_->get(it->second); + if (!schema_result.ok()) { + return arrow::Status::KeyError("Schema '", it->second, "' not found"); + } + const auto& schema = schema_result.ValueOrDie(); + auto field = schema->get_field(bare_field); + if (!field) { + return arrow::Status::Invalid("Field '", bare_field, + "' not found in schema '", it->second, "'"); + } + + auto& entry = grouped[alias]; + if (entry.schema_name.empty()) entry.schema_name = it->second; + entry.fields.emplace_back(field, a.value); + } + + // 3. Build ID-only SELECT: we only need "u.id", "c.id", etc. 
+ std::vector id_columns; + id_columns.reserve(grouped.size()); + for (const auto& alias : grouped | std::views::keys) { + id_columns.push_back(alias + ".id"); + } + Query id_query(match_query.from(), match_query.clauses(), + std::make_shared