2020//
2121// Phase 9b: MPMC ColumnBatch Event Bus.
2222
/// @file column_batch.hpp
/// @brief Column-major batch of feature rows for zero-copy tensor wrapping
///        and WAL serialization in ML inference pipelines.
26+
2327#pragma once
2428
2529#include " signet/error.hpp"
3842
3943namespace signet ::forge {
4044
/// Convenience alias for TensorDataType (shorter schema declarations).
4246using TDT = TensorDataType;
4347
4448// ============================================================================
4549// ColumnDesc — schema descriptor for one column in a ColumnBatch
4650// ============================================================================
4751
52+ // / Describes a single column in a ColumnBatch schema.
4853struct ColumnDesc {
49- std::string name;
50- TensorDataType dtype = TensorDataType::FLOAT64; // /< physical storage type
54+ std::string name; // /< Column name (e.g. "price", "volume")
55+ TensorDataType dtype = TensorDataType::FLOAT64; // /< Physical storage type (always stored as double internally)
5156};
5257
5358// ============================================================================
@@ -57,17 +62,25 @@ struct ColumnDesc {
5762// wrapping. Each column is a contiguous std::vector<double>.
5863// ============================================================================
5964
/// A column-major batch of feature rows for ML inference and WAL serialization.
///
/// Data is stored in column-major layout (columns_[col][row]) so each column
/// is a contiguous double array suitable for zero-copy wrapping as a TensorView
/// or ONNX OrtValue without transposition.
///
/// Typically shared across threads via SharedColumnBatch (std::shared_ptr).
/// @see SharedColumnBatch, make_column_batch, EventBus
6073class ColumnBatch {
6174public:
6275 // -------------------------------------------------------------------------
6376 // Producer-side metadata (set before publishing)
6477 // -------------------------------------------------------------------------
6578
/// Exchange / feed identifier.
std::string source_id;
/// Instrument symbol.
std::string symbol;
/// First WAL sequence number in this batch.
int64_t seq_first = 0;
/// Last WAL sequence number in this batch.
int64_t seq_last = 0;
/// Batch creation timestamp (ns since epoch).
int64_t created_ns = 0;
7184
7285 // -------------------------------------------------------------------------
7386 // Factory
@@ -92,11 +105,12 @@ class ColumnBatch {
92105 return b;
93106 }
94107
108+ // / Default constructor (empty batch, no schema).
95109 ColumnBatch () = default ;
96- ColumnBatch (ColumnBatch&&) = default ;
97- ColumnBatch& operator =(ColumnBatch&&) = default ;
98- ColumnBatch (const ColumnBatch&) = default ;
99- ColumnBatch& operator =(const ColumnBatch&) = default ;
110+ ColumnBatch (ColumnBatch&&) = default ; // /< Move constructor.
111+ ColumnBatch& operator =(ColumnBatch&&) = default ; // /< Move assignment.
112+ ColumnBatch (const ColumnBatch&) = default ; // /< Copy constructor.
113+ ColumnBatch& operator =(const ColumnBatch&) = default ; // /< Copy assignment.
100114
101115 // -------------------------------------------------------------------------
102116 // Build API — called from producer thread
@@ -115,11 +129,17 @@ class ColumnBatch {
115129 return expected<void >{};
116130 }
117131
132+ // / Append one row from an initializer list (e.g. `push_row({1.0, 2.0})`).
133+ // / @param values Feature values (must match num_columns()).
134+ // / @return Error on schema mismatch.
118135 [[nodiscard]] expected<void > push_row (std::initializer_list<double > values) {
119136 std::vector<double > tmp (values);
120137 return push_row (tmp.data (), tmp.size ());
121138 }
122139
140+ // / Append one row from a vector.
141+ // / @param values Feature values (must match num_columns()).
142+ // / @return Error on schema mismatch.
123143 [[nodiscard]] expected<void > push_row (const std::vector<double >& values) {
124144 return push_row (values.data (), values.size ());
125145 }
@@ -128,10 +148,14 @@ class ColumnBatch {
128148 // Query API — called from consumer / ML thread
129149 // -------------------------------------------------------------------------
130150
151+ // / Number of rows currently in the batch.
131152 [[nodiscard]] size_t num_rows () const noexcept { return num_rows_; }
153+ // / Number of columns defined by the schema.
132154 [[nodiscard]] size_t num_columns () const noexcept { return schema_.size (); }
155+ // / True if the batch contains no rows.
133156 [[nodiscard]] bool empty () const noexcept { return num_rows_ == 0 ; }
134157
158+ // / The schema (column descriptors) this batch was created with.
135159 [[nodiscard]] const std::vector<ColumnDesc>& schema () const noexcept {
136160 return schema_;
137161 }
@@ -163,6 +187,13 @@ class ColumnBatch {
163187 // buffer. output_dtype defaults to FLOAT32 for ONNX compatibility.
164188 // -------------------------------------------------------------------------
165189
190+ // / Assemble all columns into a single 2D [rows x cols] OwnedTensor.
191+ // /
192+ // / Uses BatchTensorBuilder internally. The default output type is FLOAT32
193+ // / for direct ONNX Runtime consumption.
194+ // /
195+ // / @param output_dtype Desired element type (default FLOAT32).
196+ // / @return OwnedTensor of shape {num_rows, num_columns}, or Error if empty.
166197 [[nodiscard]] expected<OwnedTensor> as_tensor (
167198 TensorDataType output_dtype = TensorDataType::FLOAT32) const {
168199
@@ -191,6 +222,14 @@ class ColumnBatch {
191222 // [float64 values × num_rows] × num_columns (column-major)
192223 // -------------------------------------------------------------------------
193224
225+ // / Serialize the batch into a WAL StreamRecord.
226+ // /
227+ // / The binary payload uses little-endian column-major format. The default
228+ // / type_id 0x434F4C42 ("COLB") identifies ColumnBatch records in the WAL.
229+ // /
230+ // / @param timestamp_ns Override timestamp (0 = use created_ns).
231+ // / @param type_id Record type tag for WAL routing.
232+ // / @return StreamRecord with the serialized batch payload.
194233 [[nodiscard]] StreamRecord to_stream_record (
195234 int64_t timestamp_ns = 0 ,
196235 uint32_t type_id = 0x434F4C42u /* "COLB"*/ ) const {
@@ -241,6 +280,13 @@ class ColumnBatch {
241280 // Deserialise a StreamRecord payload back into a ColumnBatch
242281 // -------------------------------------------------------------------------
243282
283+ // / Deserialize a StreamRecord payload back into a ColumnBatch.
284+ // /
285+ // / Inverse of to_stream_record(). Reads the binary column-major format
286+ // / and reconstructs the schema, columns, and row data.
287+ // /
288+ // / @param rec StreamRecord previously produced by to_stream_record().
289+ // / @return Reconstructed ColumnBatch, or Error on truncated/corrupt payload.
244290 [[nodiscard]] static expected<ColumnBatch> from_stream_record (
245291 const StreamRecord& rec) {
246292
@@ -299,11 +345,14 @@ class ColumnBatch {
299345 // Utility
300346 // -------------------------------------------------------------------------
301347
348+ // / Clear all row data while preserving the schema.
302349 void clear () {
303350 for (auto & col : columns_) col.clear ();
304351 num_rows_ = 0 ;
305352 }
306353
354+ // / Pre-allocate storage for the given number of rows in each column.
355+ // / @param rows Number of rows to reserve capacity for.
307356 void reserve (size_t rows) {
308357 for (auto & col : columns_) col.reserve (rows);
309358 }
@@ -318,6 +367,8 @@ class ColumnBatch {
318367// SharedColumnBatch — the unit transferred between threads
319368// ---------------------------------------------------------------------------
320369
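/// Shared-ownership pointer to a ColumnBatch -- the unit transferred
/// between producer and consumer threads via EventBus.
/// Note: std::shared_ptr synchronizes only its reference count; it does not
/// by itself synchronize access to the pointed-to batch.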
321372using SharedColumnBatch = std::shared_ptr<ColumnBatch>;
322373
323374// / Convenience factory: create a shared batch with a given schema.
0 commit comments