From a2f43d5c68bedc30e5ebe6f7fd5b2ed294f768cb Mon Sep 17 00:00:00 2001 From: rawbytedev <95090911+rawbytedev@users.noreply.github.com> Date: Mon, 15 Dec 2025 20:46:00 +0100 Subject: [PATCH] Refactor journal batch handling and improve documentation --- doc.md | 4 -- internal/journal/README.md | 85 ++++++++++++++++++++++++++++++ internal/journal/journal.go | 90 ++++++++++++++++---------------- internal/journal/journal_test.go | 16 +++--- internal/journal/types.go | 26 ++++++--- 5 files changed, 157 insertions(+), 64 deletions(-) create mode 100644 internal/journal/README.md diff --git a/doc.md b/doc.md index f904991..a964db5 100644 --- a/doc.md +++ b/doc.md @@ -17,7 +17,3 @@ SealBatch handles operations such as batching, generating merkletree, storing in and anchoring receipt on solana ## Usage - -- Improve Journaling -- Improve DB -- Add Benchmarking \ No newline at end of file diff --git a/internal/journal/README.md b/internal/journal/README.md new file mode 100644 index 0000000..2da7bdc --- /dev/null +++ b/internal/journal/README.md @@ -0,0 +1,85 @@ +# Journal + +On-chain we store: +batchCommitment = hash(rootHash || totalEntries || timestampRangeHash || version) + +- rootHash = Merkle root of all entries +- totalEntries = number of entries in batch +- timestampRangeHash = hash(startTimestamp || endTimestamp) +- version = protocol version + +Off-Chain Key-Value Database Schema + +1. 
Batch Registry (b:{batchCommitment}) + +```json +{ + "rootHash": "0xabc123...", + "totalEntries": 150, + "startTime": 1633023456000, + "endTime": 1633023556000, + "treeDepth": 8, + "leafFormat": "sha256(data || position)", + "commitment": "batchCommitment (on-chain hash)", + "status": "finalized", + "createdAt": 1633023557000 +} +``` + +2. Entry Storage (Content-Addressable) + +```t +Key: e:{dataHash} +Value: { + "data": "actual entry content", + "position": 3, + "batchCommitment": "batchCommitment", + "timestamp": 1633023456050, + "metadata": {} // optional additional data +} + +``` + +3. Position Index (Fast lookup by position) + +```t +Key: p:{batchCommitment}:{position:08d} +Value: dataHash // points to entry key +``` + +4. Merkle Proof Cache (Optimized for verification) (Not fully implemented) + +```t +Key: proof:{batchCommitment}:{position} +Value: { + "leafHash": "hash of entry at position", + "siblings": ["sibling1", "sibling2", ...], + "path": [0,1,0,...] // 0=left,1=right +} +``` + +5. Reverse Lookup (Find batch by entry) + +```t +Key: r:{dataHash} +Value: batchCommitment +``` + +Complete Key Structure + +```python +# All key prefixes in use +BATCH_PREFIX = "b:" +ENTRY_PREFIX = "e:" +POSITION_PREFIX = "p:" +PROOF_PREFIX = "proof:" +REVERSE_PREFIX = "r:" +TIME_PREFIX = "t:" + +# Example keys: +batch_key = f"b:{batchCommitment}" +entry_key = f"e:{dataHash}" +position_key = f"p:{batchCommitment}:{position:08d}" +proof_key = f"proof:{batchCommitment}:{position}" +reverse_key = f"r:{dataHash}" +``` diff --git a/internal/journal/journal.go b/internal/journal/journal.go index 194c608..f9c8f63 100644 --- a/internal/journal/journal.go +++ b/internal/journal/journal.go @@ -30,7 +30,6 @@ func NewJournalCache(ctx *context.Context) JournalStore { return &JournalCache{ctx: ctx, store: db} } - // called by main // testing // this is received by ai @@ -83,6 +82,12 @@ func (res *CommitResult) Encode() ([]byte, error) { func (res *CommitResult) Decode(data []byte) error 
{ return res.ctx.Encoder.Decode(data, res) } +func (res *Commitment) Encode() ([]byte, error) { + return res.ctx.Encoder.Encode(res) +} +func (res *Commitment) Decode(data []byte) error { + return res.ctx.Encoder.Decode(data, res) +} // set based on configuration // that means if config are changed during run @@ -187,42 +192,50 @@ func (j *JournalCache) BuildTree() error { return nil } -// this is related to commitresult needed to mint and anchor -// run after calling buildtree and before committing onto database -// needs len(j.post) j.treeroot timewindow -func (j *JournalCache) BatchInsert() (*CommitResult, error) { +// prepares a batch +func (j *JournalCache) Batch() (*CommitResult, error) { + if uint32(len(j.Post)) == 0 { + if j.commitRes != nil { + return j.commitRes, nil + } + return nil, nil + } batch := CommitResult{ ctx: j.ctx, Root: [32]byte(j.treeroot), Count: uint32(len(j.Post)), WindowsStart: j.Post[0].GetTimestamp(), WindowsEnd: j.Post[len(j.Post)-1].GetTimestamp(), + version: "v1", } - // encode root + count enc, err := batch.Encode() - fmt.Print("Logging encoded") - fmt.Print(enc) if err != nil { return &CommitResult{}, err } - // derive hash from both - // us it as id - batch.BatchID = hex.EncodeToString(j.hash(enc)) - newenc, err := batch.Encode() - j.batchid = batch.BatchID + batchcommitment := j.ctx.Hasher.Sum(enc) + batchdata := &Commitment{ + ctx: j.ctx, + Roothash: [32]byte(j.treeroot), + Count: uint32(len(j.Post)), + WindowsStart: j.Post[0].GetTimestamp(), + WindowsEnd: j.Post[len(j.Post)-1].GetTimestamp(), + commitment: batchcommitment, + } + data, err := batchdata.Encode() if err != nil { return &CommitResult{}, err } - return &batch, j.store.Put(j.ctx.Hasher.Sum([]byte(batch.BatchID)), newenc) + j.batchid = batchcommitment + batch.batchID = string(batchcommitment) + j.commitRes = &batch + return &batch, j.store.Put(fmt.Appendf(nil, "b:%x", batchcommitment), data) } -// only store post Entries -// entry are rehashed -// pattern -// chk:%s 
(checksum) -> PostEntry -// batch:%s (batchid) -> CommitResult -// seq:%s:%s (batchid) (n) -> checksum func (j *JournalCache) Commit() error { + _, err := j.Batch() + if err != nil { + return fmt.Errorf("%s", err) + } // j.Post get zerro when it goes low // we can't get size from it at that point // at this point seems like j contents get corrupted? need to investigate @@ -246,12 +259,15 @@ func (j *JournalCache) largeCommit() error { if err != nil { return err } - err = j.store.BatchPut(j.hash(fmt.Appendf(nil, "chk:%s", entry.GetID())), enc) + err = j.store.BatchPut(fmt.Appendf(nil, "e:%x", entry.GetID()), enc) if err != nil { return err } - //use j.batchid here to test bug/ replace with batchid below - err = j.store.BatchPut(j.hash(fmt.Appendf(nil, "seq:%s:%d", batchid, i)), []byte(entry.GetID())) + err = j.store.BatchPut(fmt.Appendf(nil, "p:%x:%d", batchid, i), []byte(entry.GetID())) + if err != nil { + return err + } + err = j.store.BatchPut(fmt.Appendf(nil, "r:%s", entry.GetID()), batchid) if err != nil { return err } @@ -259,7 +275,7 @@ func (j *JournalCache) largeCommit() error { // for testing the problem // replace size-1 with len(j.Post) // might be because it's a pointer? 
- // can be done after loop + // can be done after loop if i == size-1 { err = j.store.BatchPut(nil, nil) if err != nil { @@ -275,17 +291,16 @@ func (j *JournalCache) largeCommit() error { // hash it according to type before calling it func (j *JournalCache) Get(id string) ([]byte, error) { item, err := hex.DecodeString(id) - obj := j.ctx.Hasher.Sum(item) if err != nil { - return []byte{}, err + return nil, err } - return j.store.Get(obj) + return j.store.Get(item) } // clean everything / for now it can only clear func (j *JournalCache) RoolBack() { j.Post = j.Post[:0] - j.batchid = "" + j.batchid = j.batchid[:0] j.treeroot = nil } func (j *JournalCache) Close() error { @@ -295,25 +310,12 @@ func (j *JournalCache) Close() error { // small Format implementation func Format(s string, opts ...RetrieveOptions) string { - // do not support multiple options yet - // for future use - /* - if len(opts) == 1 { - switch opts[0] { - case Checksum: - d := hex.EncodeToString((fmt.Appendf(nil, "chk:%s", s))) - return d - default: - d := hex.EncodeToString((fmt.Appendf(nil, "chk:%s", s))) - return d - } - }*/ - return hex.EncodeToString(fmt.Appendf(nil, "chk:%s", s)) + return hex.EncodeToString(fmt.Appendf(nil, "e:%x", s)) } func FormatSeq(s string, n int) string { - return hex.EncodeToString(fmt.Appendf(nil, "seq:%s:%d", s, n)) + return hex.EncodeToString(fmt.Appendf(nil, "p:%x:%d", []byte(s), n)) } func FormatBatch(s string) string { - return hex.EncodeToString([]byte(s)) + return hex.EncodeToString(fmt.Appendf(nil, "b:%x", s)) } diff --git a/internal/journal/journal_test.go b/internal/journal/journal_test.go index 1e59637..00be091 100644 --- a/internal/journal/journal_test.go +++ b/internal/journal/journal_test.go @@ -121,6 +121,10 @@ func TestJournalInsert(t *testing.T) { if err != nil { t.Fatal(err) } + _, err = journal.Batch() + if err != nil { + t.Fatal(err) + } err = journal.Commit() if err != nil { t.Fatal(err) @@ -239,10 +243,6 @@ func TestJournalInsertGet(t *testing.T) 
{ } } -// error when trying to use in -// tempdir -// it has to do with how batcher handles write? -// The process cannot access the file because it is being used by another process. func TestBatchQuery(t *testing.T) { new := true if new { @@ -273,7 +273,7 @@ func TestBatchQuery(t *testing.T) { if err != nil { t.Fatal(err) } - com, err := journal.BatchInsert() + com, err := journal.Batch() if err != nil { t.Fatal(err) } @@ -301,7 +301,7 @@ func TestBatchQuery(t *testing.T) { for i := range com.Count { // retrieving checksum from batchID and at index i // the comparing with records - data, err := journal.Get(FormatSeq(com.BatchID, int(i))) + data, err := journal.Get(FormatSeq(com.batchID, int(i))) if err != nil { t.Log(err) t.Fatal("sequence while using Format for Seq") @@ -313,7 +313,7 @@ func TestBatchQuery(t *testing.T) { t.Log("Retrieving Batch") var v CommitResult v.ctx = &ctx - data, err := journal.Get(FormatBatch(com.BatchID)) + data, err := journal.Get(FormatBatch(com.batchID)) if err != nil { t.Fatal(err) } @@ -331,7 +331,7 @@ func TestBatchQuery(t *testing.T) { } else { // only peform query doesn't write to db _ = CommitResult{ - BatchID: "09dd1d47d7f0e5dfac278513a723b6d424558669feb014aecf5afce040c18211", + batchID: "09dd1d47d7f0e5dfac278513a723b6d424558669feb014aecf5afce040c18211", Root: [32]byte{89, 82, 203, 230, 157, 145, 229, 24, 119, 35, 162, 39, 108, 37, 209, 71, 3, 171, 242, 49, 6, 1, 84, 104, 252, 65, 22, 173, 7, 180, 233, 189}, Count: 0x3, } diff --git a/internal/journal/types.go b/internal/journal/types.go index c2a6b74..f69c1aa 100644 --- a/internal/journal/types.go +++ b/internal/journal/types.go @@ -21,16 +21,25 @@ type JournalStore interface { Entries() []JournalEntry BuildTree() error Get(id string) ([]byte, error) - BatchInsert() (*CommitResult, error) - Close() error // shutdows + Batch() (*CommitResult, error) // used for manual batch creation + Close() error // shuts down } type CommitResult struct { ctx *context.Context - BatchID string + 
batchID string Root [32]byte Count uint32 WindowsStart time.Time // first j.Port // assuming that it is ordered WindowsEnd time.Time // last j.Post + version string +} +type Commitment struct { + ctx *context.Context // unexported, named field (not embedded) + Roothash [32]byte + Count uint32 + WindowsStart time.Time + WindowsEnd time.Time + commitment []byte } // Default format when received / Unsafe @@ -79,9 +88,10 @@ func NewLocalStorage(ctx *context.Context) (database.StorageDB, error) { // this avoid having to recompute tree if something fails along the way type JournalCache struct { - ctx *context.Context - store database.StorageDB - Post []JournalEntry - treeroot []byte - batchid string + ctx *context.Context + store database.StorageDB + Post []JournalEntry + treeroot []byte + batchid []byte + commitRes *CommitResult }