-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathDemo.py
More file actions
470 lines (400 loc) · 15.2 KB
/
Demo.py
File metadata and controls
470 lines (400 loc) · 15.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
#!/usr/bin/env python3
"""
MIVP — Model Identity Verification Protocol
Minimal Reference Implementation & Test Vector Verification (v2.1)

Demonstrates:
  - Leaf hash computation (chunked file, domain-separated)
  - Merkle tree construction with carry-up (no duplication)
  - Model Hash (MH) as commitment over root + leaf count
  - Policy Hash (PH) from canonical JSON
  - Runtime Hash (RH) from canonical JSON
  - Composite Instance Hash (CIH) with and without instance_epoch
  - Verification against all normative test vectors from Appendix G

No external dependencies. Standard Python 3.8+.
This is a pedagogical reference, not production code.

SIMPLIFICATIONS (deliberate):
  - Chunk size uses spec test vector value (4 bytes), not recommended 4 MiB
  - No manifest parsing; inputs passed directly
  - No session continuity or attestation signing
  - CFS-1 float serialization is minimal (covers test vector cases)
"""
from __future__ import annotations
import hashlib
import json
import struct
import unicodedata
from typing import List, Optional, Tuple
# —————————————————————————
# Utilities
# —————————————————————————
def sha256(data: bytes) -> bytes:
    """Return the raw 32-byte SHA-256 digest of *data*."""
    return hashlib.sha256(data).digest()
def u64be(n: int) -> bytes:
    """Encode *n* as an unsigned 64-bit big-endian integer (8 bytes).

    Raises:
        struct.error: if *n* does not fit in an unsigned 64-bit integer.
    """
    return struct.pack(">Q", n)
def hex_to_bytes(h: str) -> bytes:
    """Decode a hex string into bytes, ignoring any space characters in it.

    Raises:
        ValueError: if the remaining text is not valid hexadecimal.
    """
    # Spaces are removed up front so grouped digests ("4f65 6b70 ...") decode.
    return bytes.fromhex(h.replace(" ", ""))
def nfc(s: str) -> str:
    """Return *s* in Unicode Normalization Form C (canonical composition)."""
    return unicodedata.normalize("NFC", s)
# —————————————————————————
# Layer 1 — Model Hash (MH)
# Appendix A: Merkle tree over chunked weight files
# —————————————————————————
def leaf_hash(path: str, file_size_bytes: int, chunk_index: int, chunk_bytes: bytes) -> bytes:
    """Compute the domain-separated hash of one file chunk.

    leaf = SHA256(
        0x00 || "MIVP-LEAF-CHUNK-V1" || 0x00 ||
        path_utf8 || 0x00 ||
        u64be(file_size_bytes) ||
        u64be(chunk_index) ||
        chunk_bytes
    )

    Args:
        path: File path; NFC-normalized and UTF-8 encoded before hashing.
        file_size_bytes: Total size of the file the chunk belongs to.
        chunk_index: Zero-based position of the chunk within the file.
        chunk_bytes: Raw bytes of the chunk.

    Returns:
        The 32-byte leaf digest.
    """
    preimage = b"".join(
        (
            b"\x00" + b"MIVP-LEAF-CHUNK-V1" + b"\x00",  # leaf domain tag
            nfc(path).encode("utf-8") + b"\x00",  # NUL-terminated path
            u64be(file_size_bytes),
            u64be(chunk_index),
            chunk_bytes,
        )
    )
    return sha256(preimage)
def node_hash(left: bytes, right: bytes) -> bytes:
    """Hash two child digests into their parent Merkle node.

    node = SHA256(0x01 || "MIVP-NODE-BINARY-V1" || 0x00 || left || right)
    """
    # The 0x01 prefix differs from the 0x00 leaf prefix used by leaf_hash.
    domain = b"\x01" + b"MIVP-NODE-BINARY-V1" + b"\x00"
    return sha256(domain + left + right)
def merkle_root(leaves: List[bytes]) -> bytes:
    """Build a Merkle tree over *leaves* and return its root.

    Adjacent nodes are combined pairwise with node_hash(). When a level has
    an odd number of nodes, the last one is carried up to the next level
    unchanged (no duplication, no re-hashing).

    Args:
        leaves: Leaf digests in order; must be non-empty.

    Returns:
        The root digest. For a single leaf this is the leaf itself.

    Raises:
        ValueError: if *leaves* is empty.
    """
    if not leaves:
        raise ValueError("Empty leaf list")
    level = list(leaves)
    while len(level) > 1:
        # Pair (0,1), (2,3), ...; the range stops before a trailing odd node.
        parents = [
            node_hash(level[i], level[i + 1])
            for i in range(0, len(level) - 1, 2)
        ]
        if len(level) % 2:
            # Carry-up: odd node passes through unchanged
            parents.append(level[-1])
        level = parents
    return level[0]
def model_hash(file_path: str, file_bytes: bytes, chunk_size: int) -> Tuple[bytes, bytes, int]:
    """Compute the Model Hash (MH) for a single file.

    The file is split into consecutive chunks of *chunk_size* bytes (the
    last one may be shorter), each chunk is hashed with leaf_hash(), and the
    leaves are combined with merkle_root().

        MH = SHA256(merkle_root || u64be(total_leaves))

    Args:
        file_path: Path bound into every leaf hash.
        file_bytes: Full file contents.
        chunk_size: Chunk length in bytes; must be at least 1.

    Returns:
        A tuple ``(MH, merkle_root, total_leaves)``.

    Raises:
        ValueError: if *chunk_size* is smaller than 1.
    """
    if chunk_size < 1:
        # A zero or negative step would never advance through the file.
        raise ValueError("chunk_size must be a positive integer")

    file_size = len(file_bytes)
    # An empty file still contributes exactly one leaf over an empty chunk.
    chunks = [
        file_bytes[start:start + chunk_size]
        for start in range(0, file_size, chunk_size)
    ] or [b""]
    leaves = [
        leaf_hash(file_path, file_size, idx, chunk)
        for idx, chunk in enumerate(chunks)
    ]
    root = merkle_root(leaves)
    total = len(leaves)
    mh = sha256(root + u64be(total))
    return mh, root, total
# —————————————————————————
# Layer 2 — Policy Hash (PH)
# Appendix B: Canonical JSON, field-level normalization
# —————————————————————————
def policy_hash(canonical_policy_json: str) -> bytes:
    """Compute the Policy Hash (PH) over an already-canonical policy JSON.

    PH = SHA256(0x02 || "MIVP-POLICY-V1" || 0x00 || canonical_policy_json_bytes)

    canonical_policy_json must already be canonicalized (keys sorted, no
    insignificant whitespace, field-level NFC + whitespace normalization
    applied). This function performs no canonicalization itself; it only
    UTF-8 encodes the string and hashes it.
    """
    domain = b"\x02" + b"MIVP-POLICY-V1" + b"\x00"
    payload = canonical_policy_json.encode("utf-8")
    return sha256(domain + payload)
def canonicalize_policy(
    system_prompt: str,
    guardrails: List[dict],
    moderation_policy_version: str,
    policy_spec_version: str,
    attestation_completeness: str,
) -> str:
    """Build canonical policy JSON per Appendix B.

    - Field-level: normalize line endings to LF, apply NFC, strip leading and
      trailing ASCII whitespace
    - Guardrails sorted lexicographically by their cleaned id
    - Keys sorted, no insignificant whitespace, non-ASCII emitted verbatim

    Args:
        system_prompt: System prompt text.
        guardrails: Dicts with string values under "id" and "rule"; any other
            keys are dropped.
        moderation_policy_version: Moderation policy version string.
        policy_spec_version: Policy spec version string.
        attestation_completeness: Attestation completeness label.

    Returns:
        The canonical JSON text, suitable as input to policy_hash().

    Raises:
        KeyError: if a guardrail lacks "id" or "rule".
    """
    # NOTE(review): the exact "ASCII whitespace" set is assumed to be space,
    # TAB, LF, CR, VT and FF — confirm against Appendix B. A bare str.strip()
    # would also remove Unicode whitespace such as U+00A0.
    ascii_whitespace = " \t\n\r\x0b\x0c"

    def clean(s: str) -> str:
        s = s.replace("\r\n", "\n").replace("\r", "\n")
        return nfc(s).strip(ascii_whitespace)

    # Clean first, then sort: ordering by the raw id would let insignificant
    # whitespace (e.g. " b" sorting before "a") change the canonical order.
    cleaned_guardrails = sorted(
        ({"id": clean(g["id"]), "rule": clean(g["rule"])} for g in guardrails),
        key=lambda g: g["id"],
    )
    obj = {
        "attestation_completeness": clean(attestation_completeness),
        "guardrails": cleaned_guardrails,
        "moderation_policy_version": clean(moderation_policy_version),
        "policy_spec_version": clean(policy_spec_version),
        "system_prompt": clean(system_prompt),
    }
    return json.dumps(obj, sort_keys=True, separators=(",", ":"), ensure_ascii=False)
# —————————————————————————
# Layer 3 — Runtime Hash (RH)
# Appendix D + CFS-1 float serialization
# —————————————————————————
def cfs1_float(x: float) -> str:
    """Serialize a number as a CFS-1 decimal string.

    CFS-1: shortest round-trip-safe decimal, no trailing zeros,
    no scientific notation in range [1e-4, 1e4), normalize -0.0 to 0.

    Inside that range the text comes from ``repr(float)``, which is the
    shortest string that round-trips, with a trailing ".0" removed so
    integer-valued floats serialize like integers ("1", not "1.0").

    Outside the range the value is formatted with 10 significant digits,
    which may use exponent notation and is not guaranteed to round-trip.
    Non-finite values are not rejected. Production implementations should
    use a proper shortest-decimal algorithm (e.g., Ryu/Grisu).

    Args:
        x: The value to serialize; ints are accepted and treated as floats.

    Returns:
        The decimal text for *x*.
    """
    # Coerce first: repr() of an int has no decimal point, so stripping
    # trailing zeros from it would corrupt values such as 100.
    value = float(x)
    if value == 0.0:
        # Also true for -0.0, which therefore normalizes to "0".
        return "0"
    if 1e-4 <= abs(value) < 1e4:
        # repr() is plain decimal with a "." throughout this range, so the
        # zero-stripping below can never eat integer digits.
        return repr(value).rstrip("0").rstrip(".")
    return f"{value:.10g}"
def canonicalize_runtime(
    temperature: float,
    top_p: float,
    max_tokens: int,
    tooling_enabled: bool,
    routing_mode: str,
    runtime_spec_version: str,
) -> str:
    """Build canonical runtime JSON per Appendix D + CFS-1.

    Keys are emitted in sorted order with no insignificant whitespace.
    The two float fields (temperature, top_p) are serialized with
    cfs1_float(), so an integer-valued float such as ``top_p=1.0`` is
    written as ``1`` rather than ``1.0``. All other fields are encoded by
    json.dumps().

    Returns:
        The canonical JSON text, suitable as input to runtime_hash().
    """
    # json.dumps would write 1.0 as "1.0", so the object is assembled by hand
    # and only the non-float members are delegated to json.dumps.
    members = (
        ("max_tokens", json.dumps(max_tokens)),
        ("routing_mode", json.dumps(routing_mode, ensure_ascii=False)),
        ("runtime_spec_version", json.dumps(runtime_spec_version, ensure_ascii=False)),
        ("temperature", cfs1_float(temperature)),
        ("tooling_enabled", json.dumps(tooling_enabled)),
        ("top_p", cfs1_float(top_p)),
    )
    # The member names above are listed in sorted order already.
    return "{" + ",".join(f'"{key}":{encoded}' for key, encoded in members) + "}"
def runtime_hash(canonical_runtime_json: str) -> bytes:
    """Compute the Runtime Hash (RH) over an already-canonical runtime JSON.

    RH = SHA256(0x03 || "MIVP-RUNTIME-V1" || 0x00 || canonical_runtime_json_bytes)

    The input is UTF-8 encoded and hashed as given; no canonicalization is
    performed here.
    """
    domain = b"\x03" + b"MIVP-RUNTIME-V1" + b"\x00"
    payload = canonical_runtime_json.encode("utf-8")
    return sha256(domain + payload)
# —————————————————————————
# Layer 4 — Composite Instance Hash (CIH)
# Appendix C
# —————————————————————————
def composite_instance_hash(
    mh: bytes,
    ph: bytes,
    rh: bytes,
    instance_epoch: Optional[int] = None,
) -> bytes:
    """Combine the three layer hashes into the Composite Instance Hash.

    CIH = SHA256(0x04 || "MIVP-CIH-V1" || 0x00 || MH || PH || RH [|| u64be(epoch)])

    Args:
        mh: Model Hash bytes.
        ph: Policy Hash bytes.
        rh: Runtime Hash bytes.
        instance_epoch: Optional epoch appended as an unsigned 64-bit
            big-endian integer. Only ``None`` omits it; an epoch of 0 is
            still appended.

    Returns:
        The 32-byte CIH digest.
    """
    domain = b"\x04" + b"MIVP-CIH-V1" + b"\x00"
    payload = mh + ph + rh
    if instance_epoch is not None:
        payload += u64be(instance_epoch)
    return sha256(domain + payload)
# —————————————————————————
# Test Vector Verification (Appendix G)
# —————————————————————————
def verify(label: str, computed: bytes, expected_hex: str) -> bool:
    """Compare a computed digest with its expected value and report it.

    Prints a PASS/FAIL line for *label*; on failure also prints the expected
    and computed digests in hex.

    Args:
        label: Human-readable name of the check.
        computed: Digest produced by this implementation.
        expected_hex: Expected digest as a hex string.

    Returns:
        True when the digests are equal, False otherwise.
    """
    expected = hex_to_bytes(expected_hex)
    ok = computed == expected
    status = "PASS" if ok else "FAIL"
    print(f" [{status}] {label}")
    if not ok:
        print(f" expected: {expected.hex()}")
        print(f" computed: {computed.hex()}")
    return ok
def run_test_vectors() -> bool:
    """Check the implementation against the Appendix G test vectors.

    Runs the Model Hash (G2), Policy Hash (G3), Runtime Hash (G4) and
    Composite Instance Hash (G5) checks, printing one PASS/FAIL line per
    check and a final summary. Every check runs even after a failure.

    Returns:
        True if every check passed, False otherwise.
    """
    print("=" * 60)
    print("MIVP v2.1 — Test Vector Verification (Appendix G)")
    print("=" * 60)
    all_pass = True

    # Expected digests that are checked more than once below.
    mh1_expected = "4f656b70d087942661166d7a311e3f0afde26c4b21729a8004cac46135480900"
    ph1_expected = "4f081b3563cd5362763879f8b3256a77af438cd0d2ed4c935284f01f248cf86f"
    rh1_expected = "827d100295de8a512a5d9a4138fadcdb5112d73e00dfd3fe163c142141d1faa0"

    # -----------------------------------------------------------------------
    # G2 — Model Hash test vectors
    # -----------------------------------------------------------------------
    print("\n[G2] Model Hash (MH)")
    # MH-1: even leaf count (ABCDEF, chunk_size=4)
    mh1, root1, _ = model_hash(
        file_path="models/model.bin",
        file_bytes=bytes.fromhex("414243444546"),
        chunk_size=4,
    )
    all_pass &= verify(
        "MH-1 leaf L0",
        leaf_hash("models/model.bin", 6, 0, bytes.fromhex("41424344")),
        "097c9dd21719c13ebcf1bad1724d0a7ea82393311678b8a74d83377e18aa3e57",
    )
    all_pass &= verify(
        "MH-1 leaf L1",
        leaf_hash("models/model.bin", 6, 1, bytes.fromhex("4546")),
        "51f60b4987a50e69b4c8980137fe37251aeef10fe9dbbc43bb7941fc10e4c6d3",
    )
    all_pass &= verify(
        "MH-1 merkle_root",
        root1,
        "939bb8b42f752e8bb1fce36beacfd9413ff0b350e862cf5037850f3d8c94d82e",
    )
    all_pass &= verify("MH-1 Model Hash", mh1, mh1_expected)

    # MH-2: odd leaf count / carry-up (ABCDEFGHI, chunk_size=4)
    mh2, root2, _ = model_hash(
        file_path="models/model.bin",
        file_bytes=bytes.fromhex("414243444546474849"),
        chunk_size=4,
    )
    all_pass &= verify(
        "MH-2 leaf L0",
        leaf_hash("models/model.bin", 9, 0, bytes.fromhex("41424344")),
        "719cd3d140c2a126d99fb88622120492b250e934a56f0f93773d650d21ed037f",
    )
    all_pass &= verify(
        "MH-2 leaf L1",
        leaf_hash("models/model.bin", 9, 1, bytes.fromhex("45464748")),
        "43d9fb53a57ba202b5f4db56a350aa0889d00ea8587fe4d9bd70ea4910f92c26",
    )
    all_pass &= verify(
        "MH-2 leaf L2",
        leaf_hash("models/model.bin", 9, 2, bytes.fromhex("49")),
        "bc4cc00af916be1b7cd04e8d403a39e099d8d74b9a646da132d58d9af259c23c",
    )
    all_pass &= verify(
        "MH-2 merkle_root",
        root2,
        "eebb49a6e7c030d322c50d8d3cb9323c08091a56158a208b19d823d00c246a7b",
    )
    all_pass &= verify(
        "MH-2 Model Hash",
        mh2,
        "95ac46cbe619cff182dc38e2cb5bcd7bf15a96fe09a91aaf346d9b5d05fc3922",
    )

    # -----------------------------------------------------------------------
    # G3 — Policy Hash
    # -----------------------------------------------------------------------
    print("\n[G3] Policy Hash (PH)")
    # Exact canonical JSON from spec (normative byte sequence)
    canonical_policy = (
        '{"attestation_completeness":"partial",'
        '"guardrails":[{"id":"no_fake_degrees","rule":"No fake degrees"}],'
        '"moderation_policy_version":"2026-02-14",'
        '"policy_spec_version":"1.0",'
        '"system_prompt":"You are an assistant."}'
    )
    ph = policy_hash(canonical_policy)
    all_pass &= verify("PH-1 Policy Hash", ph, ph1_expected)

    # Also verify via canonicalize_policy helper
    ph_via_helper = policy_hash(canonicalize_policy(
        system_prompt="You are an assistant.",
        guardrails=[{"id": "no_fake_degrees", "rule": "No fake degrees"}],
        moderation_policy_version="2026-02-14",
        policy_spec_version="1.0",
        attestation_completeness="partial",
    ))
    all_pass &= verify("PH-1 via canonicalize_policy()", ph_via_helper, ph1_expected)

    # -----------------------------------------------------------------------
    # G4 — Runtime Hash
    # -----------------------------------------------------------------------
    print("\n[G4] Runtime Hash (RH)")
    # Exact canonical JSON from spec (normative byte sequence)
    canonical_runtime = (
        '{"max_tokens":256,"routing_mode":"language-aware",'
        '"runtime_spec_version":"1.0","temperature":0.7,'
        '"tooling_enabled":false,"top_p":1}'
    )
    rh = runtime_hash(canonical_runtime)
    all_pass &= verify("RH-1 Runtime Hash", rh, rh1_expected)

    # -----------------------------------------------------------------------
    # G5 — Composite Instance Hash (CIH)
    # -----------------------------------------------------------------------
    print("\n[G5] Composite Instance Hash (CIH)")
    # G5 starts from the published layer digests, not from the values
    # computed above, so a failure in G2-G4 does not cascade into G5.
    mh_g5 = hex_to_bytes(mh1_expected)
    ph_g5 = hex_to_bytes(ph1_expected)
    rh_g5 = hex_to_bytes(rh1_expected)
    cih = composite_instance_hash(mh_g5, ph_g5, rh_g5)
    all_pass &= verify(
        "CIH without instance_epoch",
        cih,
        "db1d84656d67f5026fd7271cb3b44cf0814f85cc6031381bd22326ac0af1ca41",
    )
    cih_epoch = composite_instance_hash(mh_g5, ph_g5, rh_g5, instance_epoch=1700000000)
    all_pass &= verify(
        "CIH with instance_epoch=1700000000",
        cih_epoch,
        "ef40beb2c9f7c111b5e2e365c7f5b639fd57350125602c2ac35fd3372a445cfb",
    )

    # -----------------------------------------------------------------------
    # Summary
    # -----------------------------------------------------------------------
    print("\n" + "=" * 60)
    if all_pass:
        print("All test vectors PASSED. Implementation is compliant.")
    else:
        print("FAILURES DETECTED. Implementation is non-compliant.")
    print("=" * 60)
    return all_pass
# —————————————————————————
# Demo: End-to-end CIH computation from scratch
# —————————————————————————
def demo_end_to_end() -> None:
    """Print a worked example that derives a CIH from scratch.

    Computes a Model Hash over a small in-memory mock weight file, a Policy
    Hash and a Runtime Hash from canonicalized JSON, then combines them into
    a Composite Instance Hash (without instance_epoch). Each intermediate
    value is printed; nothing is returned.
    """
    print("\n" + "=" * 60)
    print("Demo: End-to-end CIH computation")
    print("=" * 60)

    # Step 1: Model Hash from a small mock weight file
    print("\n[Step 1] Model Hash")
    mock_weights = b"Hello, MIVP! These are mock model weights."
    mh, root, total = model_hash(
        file_path="models/mock_model.bin",
        file_bytes=mock_weights,
        chunk_size=16,
    )
    print(f" File: models/mock_model.bin ({len(mock_weights)} bytes)")
    print(f" Chunks: {total}")
    print(f" Merkle root: {root.hex()}")
    print(f" MH: {mh.hex()}")

    # Step 2: Policy Hash
    print("\n[Step 2] Policy Hash")
    canonical_pol = canonicalize_policy(
        system_prompt="You are a helpful assistant.",
        guardrails=[
            {"id": "no_harmful_content", "rule": "Do not produce harmful content."},
            {"id": "no_pii", "rule": "Do not reveal personal data."},
        ],
        moderation_policy_version="2026-03-01",
        policy_spec_version="1.0",
        attestation_completeness="full",
    )
    print(f" Canonical JSON: {canonical_pol}")
    ph = policy_hash(canonical_pol)
    print(f" PH: {ph.hex()}")

    # Step 3: Runtime Hash
    print("\n[Step 3] Runtime Hash")
    # Built with the helper, as in step 2; for these values it yields
    # {"max_tokens":512,"routing_mode":"direct","runtime_spec_version":"1.0",
    #  "temperature":0.5,"tooling_enabled":true,"top_p":0.9} (no whitespace).
    canonical_rt = canonicalize_runtime(
        temperature=0.5,
        top_p=0.9,
        max_tokens=512,
        tooling_enabled=True,
        routing_mode="direct",
        runtime_spec_version="1.0",
    )
    print(f" Canonical JSON: {canonical_rt}")
    rh = runtime_hash(canonical_rt)
    print(f" RH: {rh.hex()}")

    # Step 4: Composite Instance Hash
    print("\n[Step 4] Composite Instance Hash")
    cih = composite_instance_hash(mh, ph, rh)
    print(f" CIH: {cih.hex()}")
    print("\n This CIH uniquely identifies this exact model + policy + runtime.")
    print(" Any change to weights, policy, or runtime produces a different CIH.")
if __name__ == "__main__":
    # The demo always runs; the exit status reflects only the test vectors
    # (0 when all passed, 1 otherwise).
    ok = run_test_vectors()
    demo_end_to_end()
    raise SystemExit(0 if ok else 1)