Skip to content

HOL-Light: Add HOL Light poly_use_hint proofs for AArch64#1037

Merged
hanno-becker merged 1 commit into
main from
hol-light-aarch64-use-hint
May 2, 2026
Merged

HOL-Light: Add HOL Light poly_use_hint proofs for AArch64#1037
hanno-becker merged 1 commit into
main from
hol-light-aarch64-use-hint

Conversation

@jakemas
Copy link
Copy Markdown
Contributor

@jakemas jakemas commented Apr 13, 2026

Resolves:

Summary

Proof times are decent ~15min for _32 and ~22min for _88.

@jakemas jakemas requested a review from a team as a code owner April 13, 2026 16:54
@jakemas jakemas force-pushed the hol-light-aarch64-use-hint branch from 2053888 to 0b42385 Compare April 13, 2026 17:02
@jakemas jakemas marked this pull request as draft April 13, 2026 17:12
@jakemas jakemas force-pushed the hol-light-aarch64-use-hint branch from 0b42385 to c5c76af Compare April 13, 2026 17:21
@jakemas jakemas marked this pull request as ready for review April 13, 2026 17:52
@oqs-bot
Copy link
Copy Markdown
Contributor

oqs-bot commented Apr 13, 2026

CBMC Results (ML-DSA-87)

Full Results (200 proofs)
Proof Status Current Previous Change
**TOTAL** 2298s 2497s -8.0%
polyvecl_pointwise_acc_montgomery_c 320s 411s -22%
sign_verify_internal 248s 272s -9%
polyvec_matrix_expand 163s 175s -7%
rej_uniform_native 115s 131s -12%
mld_invntt_layer 90s 95s -5%
poly_pointwise_montgomery_c 89s 102s -13%
mld_ct_memcmp 74s 81s -9%
mld_attempt_signature_generation 59s 60s -2%
polyvec_matrix_expand_serial 58s 59s -2%
sign_keypair_internal 50s 54s -7%
mld_ntt_layer 42s 45s -7%
sign_signature_internal 33s 34s -3%
polyvec_matrix_pointwise_montgomery 32s 28s +14%
sign_pk_from_sk 32s 33s -3%
polyveck_power2round 31s 36s -14%
fqmul 26s 31s -16%
keccakf1600x4_permute_native 22s 24s -8%
polyvec_matrix_pointwise_montgomery_yvec 18s 21s -14%
rej_uniform_c 18s 21s -14%
mld_ntt_butterfly_block 16s 17s -6%
poly_uniform_4x 16s 14s +14%
rej_uniform 16s 18s -11%
poly_chknorm_c 15s 16s -6%
poly_uniform_eta_4x 15s 11s +36%
mld_check_pct 13s 14s -7%
poly_add 13s 13s +0%
polyt0_unpack 13s 15s -13%
polyveck_add 13s 13s +0%
polyveck_use_hint 13s 14s -7%
keccak_absorb_once_x4 11s 10s +10%
polyveck_decompose 11s 11s +0%
polyveck_ntt 11s 12s -8%
poly_invntt_tomont_c 10s 9s +11%
poly_power2round 10s 8s +25%
polyveck_pointwise_poly_montgomery 9s 10s -10%
keccak_absorb 8s 6s +33%
keccakf1600_permute 8s 9s -11%
mld_sample_s1_s2 8s 7s +14%
pointwise_acc_native_aarch64 8s 7s +14%
keccakf1600_permute_native 7s 9s -22%
mld_compute_pack_z 7s 8s -12%
pointwise_acc_native_x86_64 7s 9s -22%
poly_decompose_c 7s 8s -12%
polyeta_unpack 7s 5s +40%
polyveck_chknorm 7s 4s +75%
polyveck_reduce 7s 5s +40%
polyveck_shiftl 7s 7s +0%
polyvecl_ntt 7s 6s +17%
sign 7s 7s +0%
sign_verify_pre_hash_shake256 7s 5s +40%
poly_shiftl 6s 3s +100%
polyt0_pack 6s 3s +100%
polyveck_sub 6s 9s -33%
intt_native_x86_64 5s 5s +0%
mld_prepare_domain_separation_prefix 5s 7s -29%
mld_sample_s1_s2_serial 5s 6s -17%
montgomery_reduce 5s 5s +0%
nttunpack_native_x86_64 5s 4s +25%
poly_chknorm_native 5s 2s +150%
poly_decompose 5s 3s +67%
poly_ntt 5s 4s +25%
poly_ntt_c 5s 1s +400%
poly_uniform_eta 5s 3s +67%
polyveck_caddq 5s 5s +0%
polyveck_invntt_tomont 5s 5s +0%
polyz_unpack 5s 4s +25%
polyz_unpack_17_native_aarch64 5s 3s +67%
sign_open 5s 4s +25%
sign_signature 5s 3s +67%
sign_signature_pre_hash_internal 5s 6s -17%
sign_signature_pre_hash_shake256 5s 4s +25%
sign_verify_pre_hash_internal 5s 6s -17%
unpack_hints 5s 4s +25%
decompose 4s 3s +33%
keccak_squeezeblocks_x4 4s 5s -20%
make_hint 4s 3s +33%
ntt_native_x86_64 4s 2s +100%
pack_sk_rho_key_tr_s2_t0 4s 4s +0%
poly_make_hint 4s 2s +100%
poly_pointwise_montgomery 4s 2s +100%
poly_sub 4s 3s +33%
poly_uniform_gamma1 4s 3s +33%
poly_use_hint_c 4s 2s +100%
polyvec_matrix_pointwise_montgomery_row 4s 3s +33%
polyveck_pack_eta 4s 1s +300%
polyveck_unpack_t0 4s 2s +100%
polyvecl_chknorm 4s 4s +0%
polyvecl_pointwise_acc_montgomery_native 4s 3s +33%
polyvecl_uniform_gamma1 4s 5s -20%
polyz_pack 4s 4s +0%
polyz_unpack_19_native_aarch64 4s 1s +300%
polyz_unpack_c 4s 5s -20%
rej_eta_c 4s 4s +0%
rej_eta_native 4s 3s +33%
shake256_init 4s 5s -20%
shake256x4_squeezeblocks 4s 1s +300%
sign_verify 4s 7s -43%
sign_verify_extmu 4s 3s +33%
sk_s1hat_get_poly 4s 3s +33%
sk_t0hat_get_poly 4s 3s +33%
unpack_sk_s2hat 4s 3s +33%
unpack_sk_t0hat 4s 3s +33%
caddq 3s 5s -40%
keccak_f1600_x4_native_aarch64_v84a 3s 2s +50%
keccak_f1600_x4_native_aarch64_v8a_scalar_hybrid 3s 1s +200%
keccak_f1600_x4_native_aarch64_v8a_v84a_scalar_hybrid 3s 2s +50%
keccak_init 3s 1s +200%
keccak_squeeze 3s 3s +0%
keccakf1600x4_extract_bytes 3s 1s +200%
keccakf1600x4_xor_bytes 3s 4s -25%
mld_ct_abs_i32 3s 2s +50%
mld_ct_cmask_nonzero_u32 3s 3s +0%
mld_h 3s 4s -25%
mld_polymat_expand_entry 3s 3s +0%
pack_pk 3s 3s +0%
pack_sk_s1 3s 2s +50%
pointwise_native_aarch64 3s 3s +0%
poly_caddq_c 3s 5s -40%
poly_caddq_native 3s 4s -25%
poly_caddq_native_aarch64 3s 3s +0%
poly_challenge 3s 5s -40%
poly_chknorm 3s 2s +50%
poly_chknorm_native_aarch64 3s 4s -25%
poly_decompose_native 3s 3s +0%
poly_invntt_tomont 3s 2s +50%
poly_invntt_tomont_native 3s 2s +50%
poly_ntt_native 3s 4s -25%
poly_permute_bitrev_to_custom_optional 3s 3s +0%
poly_reduce 3s 2s +50%
poly_uniform 3s 3s +0%
poly_uniform_gamma1_4x 3s 5s -40%
polyeta_pack 3s 2s +50%
polyveck_pack_t0 3s 5s -40%
polyveck_pack_w1 3s 2s +50%
polyveck_unpack_eta 3s 3s +0%
polyvecl_pointwise_acc_montgomery 3s 4s -25%
polyvecl_uniform_gamma1_serial 3s 4s -25%
polyvecl_unpack_eta 3s 4s -25%
polyz_unpack_native 3s 4s -25%
shake128_release 3s 3s +0%
shake256_absorb 3s 3s +0%
shake256_release 3s 4s -25%
shake256_squeeze 3s 2s +50%
sign_keypair 3s 4s -25%
unpack_pk 3s 4s -25%
unpack_sk 3s 4s -25%
unpack_sk_s1hat 3s 4s -25%
use_hint 3s 2s +50%
yvec_init 3s 1s +200%
fqscale 2s 4s -50%
keccak_f1600_x1_native_aarch64 2s 1s +100%
keccak_f1600_x1_native_aarch64_v84a 2s 2s +0%
keccakf1600_extract_bytes (big endian) 2s 1s +100%
keccakf1600_xor_bytes 2s 5s -60%
keccakf1600_xor_bytes (big endian) 2s 3s -33%
mld_ct_cmask_neg_i32 2s 3s -33%
mld_ct_cmask_nonzero_u8 2s 1s +100%
mld_ct_get_optblocker_i64 2s 3s -33%
mld_ct_get_optblocker_u32 2s 1s +100%
mld_ct_get_optblocker_u8 2s 2s +0%
mld_ct_sel_int32 2s 2s +0%
mld_keccakf1600_extract_bytes 2s 2s +0%
ntt_native_aarch64 2s 4s -50%
pack_sig_c 2s 5s -60%
pointwise_native_x86_64 2s 3s -33%
poly_permute_bitrev_to_custom_optional_native 2s 3s -33%
poly_pointwise_montgomery_native 2s 3s -33%
poly_use_hint 2s 5s -60%
poly_use_hint_native 2s 5s -60%
poly_use_hint_native_aarch64 2s - new
polyt1_unpack 2s 3s -33%
polyvecl_pack_eta 2s 2s +0%
polyvecl_unpack_z 2s 3s -33%
polyw1_pack 2s 2s +0%
power2round 2s 2s +0%
rej_eta 2s 2s +0%
shake128_finalize 2s 5s -60%
shake128_init 2s 2s +0%
shake128_squeeze 2s 3s -33%
shake128x4_absorb_once 2s 2s +0%
shake128x4_squeezeblocks 2s 2s +0%
shake256 2s 2s +0%
sign_signature_extmu 2s 5s -60%
sk_s2hat_get_poly 2s 5s -60%
sys_check_capability 2s 2s +0%
unpack_sig 2s 5s -60%
yvec_get_poly 2s 3s -33%
keccak_finalize 1s 2s -50%
keccakf1600x4_permute 1s 1s +0%
mld_value_barrier_i64 1s 4s -75%
mld_value_barrier_u32 1s 2s -50%
mld_value_barrier_u8 1s 4s -75%
pack_sig_h_poly 1s 3s -67%
pack_sig_z 1s 2s -50%
poly_caddq 1s 3s -67%
polyt1_pack 1s 2s -50%
reduce32 1s 2s -50%
shake128_absorb 1s 4s -75%
shake256_finalize 1s 2s -50%
shake256x4_absorb_once 1s 2s -50%

@oqs-bot
Copy link
Copy Markdown
Contributor

oqs-bot commented Apr 13, 2026

CBMC Results (ML-DSA-65)

Full Results (200 proofs)
Proof Status Current Previous Change
**TOTAL** 2284s 2382s -4.1%
polyvecl_pointwise_acc_montgomery_c 509s 553s -8%
sign_verify_internal 178s 188s -5%
polyvec_matrix_expand 119s 128s -7%
rej_uniform_native 117s 130s -10%
mld_invntt_layer 93s 96s -3%
poly_pointwise_montgomery_c 92s 99s -7%
mld_ct_memcmp 78s 79s -1%
mld_attempt_signature_generation 48s 49s -2%
mld_ntt_layer 42s 47s -11%
sign_signature_internal 30s 30s +0%
sign_keypair_internal 29s 33s -12%
fqmul 28s 29s -3%
polyvec_matrix_expand_serial 26s 25s +4%
keccakf1600x4_permute_native 23s 23s +0%
polyvec_matrix_pointwise_montgomery 22s 23s -4%
sign_pk_from_sk 20s 20s +0%
polyvec_matrix_pointwise_montgomery_yvec 19s 20s -5%
mld_ntt_butterfly_block 18s 17s +6%
polyveck_decompose 17s 15s +13%
rej_uniform 17s 16s +6%
rej_uniform_c 17s 20s -15%
poly_chknorm_c 16s 18s -11%
polyt0_unpack 16s 14s +14%
polyveck_power2round 15s 17s -12%
poly_uniform_eta_4x 12s 12s +0%
keccak_absorb_once_x4 11s 10s +10%
poly_add 11s 12s -8%
poly_uniform_4x 11s 13s -15%
polyveck_add 10s 10s +0%
polyveck_pointwise_poly_montgomery 10s 11s -9%
keccakf1600_permute_native 9s 9s +0%
poly_invntt_tomont_c 9s 8s +12%
poly_power2round 9s 9s +0%
mld_check_pct 8s 8s +0%
polyveck_caddq 8s 8s +0%
polyveck_chknorm 8s 3s +167%
polyveck_reduce 8s 7s +14%
polyveck_shiftl 8s 9s -11%
polyvecl_ntt 8s 7s +14%
sign 8s 10s -20%
keccak_absorb 7s 8s -12%
mld_compute_pack_z 7s 6s +17%
mld_sample_s1_s2 7s 3s +133%
polyeta_pack 7s 3s +133%
polyveck_sub 7s 6s +17%
polyveck_use_hint 7s 7s +0%
polyvecl_chknorm 7s 5s +40%
polyz_unpack_c 7s 10s -30%
fqscale 6s 2s +200%
keccakf1600_permute 6s 7s -14%
pointwise_acc_native_x86_64 6s 7s -14%
polyveck_invntt_tomont 6s 5s +20%
polyz_unpack 6s 4s +50%
mld_ct_cmask_nonzero_u32 5s 3s +67%
mld_h 5s 3s +67%
mld_polymat_expand_entry 5s 4s +25%
mld_prepare_domain_separation_prefix 5s 3s +67%
pointwise_acc_native_aarch64 5s 7s -29%
poly_caddq 5s 3s +67%
poly_caddq_c 5s 6s -17%
poly_sub 5s 2s +150%
poly_uniform_gamma1 5s 5s +0%
polyeta_unpack 5s 2s +150%
polyveck_ntt 5s 7s -29%
polyveck_unpack_eta 5s 5s +0%
polyvecl_uniform_gamma1 5s 2s +150%
shake256_init 5s 2s +150%
sign_keypair 5s 4s +25%
sign_signature_pre_hash_shake256 5s 3s +67%
sign_verify_pre_hash_shake256 5s 4s +25%
sk_s2hat_get_poly 5s 2s +150%
unpack_hints 5s 5s +0%
yvec_get_poly 5s 2s +150%
keccak_squeezeblocks_x4 4s 5s -20%
keccakf1600x4_xor_bytes 4s 2s +100%
mld_ct_cmask_nonzero_u8 4s 2s +100%
mld_ct_sel_int32 4s 2s +100%
mld_sample_s1_s2_serial 4s 4s +0%
montgomery_reduce 4s 2s +100%
ntt_native_aarch64 4s 3s +33%
nttunpack_native_x86_64 4s 5s -20%
pack_sig_c 4s 5s -20%
pack_sk_s1 4s 3s +33%
pointwise_native_x86_64 4s 4s +0%
poly_caddq_native 4s 4s +0%
poly_challenge 4s 5s -20%
poly_decompose_native 4s 3s +33%
poly_invntt_tomont 4s 2s +100%
poly_permute_bitrev_to_custom_optional 4s 3s +33%
poly_pointwise_montgomery 4s 3s +33%
poly_pointwise_montgomery_native 4s 2s +100%
poly_uniform_gamma1_4x 4s 5s -20%
poly_use_hint_native_aarch64 4s - new
polyveck_pack_eta 4s 4s +0%
polyveck_unpack_t0 4s 5s -20%
polyvecl_unpack_z 4s 4s +0%
rej_eta_native 4s 6s -33%
shake128x4_absorb_once 4s 4s +0%
shake256_squeeze 4s 2s +100%
sign_open 4s 4s +0%
sign_signature 4s 6s -33%
sign_signature_extmu 4s 6s -33%
sign_signature_pre_hash_internal 4s 5s -20%
sign_verify_pre_hash_internal 4s 4s +0%
sk_s1hat_get_poly 4s 4s +0%
unpack_sk 4s 4s +0%
caddq 3s 3s +0%
keccak_f1600_x1_native_aarch64_v84a 3s 4s -25%
keccak_init 3s 2s +50%
keccak_squeeze 3s 3s +0%
keccakf1600_xor_bytes 3s 3s +0%
keccakf1600_xor_bytes (big endian) 3s 2s +50%
keccakf1600x4_permute 3s 3s +0%
mld_ct_abs_i32 3s 3s +0%
mld_ct_get_optblocker_i64 3s 3s +0%
pack_sig_h_poly 3s 3s +0%
pack_sig_z 3s 4s -25%
pack_sk_rho_key_tr_s2_t0 3s 2s +50%
pointwise_native_aarch64 3s 3s +0%
poly_caddq_native_aarch64 3s 3s +0%
poly_chknorm_native_aarch64 3s 3s +0%
poly_decompose_c 3s 4s -25%
poly_ntt 3s 2s +50%
poly_ntt_c 3s 3s +0%
poly_permute_bitrev_to_custom_optional_native 3s 4s -25%
poly_shiftl 3s 3s +0%
poly_uniform 3s 7s -57%
poly_uniform_eta 3s 3s +0%
poly_use_hint 3s 2s +50%
polyt1_unpack 3s 4s -25%
polyvec_matrix_pointwise_montgomery_row 3s 2s +50%
polyveck_pack_t0 3s 5s -40%
polyvecl_pointwise_acc_montgomery 3s 3s +0%
polyvecl_pointwise_acc_montgomery_native 3s 5s -40%
polyvecl_uniform_gamma1_serial 3s 3s +0%
polyvecl_unpack_eta 3s 5s -40%
polyw1_pack 3s 1s +200%
polyz_pack 3s 4s -25%
polyz_unpack_17_native_aarch64 3s 2s +50%
polyz_unpack_19_native_aarch64 3s 2s +50%
power2round 3s 2s +50%
reduce32 3s 4s -25%
rej_eta 3s 3s +0%
rej_eta_c 3s 3s +0%
shake128_finalize 3s 3s +0%
shake128_release 3s 1s +200%
shake128x4_squeezeblocks 3s 2s +50%
shake256 3s 3s +0%
shake256_finalize 3s 3s +0%
shake256x4_squeezeblocks 3s 2s +50%
sign_verify 3s 5s -40%
unpack_sig 3s 2s +50%
unpack_sk_s2hat 3s 2s +50%
unpack_sk_t0hat 3s 4s -25%
use_hint 3s 4s -25%
yvec_init 3s 2s +50%
intt_native_x86_64 2s 3s -33%
keccak_f1600_x1_native_aarch64 2s 3s -33%
keccak_f1600_x4_native_aarch64_v84a 2s 4s -50%
keccak_f1600_x4_native_aarch64_v8a_scalar_hybrid 2s 3s -33%
keccak_f1600_x4_native_aarch64_v8a_v84a_scalar_hybrid 2s 1s +100%
keccak_finalize 2s 1s +100%
keccakf1600x4_extract_bytes 2s 2s +0%
make_hint 2s 3s -33%
mld_ct_cmask_neg_i32 2s 3s -33%
mld_ct_get_optblocker_u32 2s 2s +0%
mld_ct_get_optblocker_u8 2s 2s +0%
mld_keccakf1600_extract_bytes 2s 3s -33%
mld_value_barrier_i64 2s 2s +0%
pack_pk 2s 2s +0%
poly_chknorm 2s 4s -50%
poly_chknorm_native 2s 3s -33%
poly_invntt_tomont_native 2s 2s +0%
poly_make_hint 2s 4s -50%
poly_ntt_native 2s 3s -33%
poly_reduce 2s 1s +100%
poly_use_hint_c 2s 1s +100%
polyt1_pack 2s 4s -50%
polyvecl_pack_eta 2s 4s -50%
polyz_unpack_native 2s 3s -33%
shake128_absorb 2s 3s -33%
shake128_init 2s 4s -50%
shake128_squeeze 2s 6s -67%
shake256_absorb 2s 2s +0%
shake256_release 2s 3s -33%
shake256x4_absorb_once 2s 2s +0%
sign_verify_extmu 2s 3s -33%
sk_t0hat_get_poly 2s 3s -33%
sys_check_capability 2s 2s +0%
unpack_pk 2s 4s -50%
unpack_sk_s1hat 2s 4s -50%
decompose 1s 3s -67%
keccakf1600_extract_bytes (big endian) 1s 3s -67%
mld_value_barrier_u32 1s 2s -50%
mld_value_barrier_u8 1s 2s -50%
ntt_native_x86_64 1s 5s -80%
poly_decompose 1s 2s -50%
poly_use_hint_native 1s 4s -75%
polyt0_pack 1s 4s -75%
polyveck_pack_w1 1s 1s +0%

@oqs-bot
Copy link
Copy Markdown
Contributor

oqs-bot commented Apr 13, 2026

CBMC Results (ML-DSA-44)

Full Results (200 proofs)
Proof Status Current Previous Change
**TOTAL** 1663s 1771s -6.1%
polyvecl_pointwise_acc_montgomery_c 125s 134s -7%
rej_uniform_native 125s 134s -7%
sign_verify_internal 110s 117s -6%
poly_pointwise_montgomery_c 98s 113s -13%
mld_invntt_layer 90s 100s -10%
mld_ct_memcmp 79s 87s -9%
mld_attempt_signature_generation 51s 49s +4%
mld_ntt_layer 46s 47s -2%
fqmul 29s 31s -6%
polyvec_matrix_expand 26s 25s +4%
polyvec_matrix_pointwise_montgomery 25s 25s +0%
sign_keypair_internal 24s 25s -4%
keccakf1600x4_permute_native 20s 22s -9%
rej_uniform_c 19s 19s +0%
poly_chknorm_c 18s 17s +6%
sign_pk_from_sk 18s 20s -10%
mld_ntt_butterfly_block 17s 16s +6%
polyt0_unpack 16s 16s +0%
rej_uniform 16s 17s -6%
polyveck_chknorm 15s 17s -12%
sign_signature_internal 15s 18s -17%
poly_uniform_4x 12s 12s +0%
poly_uniform_eta_4x 12s 13s -8%
polyvec_matrix_pointwise_montgomery_yvec 12s 13s -8%
poly_add 11s 11s +0%
mld_compute_pack_z 10s 8s +25%
poly_invntt_tomont_c 10s 10s +0%
polyz_unpack_c 10s 13s -23%
keccak_absorb_once_x4 9s 8s +12%
poly_power2round 9s 13s -31%
polyveck_decompose 8s 9s -11%
keccakf1600_permute_native 7s 7s +0%
mld_check_pct 7s 10s -30%
mld_h 7s 2s +250%
pointwise_acc_native_aarch64 7s 6s +17%
polyvec_matrix_expand_serial 7s 7s +0%
polyveck_pointwise_poly_montgomery 7s 5s +40%
sign_open 7s 4s +75%
keccakf1600_permute 6s 9s -33%
ntt_native_x86_64 6s 2s +200%
poly_caddq_c 6s 4s +50%
poly_uniform 6s 7s -14%
poly_uniform_gamma1_4x 6s 3s +100%
polyeta_unpack 6s 7s -14%
polyveck_invntt_tomont 6s 4s +50%
polyveck_pack_eta 6s 6s +0%
polyvecl_ntt 6s 5s +20%
rej_eta_c 6s 5s +20%
sign 6s 6s +0%
keccak_absorb 5s 6s -17%
keccak_squeezeblocks_x4 5s 5s +0%
nttunpack_native_x86_64 5s 3s +67%
pack_pk 5s 3s +67%
pointwise_acc_native_x86_64 5s 5s +0%
poly_decompose 5s 3s +67%
poly_use_hint_c 5s 4s +25%
poly_use_hint_native_aarch64 5s - new
polyveck_add 5s 6s -17%
polyveck_pack_w1 5s 4s +25%
polyveck_power2round 5s 7s -29%
polyvecl_chknorm 5s 5s +0%
polyvecl_uniform_gamma1_serial 5s 2s +150%
polyvecl_unpack_z 5s 2s +150%
polyw1_pack 5s 2s +150%
shake128_finalize 5s 2s +150%
sign_signature_extmu 5s 3s +67%
sign_signature_pre_hash_internal 5s 4s +25%
sys_check_capability 5s 3s +67%
yvec_init 5s 5s +0%
caddq 4s 3s +33%
keccak_f1600_x4_native_aarch64_v8a_scalar_hybrid 4s 2s +100%
keccak_squeeze 4s 3s +33%
keccakf1600x4_xor_bytes 4s 3s +33%
mld_sample_s1_s2 4s 5s -20%
mld_sample_s1_s2_serial 4s 3s +33%
mld_value_barrier_u32 4s 3s +33%
pack_sig_c 4s 2s +100%
pack_sig_z 4s 3s +33%
pack_sk_s1 4s 3s +33%
pointwise_native_x86_64 4s 3s +33%
poly_challenge 4s 4s +0%
poly_chknorm 4s 2s +100%
poly_ntt_native 4s 4s +0%
polyveck_shiftl 4s 2s +100%
polyveck_use_hint 4s 6s -33%
polyvecl_pointwise_acc_montgomery 4s 5s -20%
polyz_unpack 4s 3s +33%
polyz_unpack_17_native_aarch64 4s 4s +0%
polyz_unpack_19_native_aarch64 4s 3s +33%
shake256_squeeze 4s 2s +100%
sign_keypair 4s 7s -43%
sign_signature 4s 5s -20%
sign_signature_pre_hash_shake256 4s 7s -43%
sign_verify_pre_hash_internal 4s 3s +33%
unpack_hints 4s 8s -50%
unpack_sk 4s 2s +100%
unpack_sk_t0hat 4s 2s +100%
yvec_get_poly 4s 1s +300%
decompose 3s 4s -25%
keccak_f1600_x4_native_aarch64_v8a_v84a_scalar_hybrid 3s 2s +50%
keccakf1600_extract_bytes (big endian) 3s 4s -25%
keccakf1600_xor_bytes (big endian) 3s 1s +200%
keccakf1600x4_extract_bytes 3s 3s +0%
keccakf1600x4_permute 3s 2s +50%
make_hint 3s 3s +0%
mld_ct_get_optblocker_u8 3s 2s +50%
mld_keccakf1600_extract_bytes 3s 2s +50%
mld_prepare_domain_separation_prefix 3s 5s -40%
montgomery_reduce 3s 3s +0%
ntt_native_aarch64 3s 4s -25%
pack_sig_h_poly 3s 4s -25%
pointwise_native_aarch64 3s 3s +0%
poly_caddq 3s 5s -40%
poly_caddq_native 3s 4s -25%
poly_decompose_c 3s 3s +0%
poly_decompose_native 3s 6s -50%
poly_make_hint 3s 4s -25%
poly_ntt 3s 2s +50%
poly_permute_bitrev_to_custom_optional 3s 4s -25%
poly_permute_bitrev_to_custom_optional_native 3s 2s +50%
poly_pointwise_montgomery 3s 6s -50%
poly_pointwise_montgomery_native 3s 4s -25%
poly_reduce 3s 3s +0%
poly_shiftl 3s 3s +0%
poly_uniform_eta 3s 4s -25%
poly_uniform_gamma1 3s 3s +0%
polyeta_pack 3s 4s -25%
polyt0_pack 3s 5s -40%
polyt1_pack 3s 2s +50%
polyvec_matrix_pointwise_montgomery_row 3s 4s -25%
polyveck_caddq 3s 7s -57%
polyveck_reduce 3s 3s +0%
polyveck_sub 3s 6s -50%
polyveck_unpack_t0 3s 4s -25%
polyvecl_unpack_eta 3s 2s +50%
polyz_pack 3s 3s +0%
power2round 3s 3s +0%
rej_eta 3s 4s -25%
rej_eta_native 3s 3s +0%
shake128_absorb 3s 4s -25%
shake128x4_absorb_once 3s 4s -25%
shake256 3s 5s -40%
shake256_init 3s 3s +0%
sign_verify_pre_hash_shake256 3s 5s -40%
sk_s2hat_get_poly 3s 2s +50%
unpack_pk 3s 3s +0%
unpack_sig 3s 5s -40%
unpack_sk_s1hat 3s 3s +0%
use_hint 3s 3s +0%
intt_native_x86_64 2s 3s -33%
keccak_f1600_x1_native_aarch64 2s 3s -33%
keccak_f1600_x1_native_aarch64_v84a 2s 1s +100%
keccak_finalize 2s 3s -33%
keccak_init 2s 2s +0%
mld_ct_abs_i32 2s 4s -50%
mld_ct_cmask_neg_i32 2s 2s +0%
mld_ct_cmask_nonzero_u32 2s 3s -33%
mld_ct_cmask_nonzero_u8 2s 2s +0%
mld_ct_get_optblocker_u32 2s 4s -50%
mld_ct_sel_int32 2s 1s +100%
mld_polymat_expand_entry 2s 5s -60%
mld_value_barrier_i64 2s 2s +0%
pack_sk_rho_key_tr_s2_t0 2s 4s -50%
poly_caddq_native_aarch64 2s 4s -50%
poly_chknorm_native 2s 3s -33%
poly_invntt_tomont 2s 2s +0%
poly_invntt_tomont_native 2s 2s +0%
poly_ntt_c 2s 4s -50%
poly_sub 2s 4s -50%
poly_use_hint_native 2s 5s -60%
polyt1_unpack 2s 3s -33%
polyveck_ntt 2s 3s -33%
polyveck_pack_t0 2s 4s -50%
polyveck_unpack_eta 2s 4s -50%
polyvecl_pack_eta 2s 4s -50%
polyvecl_pointwise_acc_montgomery_native 2s 2s +0%
polyvecl_uniform_gamma1 2s 3s -33%
polyz_unpack_native 2s 4s -50%
reduce32 2s 2s +0%
shake128_init 2s 3s -33%
shake128_release 2s 3s -33%
shake128_squeeze 2s 3s -33%
shake128x4_squeezeblocks 2s 3s -33%
shake256_absorb 2s 2s +0%
shake256_finalize 2s 2s +0%
shake256x4_absorb_once 2s 2s +0%
shake256x4_squeezeblocks 2s 2s +0%
sign_verify 2s 6s -67%
sign_verify_extmu 2s 4s -50%
sk_s1hat_get_poly 2s 4s -50%
sk_t0hat_get_poly 2s 4s -50%
unpack_sk_s2hat 2s 3s -33%
fqscale 1s 3s -67%
keccak_f1600_x4_native_aarch64_v84a 1s 3s -67%
keccakf1600_xor_bytes 1s 3s -67%
mld_ct_get_optblocker_i64 1s 3s -67%
mld_value_barrier_u8 1s 2s -50%
poly_chknorm_native_aarch64 1s 3s -67%
poly_use_hint 1s 2s -50%
shake256_release 1s 2s -50%

Copy link
Copy Markdown
Contributor

@mkannwischer mkannwischer left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks @jakemas. Please add the CBMC proofs.

@jakemas jakemas force-pushed the hol-light-aarch64-use-hint branch 5 times, most recently from e7ca229 to e7a3a45 Compare April 14, 2026 07:44
@jakemas
Copy link
Copy Markdown
Contributor Author

jakemas commented Apr 14, 2026

Thanks @jakemas. Please add the CBMC proofs.

Thank you, added the harness as a single file with split functionality on GAMMA2

@jakemas jakemas force-pushed the hol-light-aarch64-use-hint branch 4 times, most recently from 1ea7797 to 817aa3d Compare April 17, 2026 10:22
Comment thread proofs/hol_light/aarch64/proofs/mldsa_poly_use_hint_32.ml Outdated
Comment thread proofs/hol_light/aarch64/proofs/mldsa_poly_use_hint_88.ml Outdated
Copy link
Copy Markdown
Contributor

@hanno-becker hanno-becker left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you @jakemas for working on this!

The specification seems far too weak to claim functional correctness? We seem to only say something about the bounds of the output.

@jakemas jakemas force-pushed the hol-light-aarch64-use-hint branch 7 times, most recently from 40ec688 to de080de Compare April 17, 2026 15:39
Comment thread proofs/hol_light/common/mldsa_specs.ml Outdated
@mkannwischer mkannwischer force-pushed the hol-light-aarch64-use-hint branch from c97d731 to e96df0d Compare April 29, 2026 13:10
Comment thread proofs/hol_light/aarch64/proofs/poly_use_hint_32_aarch64_asm.ml Outdated
@jakemas jakemas force-pushed the hol-light-aarch64-use-hint branch from f2f03df to 2c8a1e6 Compare April 30, 2026 04:42
@jakemas
Copy link
Copy Markdown
Contributor Author

jakemas commented Apr 30, 2026

Update: FIPS 204-aligned specs for _32 (commit 2c8a1e6)

Addressed the review comments for the _32 variant:

FIPS 204 alignment (Hanno + Matthias)

  • Added mldsa_cmod, mldsa_decompose_32, mldsa_use_hint_32 definitions in mldsa_specs.ml, closely following FIPS 204 Algorithms 36 and 40
  • Proved MLDSA_USE_HINT_32_EQUIV: mldsa_use_hint_32 h r = mldsa_use_hint_32_spec r h — no CHEAT_TAC
  • SUBROUTINE_CORRECT postcondition now uses mldsa_use_hint_32 (FIPS 204-aligned)
  • Key sub-lemmas: DECOMPOSE_32_R1_EQUIV (17-interval Barrett analysis), DECOMPOSE_32_R0_SIGN (r0 sign equivalence)

Other fixes

  • Removed unused MLDSA_USE_HINT_32_SPEC_BOUND (Matthias)
  • Safety proof uses SUBROUTINE_CORRECT_INTERNAL (code-aligned spec with val < 16 bound)
  • All specs in common/mldsa_specs.ml for x86 sharing

Pending

  • _88 variant: same treatment in progress

@jakemas jakemas force-pushed the hol-light-aarch64-use-hint branch 7 times, most recently from 67b89be to a879424 Compare May 1, 2026 00:43
@jakemas
Copy link
Copy Markdown
Contributor Author

jakemas commented May 1, 2026

woo got it! testing _88 in CI now then will merge shared code ( DIV_SANDWICH, ENSURES_STRENGTHEN_POST, and INT_MOD_RESIDUE) maybe go eat a div sandwich myself 🥪

@jakemas jakemas force-pushed the hol-light-aarch64-use-hint branch 5 times, most recently from 3c1d5a1 to 999243b Compare May 1, 2026 08:01
Copy link
Copy Markdown
Contributor

@mkannwischer mkannwischer left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you so much @jakemas for getting this to work so quickly! The use_hint and decompose specs now look great to me!

The only blocker left for me is that the SUBROUTINE_CORRECT_FIPS204 specs are missing the output bounds.
Also, can we rename SUBROUTINE_CORRECT_FIPS204 to SUBROUTINE_CORRECT for consistency with the other proofs and remove the old POLY_USE_HINT_32_AARCH64_ASM_SUBROUTINE_CORRECT? That is otherwise quite confusing to read.

Comment thread proofs/hol_light/aarch64/proofs/poly_use_hint_32_aarch64_asm.ml Outdated
Comment thread proofs/hol_light/aarch64/proofs/poly_use_hint_32_aarch64_asm.ml Outdated
Comment thread proofs/hol_light/aarch64/proofs/poly_use_hint_32_aarch64_asm.ml Outdated
Comment thread proofs/hol_light/aarch64/proofs/poly_use_hint_32_aarch64_asm.ml Outdated
Comment thread proofs/hol_light/common/mldsa_specs.ml
Copy link
Copy Markdown
Contributor

@mkannwischer mkannwischer left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks @jakemas for the updates.
The final specs now look great to me!

It would be nice if we could eliminate the CORRECT_CODE spec as there is no other proof in mlkem-native/mldsa-native that has multiple specs. I'd prefer it if we can keep this uniform. This can be done in a follow-up.

Two nits that should still be fixed regarding the CBMC contracts, but I'll take the liberty to fix those myself shortly.

Comment thread dev/aarch64_opt/src/arith_native_aarch64.h
Comment thread dev/aarch64_opt/src/arith_native_aarch64.h Outdated
Add formal verification proofs for poly_use_hint_32 and poly_use_hint_88
AArch64 NEON implementations using HOL Light.

Each proof file contains:
- Assembly-level functional correctness
- Subroutine correctness with code-aligned spec
- Constant-time and memory safety
- FIPS 204 equivalence proof via Barrett interval cascade
- FIPS 204-aligned subroutine correctness

FIPS 204 definitions (mldsa_cmod, mldsa_decompose, mldsa_use_hint) and
shared helpers (DIV_SANDWICH, INT_MOD_RESIDUE) are in mldsa_specs.ml.
ENSURES_STRENGTHEN_POST lives in aarch64_utils.ml.

The public SUBROUTINE_CORRECT theorem in each proof uses the FIPS 204
spec mldsa_use_hint_{32,88} in its postcondition and carries the output
bound (< 16 or < 44) as a corollary. Code-aligned intermediates use
*_CODE suffixes to mark them internal.

Signed-off-by: Jake Massimo <jakemas@amazon.com>
@hanno-becker
Copy link
Copy Markdown
Contributor

hanno-becker commented May 2, 2026

@jakemas This is the pattern I had in mind: You write the spec as you want it, but start the proof by massaging it into a shape that's better aligned to the code, including a) adding the automatic bounds, b) moving from FIPS204-aligned to code-aligned.

Could you internalize this pattern, and open a follow-up PR removing POLY_USE_HINT_88_AARCH64_ASM_CORRECT_CODE and POLY_USE_HINT_88_AARCH64_ASM_CORRECT_BOUND_CODE?

(* Bridge lemma between the two use-hint specifications: for r < 8380417 and
   h <= 1, the FIPS 204-aligned function mldsa_use_hint_88 agrees with the
   code-aligned function mldsa_use_hint_88_code.
   Note the argument order differs: mldsa_use_hint_88 takes (h, r) while
   mldsa_use_hint_88_code takes (r, h).
   The tactic argument below is a placeholder in this sketch; the actual
   proof is meant to be taken from the existing proof file. *)
let MLDSA_USE_HINT_88_EQUIV = prove(
  `!r h. r < 8380417 /\ h <= 1
         ==> mldsa_use_hint_88 h r = mldsa_use_hint_88_code r h`, 
   (* FROM YOUR CODE *) );;

(* Functional correctness of poly_use_hint_88_aarch64_asm, stated directly
   against the FIPS 204-aligned spec mldsa_use_hint_88.

   Preconditions (as written in the statement below):
     - the 1024-byte output buffer b overlaps neither the machine code nor
       the 1024-byte input buffers a and h;
     - the 256 32-bit words at a are x 0 .. x 255, each with val < 8380417;
     - the 256 32-bit words at h are y 0 .. y 255, each with val <= 1.
   Postcondition:
     - PC has advanced to the end of the code minus 4;
     - each 32-bit output word at b equals
       word(mldsa_use_hint_88 (val(y i)) (val(x i)));
     - each output word has val < 44.
   Frame: only ABI-permitted registers/flags and the bytes of b may change.

   Proof outline: the goal is first replaced, via ENSURES_STRENGTHEN_POST, by
   one whose postcondition mentions only the code-aligned function
   mldsa_use_hint_88_code and no explicit bound. The implication back to the
   stated postcondition (spec equivalence plus the < 44 bound) is discharged
   up front; the remaining goal is proved by symbolic execution. *)
let POLY_USE_HINT_88_AARCH64_ASM_CORRECT = prove
 (`!b a h x y pc.
    nonoverlapping (word pc, LENGTH poly_use_hint_88_aarch64_asm_mc) (b, 1024) /\
    nonoverlapping (b, 1024) (a, 1024) /\
    nonoverlapping (b, 1024) (h, 1024)
    ==> ensures arm
          (\s. aligned_bytes_loaded s (word pc) poly_use_hint_88_aarch64_asm_mc /\
               read PC s = word pc /\
               C_ARGUMENTS [b; a; h] s /\
               (!i. i < 256 ==> val(x i) < 8380417) /\
               (!i. i < 256 ==> val(y i) <= 1) /\
               (!i. i < 256 ==>
                 read(memory :> bytes32(word_add a (word(4 * i)))) s = x i) /\
               (!i. i < 256 ==>
                 read(memory :> bytes32(word_add h (word(4 * i)))) s = y i))
          (\s. read PC s = word(pc + LENGTH poly_use_hint_88_aarch64_asm_mc - 4) /\
               (!i. i < 256 ==>
                 read(memory :> bytes32(word_add b (word(4 * i)))) s =
                   word(mldsa_use_hint_88 (val(y i)) (val(x i)))) /\
               (!i. i < 256 ==>
                 val(read(memory :> bytes32(word_add b (word(4 * i)))) s) < 44))
          (MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI ,,
           MAYCHANGE [memory :> bytes(b, 1024)])`,

  (* Step 1: globalize the variables and assumptions. In particular, this will
    allow us to refer to the pure preconditions (the input bounds on x and y)
    when rewriting the post condition. *)
  MAP_EVERY X_GEN_TAC
    [`b:int64`; `a:int64`; `h:int64`;
     `x:num->int32`; `y:num->int32`; `pc:num`] THEN
  (* NOTE(review): fst of the EXEC theorem is presumably the equation giving
     the concrete LENGTH of the machine code -- confirm against its definition. *)
  REWRITE_TAC[MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI; C_ARGUMENTS;
              NONOVERLAPPING_CLAUSES; ALL;
              fst POLY_USE_HINT_88_AARCH64_ASM_EXEC] THEN
  DISCH_THEN(REPEAT_TCL CONJUNCTS_THEN ASSUME_TAC) THEN
  GLOBALIZE_PRECONDITION_TAC THEN
  
  (* Step 2: replace the stated postcondition by a code-aligned one without
     explicit bounds, and show that the stated postcondition (FIPS 204-aligned
     function plus the < 44 bound) follows from it automatically. *)
   MATCH_MP_TAC ENSURES_STRENGTHEN_POST THEN
   EXISTS_TAC
    `\s. read PC s = word(pc + LENGTH poly_use_hint_88_aarch64_asm_mc - 4) /\
         (!i. i < 256 ==>
           read(memory :> bytes32(word_add b (word(4 * i)))) s =
             word(mldsa_use_hint_88_code (val(x i:int32)) (val(y i:int32))))` THEN
   CONJ_TAC THENL 
  [ (* Defer main proof: this subgoal is left open here and is the only one
       remaining for the tactics after the closing bracket. *)
    ALL_TAC;
    (* Rewrite to code-aligned functional description, using the input bounds
       globalized in step 1 to satisfy the hypotheses of the equivalence. *)
    REPEAT (IMP_REWRITE_TAC[MLDSA_USE_HINT_88_EQUIV]) THEN
    (* Show that bounds are automatic: substitute the functional equation for
       the output word, reduce val(word n) to n MOD 2^32, and bound the
       unfolded mldsa_use_hint_88_code by case analysis on its conditionals. *)
   REWRITE_TAC[fst POLY_USE_HINT_88_AARCH64_ASM_EXEC] THEN REPEAT STRIP_TAC THEN ASM_REWRITE_TAC[] THEN
   FIRST_X_ASSUM(MP_TAC o SPEC `i:num`) THEN ASM_REWRITE_TAC[] THEN
   DISCH_THEN SUBST1_TAC THEN REWRITE_TAC[VAL_WORD; DIMINDEX_32] THEN
   CONV_TAC NUM_REDUCE_CONV THEN
   MATCH_MP_TAC(ARITH_RULE `x < 44 ==> x MOD 4294967296 < 44`) THEN
   REWRITE_TAC[mldsa_use_hint_88_code] THEN
   CONV_TAC(TOP_DEPTH_CONV let_CONV) THEN
   REPEAT(COND_CASES_TAC THEN ASM_REWRITE_TAC[]) THEN ASM_ARITH_TAC] THEN

   (* Step 3: now do the actual proof -- literally as before, but against the
      code-aligned postcondition introduced in step 2. *)
  (* Expand the bounded quantifiers in the precondition into explicit cases. *)
  CONV_TAC(RATOR_CONV(LAND_CONV(ONCE_DEPTH_CONV EXPAND_CASES_CONV))) THEN
  CONV_TAC NUM_REDUCE_CONV THEN
  REPEAT STRIP_TAC THEN
  REWRITE_TAC[SOME_FLAGS; MODIFIABLE_SIMD_REGS] THEN

  ENSURES_INIT_TAC "s0" THEN
  (* NOTE(review): judging by its name, MEMORY_128_FROM_32_TAC derives 128-bit
     memory reads for the named buffer from the 32-bit ones (here offsets
     0 and count 64) -- confirm against its definition. *)
  MEMORY_128_FROM_32_TAC "a" 0 64 THEN
  ASM_REWRITE_TAC[WORD_ADD_0] THEN CONV_TAC WORD_REDUCE_CONV THEN
  STRIP_TAC THEN
  MEMORY_128_FROM_32_TAC "h" 0 64 THEN
  ASM_REWRITE_TAC[WORD_ADD_0] THEN CONV_TAC WORD_REDUCE_CONV THEN
  STRIP_TAC THEN
  (* Drop the assumptions matching the 32-bit read pattern; presumably they
     are superseded by the 128-bit facts just derived. *)
  DISCARD_MATCHING_ASSUMPTIONS [`read (memory :> bytes32 a) s = x`] THEN

  (* Symbolically execute steps 1 through 1006 one at a time, simplifying
     the SIMD expressions after each step. *)
  MAP_EVERY (fun n -> ARM_STEPS_TAC POLY_USE_HINT_88_AARCH64_ASM_EXEC [n] THEN
                      SIMD_SIMPLIFY_TAC[])
        (1--1006) THEN
  ENSURES_FINAL_STATE_TAC THEN ASM_REWRITE_TAC[] THEN

  (* Take every assumption of the form read ... s:int128 = ..., split the
     memory read (READ_MEMORY_SPLIT_CONV 2) and simplify the SIMD expression,
     then put the resulting facts back as assumptions. *)
  REPEAT(FIRST_X_ASSUM(STRIP_ASSUME_TAC o
    CONV_RULE (SIMD_SIMPLIFY_CONV []) o
    CONV_RULE(READ_MEMORY_SPLIT_CONV 2) o
    check (can (term_match [] `read qqq s:int128 = xxx`) o concl))) THEN

  (* Expand the bounded quantifier in the goal into explicit cases and
     normalize the resulting address arithmetic. *)
  CONV_TAC(TOP_DEPTH_CONV EXPAND_CASES_CONV) THEN
  CONV_TAC(DEPTH_CONV NUM_MULT_CONV THENC DEPTH_CONV NUM_ADD_CONV) THEN
  REWRITE_TAC[WORD_ADD_0] THEN
  ASM_REWRITE_TAC[WORD_ADD_0] THEN ASM_REWRITE_TAC[] THEN

  (* Push word_subword through SIMD ops to per-element form *)
  REWRITE_TAC[WORD_SUBWORD_AND; WORD_SUBWORD_OR] THEN
  (* word_subword/word_not commutation, instantiated for the four 32-bit
     lanes (bit offsets 0, 32, 64, 96) of a 128-bit word. *)
  let WSN_TAC = REWRITE_TAC(map (fun n -> prove(
    subst [mk_small_numeral n, `n:num`]
      `!x:int128. word_subword(word_not x) (n,32):int32 = word_not(word_subword x (n,32))`,
    GEN_TAC THEN MATCH_MP_TAC WORD_SUBWORD_NOT THEN
    REWRITE_TAC[DIMINDEX_32; DIMINDEX_128] THEN ARITH_TAC)) [0;32;64;96]) in
  WSN_TAC THEN
  CONV_TAC(DEPTH_CONV WORD_SIMPLE_SUBWORD_CONV) THEN
  CONV_TAC(DEPTH_CONV WORD_NUM_RED_CONV) THEN
  (* EC_DEEP: the per-element lemma ELEMENT_CORRECT_WORD_88 with
     mldsa_use_hint_88_asm, word_2smulh and word_ishr_round unfolded and
     constants reduced, so that it can match the symbolically executed goal. *)
  let EC_DEEP = CONV_RULE(DEPTH_CONV WORD_NUM_RED_CONV)
    (CONV_RULE(DEPTH_CONV(INT_RED_CONV ORELSEC NUM_RED_CONV))
      (CONV_RULE(TOP_DEPTH_CONV let_CONV)
        (REWRITE_RULE[mldsa_use_hint_88_asm; word_2smulh; word_ishr_round;
                       DIMINDEX_32] ELEMENT_CORRECT_WORD_88))) in
  (* EC_OR: the same lemma with the operands of word_or swapped once, to
     match goals where the operands appear in the other order. *)
  let EC_OR = ONCE_REWRITE_RULE[WORD_OR_SYM] EC_DEEP in
  (* Discharge each per-element conjunct with one of the two lemma variants;
     the lemma hypotheses are then matched against the assumptions (the
     globalized input bounds) and the leftover side condition is arithmetic. *)
  REPEAT CONJ_TAC THEN
  (MATCH_MP_TAC EC_OR ORELSE MATCH_MP_TAC EC_DEEP) THEN
  CONJ_TAC THEN FIRST_X_ASSUM MATCH_MP_TAC THEN ARITH_TAC);;

@jakemas
Copy link
Copy Markdown
Contributor Author

jakemas commented May 2, 2026

@jakemas This is the pattern I had in mind: You write the spec as you want it, but start the proof by massaging it into a shape that's better aligned to the code, including a) adding the automatic bounds, b) moving from FIPS204-aligned to code-aligned.

Could you internalize this pattern, and open a follow-up PR removing POLY_USE_HINT_88_AARCH64_ASM_CORRECT_CODE and POLY_USE_HINT_88_AARCH64_ASM_CORRECT_BOUND_CODE?

(* Bridge lemma between the two functional descriptions of use_hint for the
   88 parameter: for r < 8380417 and h <= 1, the FIPS204-aligned function
   mldsa_use_hint_88 (hint first, coefficient second) agrees with the
   code-aligned function mldsa_use_hint_88_code (coefficient first, hint
   second). Note the swapped argument order between the two sides.
   NOTE(review): this is a sketch only; the tactic argument of prove is
   still the placeholder below and has to be filled in from the existing
   proof before the lemma can be loaded. *)
let MLDSA_USE_HINT_88_EQUIV = prove(
  `!r h. r < 8380417 /\ h <= 1
         ==> mldsa_use_hint_88 h r = mldsa_use_hint_88_code r h`, 
   (* FROM YOUR CODE *) );;

(* Correctness of the AArch64 poly_use_hint_88 machine code, stated against
   the mldsa_use_hint_88 specification.

   Assumptions: the 1024-byte output buffer b does not overlap the code, the
   input buffer a, or the hint buffer h; the code is loaded at pc; b, a, h
   are the C arguments; the 256 32-bit words x i read from a satisfy
   val(x i) < 8380417 and the 256 32-bit words y i read from h satisfy
   val(y i) <= 1.

   Conclusion: execution reaches pc + LENGTH mc - 4 in a state where, for
   every i < 256, the 32-bit word at b + 4*i equals
   word(mldsa_use_hint_88 (val(y i)) (val(x i))) and its value is < 44.
   Only ABI-permitted registers and flags and the 1024 bytes at b may
   change.

   Proof outline: the post-condition is first replaced by a code-aligned
   one phrased via mldsa_use_hint_88_code (without the explicit bound); the
   original post-condition is recovered from it using
   MLDSA_USE_HINT_88_EQUIV and a case analysis on the definition; the
   code-aligned statement is then proved by symbolic simulation. *)
let POLY_USE_HINT_88_AARCH64_ASM_CORRECT = prove
 (`!b a h x y pc.
    nonoverlapping (word pc, LENGTH poly_use_hint_88_aarch64_asm_mc) (b, 1024) /\
    nonoverlapping (b, 1024) (a, 1024) /\
    nonoverlapping (b, 1024) (h, 1024)
    ==> ensures arm
          (\s. aligned_bytes_loaded s (word pc) poly_use_hint_88_aarch64_asm_mc /\
               read PC s = word pc /\
               C_ARGUMENTS [b; a; h] s /\
               (!i. i < 256 ==> val(x i) < 8380417) /\
               (!i. i < 256 ==> val(y i) <= 1) /\
               (!i. i < 256 ==>
                 read(memory :> bytes32(word_add a (word(4 * i)))) s = x i) /\
               (!i. i < 256 ==>
                 read(memory :> bytes32(word_add h (word(4 * i)))) s = y i))
          (\s. read PC s = word(pc + LENGTH poly_use_hint_88_aarch64_asm_mc - 4) /\
               (!i. i < 256 ==>
                 read(memory :> bytes32(word_add b (word(4 * i)))) s =
                   word(mldsa_use_hint_88 (val(y i)) (val(x i)))) /\
               (!i. i < 256 ==>
                 val(read(memory :> bytes32(word_add b (word(4 * i)))) s) < 44))
          (MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI ,,
           MAYCHANGE [memory :> bytes(b, 1024)])`,

  (* Step 1: globalize the variables and assumptions. In particular, this
     allows the pure preconditions (the bounds on x and y) to be used when
     rewriting the post-condition below. *)
  MAP_EVERY X_GEN_TAC
    [`b:int64`; `a:int64`; `h:int64`;
     `x:num->int32`; `y:num->int32`; `pc:num`] THEN
  REWRITE_TAC[MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI; C_ARGUMENTS;
              NONOVERLAPPING_CLAUSES; ALL;
              fst POLY_USE_HINT_88_AARCH64_ASM_EXEC] THEN
  DISCH_THEN(REPEAT_TCL CONJUNCTS_THEN ASSUME_TAC) THEN
  GLOBALIZE_PRECONDITION_TAC THEN
  
  (* Step 2: replace the post-condition by a code-aligned one. The new
     post-condition is phrased via mldsa_use_hint_88_code (arguments in the
     order coefficient, hint) and omits the explicit bound, which is shown
     to be automatic once the functional description is established. *)
   MATCH_MP_TAC ENSURES_STRENGTHEN_POST THEN
   EXISTS_TAC
    `\s. read PC s = word(pc + LENGTH poly_use_hint_88_aarch64_asm_mc - 4) /\
         (!i. i < 256 ==>
           read(memory :> bytes32(word_add b (word(4 * i)))) s =
             word(mldsa_use_hint_88_code (val(x i:int32)) (val(y i:int32))))` THEN
   CONJ_TAC THENL 
  [ (* Defer the main proof: ALL_TAC leaves this subgoal untouched so that
       it is handled by the tactics following the THENL list. *)
    ALL_TAC;
    (* Side goal: the code-aligned post-condition gives the original one.
       Rewrite the FIPS204-aligned function in the goal to the code-aligned
       one; the side conditions of MLDSA_USE_HINT_88_EQUIV are the bounds on
       the coefficient and the hint. *)
    REPEAT (IMP_REWRITE_TAC[MLDSA_USE_HINT_88_EQUIV]) THEN
    (* Show that the bound is automatic: specialize the functional
       description to the index i and substitute it into the goal. *)
   REWRITE_TAC[fst POLY_USE_HINT_88_AARCH64_ASM_EXEC] THEN REPEAT STRIP_TAC THEN ASM_REWRITE_TAC[] THEN
   FIRST_X_ASSUM(MP_TAC o SPEC `i:num`) THEN ASM_REWRITE_TAC[] THEN
   DISCH_THEN SUBST1_TAC THEN REWRITE_TAC[VAL_WORD; DIMINDEX_32] THEN
   CONV_TAC NUM_REDUCE_CONV THEN
   (* The value of a 32-bit word built from n is n MOD 4294967296, so it
      suffices to bound the unreduced number by 44. *)
   MATCH_MP_TAC(ARITH_RULE `x < 44 ==> x MOD 4294967296 < 44`) THEN
   (* Unfold the definition, reduce its let-bindings, split on every
      conditional and close each case by arithmetic over the assumptions. *)
   REWRITE_TAC[mldsa_use_hint_88_code] THEN
   CONV_TAC(TOP_DEPTH_CONV let_CONV) THEN
   REPEAT(COND_CASES_TAC THEN ASM_REWRITE_TAC[]) THEN ASM_ARITH_TAC] THEN

   (* Step 3: the actual proof of the code-aligned statement. The conversion
      below is applied to the precondition only (presumably expanding its
      bounded quantifiers over i < 256 into explicit cases). *)
  CONV_TAC(RATOR_CONV(LAND_CONV(ONCE_DEPTH_CONV EXPAND_CASES_CONV))) THEN
  CONV_TAC NUM_REDUCE_CONV THEN
  REPEAT STRIP_TAC THEN
  REWRITE_TAC[SOME_FLAGS; MODIFIABLE_SIMD_REGS] THEN

  (* Set up the initial state s0. MEMORY_128_FROM_32_TAC is defined
     elsewhere; presumably it derives 128-bit reads of the a and h buffers
     from the 32-bit reads (64 reads of 16 bytes cover the 1024 bytes) --
     TODO confirm. The 32-bit read assumptions are then discarded. *)
  ENSURES_INIT_TAC "s0" THEN
  MEMORY_128_FROM_32_TAC "a" 0 64 THEN
  ASM_REWRITE_TAC[WORD_ADD_0] THEN CONV_TAC WORD_REDUCE_CONV THEN
  STRIP_TAC THEN
  MEMORY_128_FROM_32_TAC "h" 0 64 THEN
  ASM_REWRITE_TAC[WORD_ADD_0] THEN CONV_TAC WORD_REDUCE_CONV THEN
  STRIP_TAC THEN
  DISCARD_MATCHING_ASSUMPTIONS [`read (memory :> bytes32 a) s = x`] THEN

  (* Symbolically execute steps 1 to 1006 one at a time, simplifying the
     SIMD expressions after every step. *)
  MAP_EVERY (fun n -> ARM_STEPS_TAC POLY_USE_HINT_88_AARCH64_ASM_EXEC [n] THEN
                      SIMD_SIMPLIFY_TAC[])
        (1--1006) THEN
  ENSURES_FINAL_STATE_TAC THEN ASM_REWRITE_TAC[] THEN

  (* Take every assumption about a 128-bit read, apply
     READ_MEMORY_SPLIT_CONV 2 (presumably splitting it down to 32-bit
     reads) followed by SIMD simplification, and add the resulting
     conjuncts back as assumptions. *)
  REPEAT(FIRST_X_ASSUM(STRIP_ASSUME_TAC o
    CONV_RULE (SIMD_SIMPLIFY_CONV []) o
    CONV_RULE(READ_MEMORY_SPLIT_CONV 2) o
    check (can (term_match [] `read qqq s:int128 = xxx`) o concl))) THEN

  (* Expand the goal into one conjunct per index, evaluate the byte offsets
     and rewrite with the per-element reads obtained above. *)
  CONV_TAC(TOP_DEPTH_CONV EXPAND_CASES_CONV) THEN
  CONV_TAC(DEPTH_CONV NUM_MULT_CONV THENC DEPTH_CONV NUM_ADD_CONV) THEN
  REWRITE_TAC[WORD_ADD_0] THEN
  ASM_REWRITE_TAC[WORD_ADD_0] THEN ASM_REWRITE_TAC[] THEN

  (* Push word_subword through SIMD ops to per-element form *)
  REWRITE_TAC[WORD_SUBWORD_AND; WORD_SUBWORD_OR] THEN
  (* WSN_TAC rewrites with four instances of WORD_SUBWORD_NOT, one for each
     32-bit lane offset 0, 32, 64, 96 of a 128-bit word, proved on the spot
     with the dimension side conditions discharged by arithmetic. *)
  let WSN_TAC = REWRITE_TAC(map (fun n -> prove(
    subst [mk_small_numeral n, `n:num`]
      `!x:int128. word_subword(word_not x) (n,32):int32 = word_not(word_subword x (n,32))`,
    GEN_TAC THEN MATCH_MP_TAC WORD_SUBWORD_NOT THEN
    REWRITE_TAC[DIMINDEX_32; DIMINDEX_128] THEN ARITH_TAC)) [0;32;64;96]) in
  WSN_TAC THEN
  CONV_TAC(DEPTH_CONV WORD_SIMPLE_SUBWORD_CONV) THEN
  CONV_TAC(DEPTH_CONV WORD_NUM_RED_CONV) THEN
  (* EC_DEEP is ELEMENT_CORRECT_WORD_88 with mldsa_use_hint_88_asm,
     word_2smulh and word_ishr_round unfolded, let-bindings reduced and
     numeric constants evaluated, so that it can be matched against the
     expressions produced by the simulation. *)
  let EC_DEEP = CONV_RULE(DEPTH_CONV WORD_NUM_RED_CONV)
    (CONV_RULE(DEPTH_CONV(INT_RED_CONV ORELSEC NUM_RED_CONV))
      (CONV_RULE(TOP_DEPTH_CONV let_CONV)
        (REWRITE_RULE[mldsa_use_hint_88_asm; word_2smulh; word_ishr_round;
                       DIMINDEX_32] ELEMENT_CORRECT_WORD_88))) in
  (* EC_OR is the variant of EC_DEEP with one round of word_or commutation
     applied, tried first in case the operands appear in the other order. *)
  let EC_OR = ONCE_REWRITE_RULE[WORD_OR_SYM] EC_DEEP in
  (* Discharge every per-element conjunct with one of the two variants; the
     two resulting hypotheses are each obtained from a quantified
     assumption, with the index side condition closed by arithmetic. *)
  REPEAT CONJ_TAC THEN
  (MATCH_MP_TAC EC_OR ORELSE MATCH_MP_TAC EC_DEEP) THEN
  CONJ_TAC THEN FIRST_X_ASSUM MATCH_MP_TAC THEN ARITH_TAC);;

ok, will do!

Copy link
Copy Markdown
Contributor

@hanno-becker hanno-becker left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Many thanks @jakemas !

@mkannwischer
Copy link
Copy Markdown
Contributor

mkannwischer commented May 2, 2026

I opened #1085 for @hanno-becker's suggestion so we don't forget.

I will merge this PR shortly.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

4 participants