Skip to content

Commit 107b50f

Browse files
unamedkrclaude
andcommitted
score.sh: add 10-year position dimension + CI guard
New 6th scoring dimension explicitly measures the structural moats that define our 10-year position. Previously the harness measured what we built (structure/correctness/quality/perf/integration) but had no metric for what makes us defensible vs llama.cpp.

Six new sub-dimensions (weights total 9):
- single_header_loc: quant.h <= 16,000 lines (currently 15,785)
- single_header_size: quant.h <= 700 KB (currently 646 KB)
- core_zero_deps: src/core/*.c only includes libc / SIMD intrinsics / OS threading / project headers
- papers_implemented: polar / qjl / turbo / uniform / turbo_kv = 5
- honest_corrections: >=4 self-corrections logged in CHANGELOG
- pypi_distribution: pyproject.toml + publish.yml present

Whitelisted in core_zero_deps: arm_neon.h, immintrin.h, wasm_simd128.h, pthread.h, windows.h, sched.h, sys/*, etc. These are all libc/OS headers, not third-party deps.

Run history skip rule: --bench, --quality, and --position no longer pollute .score_history. Only --quick and --full write to the trend.

CI guard: .github/workflows/ci.yml runs `score.sh --position` on Linux and fails the build if the position dimension drops below 75%. This means any future change that bloats quant.h past 16K LOC, adds a third-party dep to src/core, removes a paper implementation, etc., gets caught at PR time, not after PyPI ships.

Baseline (v0.8.1, Apple Silicon, --quick mode):
- total 96.1% (was 94.5% under 5-dim scoring)
- position 100.0%

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 1bc4611 commit 107b50f

File tree

2 files changed

+135
-3
lines changed

2 files changed

+135
-3
lines changed

.github/workflows/ci.yml

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,28 @@ jobs:
5555
- name: Run tests
5656
run: ctest --test-dir build --output-on-failure --timeout 120 -C Release
5757

58+
# 10-year position guard: fail CI if structural moats erode.
# Runs on the Linux runner only (the step is executed with bash). Checks
# single-header LOC/size, zero-deps, papers count, honest correction track,
# and PyPI presence via `score.sh --position`.
- name: 10-year position guard
  if: matrix.os == 'ubuntu-latest'
  shell: bash
  run: |
    chmod +x score.sh
    bash score.sh --position 2>&1 | tee /tmp/position.log
    # Extract the dimension percentage from the first line whose first
    # field is "position". awk exits 0 even when nothing matches, so under
    # the runner's `bash -e -o pipefail` the step reaches the explicit
    # diagnostics below instead of aborting inside the substitution.
    pct=$(awk '$1 == "position" { print $2; exit }' /tmp/position.log | tr -d '%')
    echo "position dimension: ${pct}%"
    # Reject empty or non-numeric values explicitly: a failing `[ -lt ]`
    # inside an `if` condition counts as "false" and would let the guard
    # pass silently.
    if ! [[ "$pct" =~ ^[0-9]+(\.[0-9]+)?$ ]]; then
      echo "could not parse position score"
      exit 1
    fi
    # Integer compare on the whole-number part; require >= 75%.
    if [ "${pct%.*}" -lt 75 ]; then
      echo "::error::Position dimension regressed below 75% (${pct}%)"
      exit 1
    fi
79+
5880
- name: Upload test results
5981
if: failure()
6082
uses: actions/upload-artifact@v4

score.sh

Lines changed: 113 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -306,6 +306,103 @@ eval_integration() {
306306
print_item "documentation" "$dc" 3 1
307307
}
308308

309+
# ============================================================
# DIMENSION 6: 10-YEAR POSITION (structural moats)
# ------------------------------------------------------------
# These metrics protect the project's defensible position:
# single-header embeddability, zero deps, research velocity,
# claim auditability. Anything that erodes them should drag
# the score, even if other dimensions look fine.
#
# Globals read: PROJECT_DIR, BOLD, CYAN, NC
# Calls:        log_score  <dimension> <item> <score> <max> <weight>
#               print_item <label>     <score> <max> <weight>
# Sub-scores (weights total 9):
#   single_header_loc   1 if quant.h has 1..16000 lines          (w=2)
#   single_header_size  1 if quant.h is 1..700 KB (truncated)    (w=1)
#   core_zero_deps      1 if src/core/*.c exist and include only
#                       whitelisted headers                      (w=2)
#   papers_implemented  0..5 known algorithm files present       (w=2)
#   honest_corrections  1 if >= 4 CHANGELOG lines match          (w=1)
#   pypi_distribution   1 if pyproject.toml + publish.yml exist  (w=1)
# ============================================================
eval_position() {
    echo -e "\n${BOLD}${CYAN}[6/6] 10-YEAR POSITION (structural moats)${NC}"

    local header="$PROJECT_DIR/quant.h"
    local core_dir="$PROJECT_DIR/src/core"

    # ----- Single-header LOC budget (≤ 16,000 lines) -----
    local sh_loc=0
    if [ -f "$header" ]; then
        sh_loc=$({ wc -l < "$header"; } 2>/dev/null | tr -d ' ')
        sh_loc=${sh_loc:-0}   # unreadable file: treat as missing
    fi
    local sh_loc_score=0
    if [ "$sh_loc" -gt 0 ] && [ "$sh_loc" -le 16000 ]; then
        sh_loc_score=1
    fi
    log_score "position" "single_header_loc" "$sh_loc_score" 1 2
    print_item "single_header_loc ($sh_loc / 16000)" "$sh_loc_score" 1 2

    # ----- Single-header file size budget (≤ 700 KB) -----
    # `wc -c` is POSIX, so no BSD-vs-GNU `stat` flag probing is needed.
    local sh_size=0
    if [ -f "$header" ]; then
        sh_size=$({ wc -c < "$header"; } 2>/dev/null | tr -d ' ')
        sh_size=${sh_size:-0}
    fi
    local sh_size_kb=$((sh_size / 1024))
    local sh_size_score=0
    if [ "$sh_size_kb" -gt 0 ] && [ "$sh_size_kb" -le 700 ]; then
        sh_size_score=1
    fi
    log_score "position" "single_header_size" "$sh_size_score" 1 1
    print_item "single_header_size (${sh_size_kb} KB / 700)" "$sh_size_score" 1 1

    # ----- Zero external dependencies in core (libc/libm/intrinsics/OS) -----
    # Allowed:
    #   - C standard library headers
    #   - SIMD intrinsics (arm_neon.h, immintrin.h, wasm_simd128.h, ...)
    #   - OS threading / kernel headers (pthread.h, windows.h, sched.h, ...)
    #   - Project headers (turboquant/*, tq_*)
    # A failure here means we picked up a real third-party dep, or that
    # there were no core sources to audit at all.
    local core_sources=0 src
    if [ -d "$core_dir" ]; then
        for src in "$core_dir"/*.c; do
            # An unmatched glob stays literal; -f filters that case out.
            if [ -f "$src" ]; then
                core_sources=$((core_sources + 1))
            fi
        done
    fi
    local bad_includes=0
    if [ "$core_sources" -gt 0 ]; then
        bad_includes=$(grep -hE '^[[:space:]]*#include[[:space:]]*[<"]' "$core_dir/"*.c 2>/dev/null \
            | grep -vE '<(stdint|string|math|stdlib|stdio|stddef|stdbool|assert|float|limits|inttypes|errno|time|ctype|signal)\.h>' \
            | grep -vE '<(stdarg|stdatomic|stdalign|stdnoreturn|complex|fenv|iso646|locale|setjmp|tgmath|threads|uchar|wchar|wctype)\.h>' \
            | grep -vE '<(arm_neon|immintrin|wasm_simd128|x86intrin|emmintrin|smmintrin|tmmintrin|nmmintrin|avxintrin|avx2intrin)\.h>' \
            | grep -vE '<(pthread|sched|unistd|sys/[a-z_]+|windows|fcntl)\.h>' \
            | grep -vE '"(turboquant/|tq_)' \
            | wc -l | tr -d ' ') || true
        bad_includes=${bad_includes:-0}
    fi
    local deps_score=0
    local deps_label="core_zero_deps ($bad_includes foreign includes)"
    if [ "$core_sources" -eq 0 ]; then
        # Nothing was scanned, so "zero deps" cannot be certified.
        deps_label="core_zero_deps (no core sources found)"
    elif [ "$bad_includes" = "0" ]; then
        deps_score=1
    fi
    log_score "position" "core_zero_deps" "$deps_score" 1 2
    print_item "$deps_label" "$deps_score" 1 2

    # ----- Papers ported (research velocity proxy) -----
    # Counts implementation files matching known KV-quant paper algorithms.
    # Goal is +1 every quarter; baseline as of v0.8.0 = 5 (polar, qjl, turbo,
    # uniform, turbo_kv). Score reflects whether we're maintaining the count.
    local papers=0 algo
    for algo in polar qjl turbo uniform turbo_kv; do
        if [ -f "$core_dir/tq_${algo}.c" ]; then
            papers=$((papers + 1))
        fi
    done
    log_score "position" "papers_implemented" "$papers" 5 2
    print_item "papers_implemented" "$papers" 5 2

    # ----- Honest correction track (CHANGELOG retrospective entries) -----
    # Counts CHANGELOG lines (case-insensitive) that name a self-correction.
    # Reframes corrections as a positive — they're our trust asset.
    local corrections=0
    if [ -f "$PROJECT_DIR/CHANGELOG.md" ]; then
        # grep -c already prints "0" when nothing matches but exits 1;
        # swallow the status instead of echoing a second "0".
        corrections=$(grep -ciE 'honest correction|self.?corrected|hotfix|retracted|retract' \
            "$PROJECT_DIR/CHANGELOG.md" 2>/dev/null) || true
        corrections=${corrections:-0}
    fi
    # Cap at 10 — beyond that the metric stops rewarding new ones.
    if [ "$corrections" -gt 10 ]; then
        corrections=10
    fi
    local correction_score=0
    if [ "$corrections" -ge 4 ]; then
        correction_score=1
    fi
    log_score "position" "honest_corrections" "$correction_score" 1 1
    print_item "honest_corrections ($corrections logged)" "$correction_score" 1 1

    # ----- PyPI distribution channel live -----
    # "Live" here means the packaging metadata names the quantcpp package
    # and a publish workflow file exists; nothing is queried over the network.
    local pypi_live=0
    if [ -f "$PROJECT_DIR/bindings/python/pyproject.toml" ] && \
       grep -q '^name *= *"quantcpp"' "$PROJECT_DIR/bindings/python/pyproject.toml" && \
       [ -f "$PROJECT_DIR/.github/workflows/publish.yml" ]; then
        pypi_live=1
    fi
    log_score "position" "pypi_distribution" "$pypi_live" 1 1
    print_item "pypi_distribution" "$pypi_live" 1 1
}
405+
309406
# ============================================================
310407
# FINAL REPORT
311408
# ============================================================
@@ -333,7 +430,7 @@ print_final() {
333430
# Dimension breakdown
334431
echo ""
335432
echo " Dimension Breakdown:"
336-
for dim in structure correctness quality performance integration; do
433+
for dim in structure correctness quality performance integration position; do
337434
local ds=0 dw=0
338435
while IFS='|' read -r cat name score max weight; do
339436
if [ "$cat" = "$dim" ]; then
@@ -350,8 +447,16 @@ print_final() {
350447
fi
351448
done
352449

353-
# Save history
354-
echo "$(date '+%Y-%m-%d %H:%M:%S') $final" >> "$SCORE_LOG"
450+
# Save history — only for full / quick evaluations.
451+
# Single-dimension modes (--bench, --quality, --position) skip the log
452+
# so partial scores don't pollute the trend line.
453+
case "$MODE" in
454+
--bench|--quality|--position)
455+
;;
456+
*)
457+
echo "$(date '+%Y-%m-%d %H:%M:%S') $final" >> "$SCORE_LOG"
458+
;;
459+
esac
355460

356461
# Trend
357462
if [ -f "$SCORE_LOG" ] && [ "$(wc -l < "$SCORE_LOG" | tr -d ' ')" -gt 1 ]; then
@@ -379,19 +484,24 @@ case "$MODE" in
379484
--quick)
380485
eval_structure
381486
eval_correctness
487+
eval_position
382488
;;
383489
--bench)
384490
eval_performance
385491
;;
386492
--quality)
387493
eval_quality
388494
;;
495+
--position)
496+
eval_position
497+
;;
389498
--full|*)
390499
eval_structure
391500
eval_correctness
392501
eval_quality
393502
eval_performance
394503
eval_integration
504+
eval_position
395505
;;
396506
esac
397507

0 commit comments

Comments
 (0)