Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
215 changes: 116 additions & 99 deletions docs/proposals/SKILL_SCAN_DESIGN.md

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
"dev:all": "concurrently \"npm run dev:api\" \"npm run dev:worker\" \"npm run dev:gateway\"",
"build:api": "cd server && go build -o bin/server ./cmd/api",
"build:worker": "cd server && go build -o bin/worker ./cmd/worker",
"backfill:scan": "cd server && go run ./cmd/worker -- scan-backfill",
"build:gateway": "cd gateway && go build -o bin/gateway ./cmd",
"build": "npm run build:api && npm run build:worker && npm run build:gateway",
"docker:up": "docker-compose up -d",
Expand Down
2 changes: 1 addition & 1 deletion portal
165 changes: 158 additions & 7 deletions server/cmd/worker/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,13 @@ package main

import (
"context"
"flag"
"fmt"
"log"
"os"
"os/signal"
"strconv"
"strings"
"syscall"
"time"

Expand All @@ -22,6 +24,15 @@ import (
)

// main is the worker binary's entry point. It dispatches to the one-shot
// "scan-backfill" subcommand when that is the first argument, and otherwise
// starts the long-running worker pools.
func main() {
	args := os.Args[1:]

	// `go run ./cmd/worker -- scan-backfill ...` forwards the "--" separator
	// to the program as its first argument. Drop it so the subcommand is still
	// recognised instead of silently falling through to runWorker.
	if len(args) > 0 && args[0] == "--" {
		args = args[1:]
	}

	if len(args) > 0 && args[0] == "scan-backfill" {
		// Everything after the subcommand name is parsed as backfill flags.
		runScanBackfill(args[1:])
		return
	}

	runWorker()
}

func runWorker() {
// Initialise structured logging with daily rotation and 7-day retention.
// Worker uses a "worker" file prefix to separate logs from the API server:
// logs/worker-app.log – all worker messages (DEBUG+), including full SQL
Expand Down Expand Up @@ -79,11 +90,13 @@ func main() {
tmpDir = os.TempDir() + "/costrict-sync"
}

scanJobSvc := &services.ScanJobService{DB: db}
syncSvc := &services.SyncService{
DB: db,
Git: &services.GitService{TempBaseDir: tmpDir},
Parser: &services.ParserService{},
CategorySvc: &services.CategoryService{DB: db},
DB: db,
Git: &services.GitService{TempBaseDir: tmpDir},
Parser: &services.ParserService{},
ScanJobService: scanJobSvc,
CategorySvc: &services.CategoryService{DB: db},
}

concurrency, _ := strconv.Atoi(os.Getenv("WORKER_CONCURRENCY"))
Expand Down Expand Up @@ -120,9 +133,10 @@ func main() {

scanLLMClient := llm.NewClient(&llmCfg)
scanSvc := &services.ScanService{
DB: db,
LLMClient: scanLLMClient,
ModelName: llmCfg.Model,
DB: db,
LLMClient: scanLLMClient,
ModelName: llmCfg.Model,
CategorySvc: &services.CategoryService{DB: db},
}

scanConcurrency, _ := strconv.Atoi(os.Getenv("SCAN_WORKER_CONCURRENCY"))
Expand Down Expand Up @@ -157,6 +171,143 @@ func main() {
log.Println("Worker pools stopped")
}

// latestRevisionRow receives one row of the aggregate query issued by
// runScanBackfill, which selects item_id and
// COALESCE(MAX(revision), 0) AS latest_revision grouped by item_id.
type latestRevisionRow struct {
// ItemID identifies the capability item the row belongs to.
// NOTE(review): presumably filled from the item_id column via GORM's default
// column naming — confirm if a custom naming strategy is configured.
ItemID string
// LatestRevision is the MAX(revision) value for the item (0 when the
// aggregate is NULL, per the COALESCE in the query).
LatestRevision int
}

func runScanBackfill(args []string) {
logger.Init(logger.Config{
Dir: "./logs",
FilePrefix: "worker",
MaxAgeDays: 7,
Console: true,
ConsoleLevel: "warn",
})

fs := flag.NewFlagSet("scan-backfill", flag.ExitOnError)

var (
allItems bool
securityStatus string
registryID string
itemType string
limit int
triggerType string
priority int
maxAttempts int
dryRun bool
)

fs.BoolVar(&allItems, "all", false, "enqueue all active items instead of filtering by security status")
fs.StringVar(&securityStatus, "security-status", "unscanned", "filter by capability_items.security_status when --all=false")
fs.StringVar(&registryID, "registry-id", "", "only enqueue items in the given registry")
fs.StringVar(&itemType, "item-type", "", "only enqueue items of the given type (skill|subagent|command|mcp)")
fs.IntVar(&limit, "limit", 0, "max number of items to enqueue (0 = no limit)")
fs.StringVar(&triggerType, "trigger-type", "manual", "scan job trigger type to record")
fs.IntVar(&priority, "priority", 1, "scan job priority")
fs.IntVar(&maxAttempts, "max-attempts", 2, "scan job max attempts")
fs.BoolVar(&dryRun, "dry-run", false, "preview items without inserting scan jobs")
_ = fs.Parse(args)
Comment on lines +206 to +211
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

Reject negative queue controls.

Line 282 forwards priority and maxAttempts verbatim, and server/internal/services/scan_job_service.go:24-63 only normalizes zero. scan-backfill --priority=-1 or --max-attempts=-1 therefore persists invalid job settings instead of failing fast.

Suggested fix
 	fs.BoolVar(&dryRun, "dry-run", false, "preview items without inserting scan jobs")
 	_ = fs.Parse(args)
+	if priority < 0 {
+		log.Fatal("--priority must be >= 0")
+	}
+	if maxAttempts < 0 {
+		log.Fatal("--max-attempts must be >= 0")
+	}
 
 	cfg := config.Load()

Also applies to: 282-285

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@server/cmd/worker/main.go` around lines 206 - 211, The CLI currently allows
negative values for the flags `priority` and `maxAttempts` (set via `fs.IntVar`)
which are later passed through unchanged; add validation immediately after `_ =
fs.Parse(args)` in `runScanBackfill` to reject negatives: if `priority < 0` or
`maxAttempts < 0` print a clear error and exit (non-zero) so invalid
`--priority`/`--max-attempts` are refused rather than persisted; ensure the
check uses the same variable names (`priority`, `maxAttempts`, `dryRun`,
`limit`, `triggerType`) so callers fail fast before any call into the scan job
creation/`scan_job_service` code.


cfg := config.Load()
db, err := database.Initialize(cfg.DatabaseURL)
if err != nil {
log.Fatalf("failed to initialize database: %v", err)
}

query := db.Model(&models.CapabilityItem{}).Where("status = ?", "active")
if !allItems {
query = query.Where("security_status = ?", securityStatus)
}
if registryID != "" {
query = query.Where("registry_id = ?", registryID)
}
if itemType != "" {
query = query.Where("item_type = ?", itemType)
}
if limit > 0 {
query = query.Limit(limit)
}

var items []models.CapabilityItem
if err := query.Order("created_at ASC").Find(&items).Error; err != nil {
log.Fatalf("failed to query items: %v", err)
}

if len(items) == 0 {
log.Println("no matching items found")
return
}

itemIDs := make([]string, 0, len(items))
for _, item := range items {
itemIDs = append(itemIDs, item.ID)
}

var revisions []latestRevisionRow
if err := db.Model(&models.CapabilityVersion{}).
Select("item_id, COALESCE(MAX(revision), 0) AS latest_revision").
Where("item_id IN ?", itemIDs).
Group("item_id").
Scan(&revisions).Error; err != nil {
log.Fatalf("failed to query latest revisions: %v", err)
}

revisionByItemID := make(map[string]int, len(revisions))
for _, row := range revisions {
revisionByItemID[row.ItemID] = row.LatestRevision
}

scanJobSvc := &services.ScanJobService{DB: db}

var (
enqueued int
skipped int
failed int
)

for _, item := range items {
revision := revisionByItemID[item.ID]
if revision <= 0 {
revision = 1
}
Comment on lines +271 to +274
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Skip items that have no version row.

server/internal/services/scan_job_service.go:24-63 inserts whatever itemRevision it receives. Falling back to 1 on Lines 272-274 can enqueue pending jobs for items that have no models.CapabilityVersion at all.

Suggested fix
-		revision := revisionByItemID[item.ID]
-		if revision <= 0 {
-			revision = 1
-		}
+		revision, ok := revisionByItemID[item.ID]
+		if !ok || revision <= 0 {
+			skipped++
+			log.Printf("skipped item=%s slug=%s reason=no-capability-version", item.ID, item.Slug)
+			continue
+		}
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
revision := revisionByItemID[item.ID]
if revision <= 0 {
revision = 1
}
revision, ok := revisionByItemID[item.ID]
if !ok || revision <= 0 {
skipped++
log.Printf("skipped item=%s slug=%s reason=no-capability-version", item.ID, item.Slug)
continue
}
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@server/cmd/worker/main.go` around lines 271 - 274, The current fallback to
revision = 1 for items with no version row (using revisionByItemID[item.ID]) can
enqueue jobs for items lacking any models.CapabilityVersion; instead, check the
map presence with the comma-ok idiom (or equivalent) when reading
revisionByItemID for item.ID and skip the item if the revision is missing or <=
0 (e.g., continue the loop) so only items with an actual version row are
enqueued; update the logic around revisionByItemID, item.ID and the surrounding
enqueuing code in main.go accordingly.


if dryRun {
log.Printf("[dry-run] item=%s slug=%s type=%s security_status=%s revision=%d",
item.ID, item.Slug, item.ItemType, item.SecurityStatus, revision)
continue
}

job, err := scanJobSvc.Enqueue(item.ID, revision, triggerType, "", services.ScanEnqueueOptions{
Priority: priority,
MaxAttempts: maxAttempts,
})
switch {
case err == nil && job != nil:
enqueued++
log.Printf("enqueued scan job item=%s slug=%s job=%s revision=%d", item.ID, item.Slug, job.ID, revision)
case err == nil && job == nil:
skipped++
log.Printf("skipped item=%s slug=%s reason=already-has-active-job", item.ID, item.Slug)
case err != nil && strings.Contains(err.Error(), services.ErrScanJobAlreadyQueued.Error()):
skipped++
log.Printf("skipped item=%s slug=%s reason=already-has-active-job", item.ID, item.Slug)
default:
failed++
log.Printf("failed item=%s slug=%s err=%v", item.ID, item.Slug, err)
}
}

if dryRun {
log.Printf("[dry-run] matched %d items", len(items))
return
}

log.Printf("backfill complete: matched=%d enqueued=%d skipped=%d failed=%d",
len(items), enqueued, skipped, failed)
}

func runPreMigrations(db *gorm.DB) error {
stmts := []struct {
check string
Expand Down
1 change: 1 addition & 0 deletions server/internal/models/models.go
Original file line number Diff line number Diff line change
Expand Up @@ -295,6 +295,7 @@ type SecurityScan struct {
ItemRevision int `gorm:"not null;default:0" json:"itemRevision"`
TriggerType string `gorm:"not null" json:"triggerType"` // create | update | sync | manual
ScanModel string `json:"scanModel"`
Category string `gorm:"default:''" json:"category"`
RiskLevel string `gorm:"default:''" json:"riskLevel"` // clean | low | medium | high | extreme
Verdict string `gorm:"default:''" json:"verdict"` // safe | caution | reject
RedFlags datatypes.JSON `gorm:"type:jsonb;default:'[]'" json:"redFlags" swaggertype:"array,object"`
Expand Down
72 changes: 67 additions & 5 deletions server/internal/services/scan_service.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,40 @@ import (
"gorm.io/gorm"
)

// allowedScanCategories is the fixed set of category slugs accepted by
// isValidScanCategory. The same slugs are enumerated for the model in
// scanSystemPrompt, so the two lists must be kept in sync when a category is
// added or removed.
var allowedScanCategories = []string{
"frontend-development",
"backend-development",
"system-architecture",
"requirements-analysis",
"system-design",
"data-processing",
"software-testing",
"tdd-development",
"information-security",
"command-execution",
"tool-invocation",
"deployment-operations",
}

const scanSystemPrompt = `你是一个专业的 AI 能力项安全审查员。
你需要对用户提交的 AI Agent Skill / MCP Server 配置进行安全审查。

## 分类要求

你还需要为该能力项选择一个最匹配的分类,只能从以下 slug 中选择一个:
- frontend-development(前端开发)
- backend-development(后端开发)
- system-architecture(系统架构)
- requirements-analysis(需求分析)
- system-design(系统设计)
- data-processing(数据处理)
- software-testing(软件测试)
- tdd-development(TDD 开发)
- information-security(信息安全)
- command-execution(命令执行)
- tool-invocation(工具调用)
- deployment-operations(部署运维)

## 审查维度

### 红线行为(出现任意一条 → risk_level=extreme)
Expand Down Expand Up @@ -50,6 +81,7 @@ const scanSystemPrompt = `你是一个专业的 AI 能力项安全审查员。
严格输出以下 JSON,不要添加任何额外文字:

{
"category": "从固定分类 slug 列表中选择一个",
"risk_level": "clean | low | medium | high | extreme",
"verdict": "safe | caution | reject",
"red_flags": ["具体描述发现的红线行为,引用原文"],
Expand Down Expand Up @@ -86,6 +118,7 @@ const scanUserPromptTemplate = `请对以下 AI 能力项进行安全审查:
const maxInputRunes = 6000

type ScanReport struct {
Category string `json:"category"`
RiskLevel string `json:"risk_level"`
Verdict string `json:"verdict"`
RedFlags []string `json:"red_flags"`
Expand All @@ -101,9 +134,10 @@ type Permissions struct {
}

type ScanService struct {
DB *gorm.DB
LLMClient *llm.Client
ModelName string
DB *gorm.DB
LLMClient *llm.Client
ModelName string
CategorySvc *CategoryService
}

func (s *ScanService) ScanItem(ctx context.Context, itemID string, itemRevision int, triggerType string) (*models.SecurityScan, error) {
Expand Down Expand Up @@ -140,6 +174,7 @@ func (s *ScanService) ScanItem(ctx context.Context, itemID string, itemRevision
ItemRevision: itemRevision,
TriggerType: triggerType,
ScanModel: s.ModelName,
Category: reportCategoryValue(report),
DurationMs: durationMs,
RawOutput: rawOutput,
}
Expand Down Expand Up @@ -180,10 +215,17 @@ func (s *ScanService) ScanItem(ctx context.Context, itemID string, itemRevision
return nil, dbErr
}

s.DB.Model(&item).Updates(map[string]any{
itemUpdates := map[string]any{
"security_status": report.RiskLevel,
"last_scan_id": scanRecord.ID,
})
}
if scanRecord.Category != "" {
itemUpdates["category"] = scanRecord.Category
}
s.DB.Model(&item).Updates(itemUpdates)
if scanRecord.Category != "" && s.CategorySvc != nil {
_, _ = s.CategorySvc.EnsureCategory(scanRecord.Category, "scan")
}

return scanRecord, nil
}
Expand Down Expand Up @@ -218,10 +260,20 @@ func (s *ScanService) callLLM(ctx context.Context, userPrompt string) (*ScanRepo
if !isValidVerdict(report.Verdict) {
return nil, raw, fmt.Errorf("invalid verdict in LLM output: %q", report.Verdict)
}
if !isValidScanCategory(report.Category) {
return nil, raw, fmt.Errorf("invalid category in LLM output: %q", report.Category)
}

return &report, raw, nil
}

// reportCategoryValue extracts the category slug from a scan report with
// surrounding whitespace removed. A nil report yields the empty string.
func reportCategoryValue(report *ScanReport) string {
	var category string
	if report != nil {
		category = strings.TrimSpace(report.Category)
	}
	return category
}

func extractJSON(s string) string {
// Strip markdown code fences (```json ... ``` or ``` ... ```)
s = strings.TrimSpace(s)
Expand Down Expand Up @@ -259,6 +311,16 @@ func isValidVerdict(v string) bool {
return false
}

// isValidScanCategory reports whether v, after trimming surrounding
// whitespace, exactly matches one of the slugs in allowedScanCategories.
// The comparison is case-sensitive.
func isValidScanCategory(v string) bool {
	candidate := strings.TrimSpace(v)
	for i := 0; i < len(allowedScanCategories); i++ {
		if allowedScanCategories[i] == candidate {
			return true
		}
	}
	return false
}

func truncateContent(content string, maxRunes int) string {
if utf8.RuneCountInString(content) <= maxRunes {
return content
Expand Down
Loading
Loading