Skip to content

Commit f8979b0

Browse files
authored
Merge pull request #42 from XDfield/feat/refactor-search
Feat/refactor search
2 parents a9ec135 + 92ab675 commit f8979b0

8 files changed

Lines changed: 563 additions & 112 deletions

File tree

docs/proposals/SKILL_SCAN_DESIGN.md

Lines changed: 116 additions & 99 deletions
Large diffs are not rendered by default.

package.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
"dev:all": "concurrently \"npm run dev:api\" \"npm run dev:worker\" \"npm run dev:gateway\"",
1212
"build:api": "cd server && go build -o bin/server ./cmd/api",
1313
"build:worker": "cd server && go build -o bin/worker ./cmd/worker",
14+
"backfill:scan": "cd server && go run ./cmd/worker -- scan-backfill",
1415
"build:gateway": "cd gateway && go build -o bin/gateway ./cmd",
1516
"build": "npm run build:api && npm run build:worker && npm run build:gateway",
1617
"docker:up": "docker-compose up -d",

portal

server/cmd/worker/main.go

Lines changed: 158 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,13 @@ package main
22

33
import (
44
"context"
5+
"flag"
56
"fmt"
67
"log"
78
"os"
89
"os/signal"
910
"strconv"
11+
"strings"
1012
"syscall"
1113
"time"
1214

@@ -22,6 +24,15 @@ import (
2224
)
2325

2426
func main() {
27+
if len(os.Args) > 1 && os.Args[1] == "scan-backfill" {
28+
runScanBackfill(os.Args[2:])
29+
return
30+
}
31+
32+
runWorker()
33+
}
34+
35+
func runWorker() {
2536
// Initialise structured logging with daily rotation and 7-day retention.
2637
// Worker uses a "worker" file prefix to separate logs from the API server:
2738
// logs/worker-app.log – all worker messages (DEBUG+), including full SQL
@@ -79,11 +90,13 @@ func main() {
7990
tmpDir = os.TempDir() + "/costrict-sync"
8091
}
8192

93+
scanJobSvc := &services.ScanJobService{DB: db}
8294
syncSvc := &services.SyncService{
83-
DB: db,
84-
Git: &services.GitService{TempBaseDir: tmpDir},
85-
Parser: &services.ParserService{},
86-
CategorySvc: &services.CategoryService{DB: db},
95+
DB: db,
96+
Git: &services.GitService{TempBaseDir: tmpDir},
97+
Parser: &services.ParserService{},
98+
ScanJobService: scanJobSvc,
99+
CategorySvc: &services.CategoryService{DB: db},
87100
}
88101

89102
concurrency, _ := strconv.Atoi(os.Getenv("WORKER_CONCURRENCY"))
@@ -120,9 +133,10 @@ func main() {
120133

121134
scanLLMClient := llm.NewClient(&llmCfg)
122135
scanSvc := &services.ScanService{
123-
DB: db,
124-
LLMClient: scanLLMClient,
125-
ModelName: llmCfg.Model,
136+
DB: db,
137+
LLMClient: scanLLMClient,
138+
ModelName: llmCfg.Model,
139+
CategorySvc: &services.CategoryService{DB: db},
126140
}
127141

128142
scanConcurrency, _ := strconv.Atoi(os.Getenv("SCAN_WORKER_CONCURRENCY"))
@@ -157,6 +171,143 @@ func main() {
157171
log.Println("Worker pools stopped")
158172
}
159173

174+
// latestRevisionRow is the scan target for the grouped query that selects
// item_id together with the highest revision (aliased latest_revision) of
// each capability item.
type latestRevisionRow struct {
	// ItemID receives the item_id column.
	ItemID string
	// LatestRevision receives COALESCE(MAX(revision), 0) for that item.
	LatestRevision int
}
178+
179+
func runScanBackfill(args []string) {
180+
logger.Init(logger.Config{
181+
Dir: "./logs",
182+
FilePrefix: "worker",
183+
MaxAgeDays: 7,
184+
Console: true,
185+
ConsoleLevel: "warn",
186+
})
187+
188+
fs := flag.NewFlagSet("scan-backfill", flag.ExitOnError)
189+
190+
var (
191+
allItems bool
192+
securityStatus string
193+
registryID string
194+
itemType string
195+
limit int
196+
triggerType string
197+
priority int
198+
maxAttempts int
199+
dryRun bool
200+
)
201+
202+
fs.BoolVar(&allItems, "all", false, "enqueue all active items instead of filtering by security status")
203+
fs.StringVar(&securityStatus, "security-status", "unscanned", "filter by capability_items.security_status when --all=false")
204+
fs.StringVar(&registryID, "registry-id", "", "only enqueue items in the given registry")
205+
fs.StringVar(&itemType, "item-type", "", "only enqueue items of the given type (skill|subagent|command|mcp)")
206+
fs.IntVar(&limit, "limit", 0, "max number of items to enqueue (0 = no limit)")
207+
fs.StringVar(&triggerType, "trigger-type", "manual", "scan job trigger type to record")
208+
fs.IntVar(&priority, "priority", 1, "scan job priority")
209+
fs.IntVar(&maxAttempts, "max-attempts", 2, "scan job max attempts")
210+
fs.BoolVar(&dryRun, "dry-run", false, "preview items without inserting scan jobs")
211+
_ = fs.Parse(args)
212+
213+
cfg := config.Load()
214+
db, err := database.Initialize(cfg.DatabaseURL)
215+
if err != nil {
216+
log.Fatalf("failed to initialize database: %v", err)
217+
}
218+
219+
query := db.Model(&models.CapabilityItem{}).Where("status = ?", "active")
220+
if !allItems {
221+
query = query.Where("security_status = ?", securityStatus)
222+
}
223+
if registryID != "" {
224+
query = query.Where("registry_id = ?", registryID)
225+
}
226+
if itemType != "" {
227+
query = query.Where("item_type = ?", itemType)
228+
}
229+
if limit > 0 {
230+
query = query.Limit(limit)
231+
}
232+
233+
var items []models.CapabilityItem
234+
if err := query.Order("created_at ASC").Find(&items).Error; err != nil {
235+
log.Fatalf("failed to query items: %v", err)
236+
}
237+
238+
if len(items) == 0 {
239+
log.Println("no matching items found")
240+
return
241+
}
242+
243+
itemIDs := make([]string, 0, len(items))
244+
for _, item := range items {
245+
itemIDs = append(itemIDs, item.ID)
246+
}
247+
248+
var revisions []latestRevisionRow
249+
if err := db.Model(&models.CapabilityVersion{}).
250+
Select("item_id, COALESCE(MAX(revision), 0) AS latest_revision").
251+
Where("item_id IN ?", itemIDs).
252+
Group("item_id").
253+
Scan(&revisions).Error; err != nil {
254+
log.Fatalf("failed to query latest revisions: %v", err)
255+
}
256+
257+
revisionByItemID := make(map[string]int, len(revisions))
258+
for _, row := range revisions {
259+
revisionByItemID[row.ItemID] = row.LatestRevision
260+
}
261+
262+
scanJobSvc := &services.ScanJobService{DB: db}
263+
264+
var (
265+
enqueued int
266+
skipped int
267+
failed int
268+
)
269+
270+
for _, item := range items {
271+
revision := revisionByItemID[item.ID]
272+
if revision <= 0 {
273+
revision = 1
274+
}
275+
276+
if dryRun {
277+
log.Printf("[dry-run] item=%s slug=%s type=%s security_status=%s revision=%d",
278+
item.ID, item.Slug, item.ItemType, item.SecurityStatus, revision)
279+
continue
280+
}
281+
282+
job, err := scanJobSvc.Enqueue(item.ID, revision, triggerType, "", services.ScanEnqueueOptions{
283+
Priority: priority,
284+
MaxAttempts: maxAttempts,
285+
})
286+
switch {
287+
case err == nil && job != nil:
288+
enqueued++
289+
log.Printf("enqueued scan job item=%s slug=%s job=%s revision=%d", item.ID, item.Slug, job.ID, revision)
290+
case err == nil && job == nil:
291+
skipped++
292+
log.Printf("skipped item=%s slug=%s reason=already-has-active-job", item.ID, item.Slug)
293+
case err != nil && strings.Contains(err.Error(), services.ErrScanJobAlreadyQueued.Error()):
294+
skipped++
295+
log.Printf("skipped item=%s slug=%s reason=already-has-active-job", item.ID, item.Slug)
296+
default:
297+
failed++
298+
log.Printf("failed item=%s slug=%s err=%v", item.ID, item.Slug, err)
299+
}
300+
}
301+
302+
if dryRun {
303+
log.Printf("[dry-run] matched %d items", len(items))
304+
return
305+
}
306+
307+
log.Printf("backfill complete: matched=%d enqueued=%d skipped=%d failed=%d",
308+
len(items), enqueued, skipped, failed)
309+
}
310+
160311
func runPreMigrations(db *gorm.DB) error {
161312
stmts := []struct {
162313
check string

server/internal/models/models.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -397,6 +397,7 @@ type SecurityScan struct {
397397
ItemRevision int `gorm:"not null;default:0" json:"itemRevision"`
398398
TriggerType string `gorm:"not null" json:"triggerType"` // create | update | sync | manual
399399
ScanModel string `json:"scanModel"`
400+
Category string `gorm:"default:''" json:"category"`
400401
RiskLevel string `gorm:"default:''" json:"riskLevel"` // clean | low | medium | high | extreme
401402
Verdict string `gorm:"default:''" json:"verdict"` // safe | caution | reject
402403
RedFlags datatypes.JSON `gorm:"type:jsonb;default:'[]'" json:"redFlags" swaggertype:"array,object"`

server/internal/services/scan_service.go

Lines changed: 67 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,40 @@ import (
1515
"gorm.io/gorm"
1616
)
1717

18+
// allowedScanCategories lists the category slugs a scan report may carry.
// isValidScanCategory checks LLM output against this list, and the same slugs
// are enumerated in scanSystemPrompt, so the two must be kept in sync.
var allowedScanCategories = []string{
	// Development.
	"frontend-development",
	"backend-development",
	// Analysis and design.
	"system-architecture",
	"requirements-analysis",
	"system-design",
	"data-processing",
	// Testing and security.
	"software-testing",
	"tdd-development",
	"information-security",
	// Execution and operations.
	"command-execution",
	"tool-invocation",
	"deployment-operations",
}
32+
1833
const scanSystemPrompt = `你是一个专业的 AI 能力项安全审查员。
1934
你需要对用户提交的 AI Agent Skill / MCP Server 配置进行安全审查。
2035
36+
## 分类要求
37+
38+
你还需要为该能力项选择一个最匹配的分类,只能从以下 slug 中选择一个:
39+
- frontend-development(前端开发)
40+
- backend-development(后端开发)
41+
- system-architecture(系统架构)
42+
- requirements-analysis(需求分析)
43+
- system-design(系统设计)
44+
- data-processing(数据处理)
45+
- software-testing(软件测试)
46+
- tdd-development(TDD 开发)
47+
- information-security(信息安全)
48+
- command-execution(命令执行)
49+
- tool-invocation(工具调用)
50+
- deployment-operations(部署运维)
51+
2152
## 审查维度
2253
2354
### 红线行为(出现任意一条 → risk_level=extreme)
@@ -50,6 +81,7 @@ const scanSystemPrompt = `你是一个专业的 AI 能力项安全审查员。
5081
严格输出以下 JSON,不要添加任何额外文字:
5182
5283
{
84+
"category": "从固定分类 slug 列表中选择一个",
5385
"risk_level": "clean | low | medium | high | extreme",
5486
"verdict": "safe | caution | reject",
5587
"red_flags": ["具体描述发现的红线行为,引用原文"],
@@ -86,6 +118,7 @@ const scanUserPromptTemplate = `请对以下 AI 能力项进行安全审查:
86118
const maxInputRunes = 6000
87119

88120
type ScanReport struct {
121+
Category string `json:"category"`
89122
RiskLevel string `json:"risk_level"`
90123
Verdict string `json:"verdict"`
91124
RedFlags []string `json:"red_flags"`
@@ -101,9 +134,10 @@ type Permissions struct {
101134
}
102135

103136
type ScanService struct {
104-
DB *gorm.DB
105-
LLMClient *llm.Client
106-
ModelName string
137+
DB *gorm.DB
138+
LLMClient *llm.Client
139+
ModelName string
140+
CategorySvc *CategoryService
107141
}
108142

109143
func (s *ScanService) ScanItem(ctx context.Context, itemID string, itemRevision int, triggerType string) (*models.SecurityScan, error) {
@@ -140,6 +174,7 @@ func (s *ScanService) ScanItem(ctx context.Context, itemID string, itemRevision
140174
ItemRevision: itemRevision,
141175
TriggerType: triggerType,
142176
ScanModel: s.ModelName,
177+
Category: reportCategoryValue(report),
143178
DurationMs: durationMs,
144179
RawOutput: rawOutput,
145180
}
@@ -180,10 +215,17 @@ func (s *ScanService) ScanItem(ctx context.Context, itemID string, itemRevision
180215
return nil, dbErr
181216
}
182217

183-
s.DB.Model(&item).Updates(map[string]any{
218+
itemUpdates := map[string]any{
184219
"security_status": report.RiskLevel,
185220
"last_scan_id": scanRecord.ID,
186-
})
221+
}
222+
if scanRecord.Category != "" {
223+
itemUpdates["category"] = scanRecord.Category
224+
}
225+
s.DB.Model(&item).Updates(itemUpdates)
226+
if scanRecord.Category != "" && s.CategorySvc != nil {
227+
_, _ = s.CategorySvc.EnsureCategory(scanRecord.Category, "scan")
228+
}
187229

188230
return scanRecord, nil
189231
}
@@ -218,10 +260,20 @@ func (s *ScanService) callLLM(ctx context.Context, userPrompt string) (*ScanRepo
218260
if !isValidVerdict(report.Verdict) {
219261
return nil, raw, fmt.Errorf("invalid verdict in LLM output: %q", report.Verdict)
220262
}
263+
if !isValidScanCategory(report.Category) {
264+
return nil, raw, fmt.Errorf("invalid category in LLM output: %q", report.Category)
265+
}
221266

222267
return &report, raw, nil
223268
}
224269

270+
func reportCategoryValue(report *ScanReport) string {
271+
if report == nil {
272+
return ""
273+
}
274+
return strings.TrimSpace(report.Category)
275+
}
276+
225277
func extractJSON(s string) string {
226278
// Strip markdown code fences (```json ... ``` or ``` ... ```)
227279
s = strings.TrimSpace(s)
@@ -259,6 +311,16 @@ func isValidVerdict(v string) bool {
259311
return false
260312
}
261313

314+
func isValidScanCategory(v string) bool {
315+
v = strings.TrimSpace(v)
316+
for _, category := range allowedScanCategories {
317+
if v == category {
318+
return true
319+
}
320+
}
321+
return false
322+
}
323+
262324
func truncateContent(content string, maxRunes int) string {
263325
if utf8.RuneCountInString(content) <= maxRunes {
264326
return content

0 commit comments

Comments
 (0)