# Source: agentshield/.env.example (autralabs/agentshield on GitHub, main branch)

# AgentShield Environment Variables
# Copy this file to .env and fill in your values
# NEVER commit .env to version control!

# =============================================================================
# REQUIRED: OpenAI API Key
# =============================================================================

# Your OpenAI API key - REQUIRED for:
#   1. LLM cleaning (recommended for best accuracy ~90%)
#   2. Finetuning the embedding model (data preparation step)
#
# Get your key from: https://platform.openai.com/api-keys
# Cost: ~$0.0003 per document for cleaning, ~$3-5 total for finetuning
OPENAI_API_KEY=sk-...

# =============================================================================
# Embedding Model Configuration
# =============================================================================

# Provider: "local", "openai", or "mlx"
# AGENTSHIELD_EMBEDDINGS__PROVIDER=local

# Model name or path. For best accuracy (~95%), use your finetuned model,
# which is created by running: python scripts/finetune_local.py
# AGENTSHIELD_EMBEDDINGS__MODEL=./agentshield-embeddings-finetuned
#
# Or use a pre-trained model (less accurate but no finetuning needed):
#   - all-MiniLM-L6-v2    (fast, ~70% accuracy)
#   - all-mpnet-base-v2   (better, ~80% accuracy)
# AGENTSHIELD_EMBEDDINGS__MODEL=all-MiniLM-L6-v2

# OpenAI provider model (when provider=openai)
# AGENTSHIELD_EMBEDDINGS__OPENAI_MODEL=text-embedding-3-small

# OpenAI-compatible embeddings endpoint overrides (OpenRouter/Together/Ollama/vLLM/etc.)
# AGENTSHIELD_EMBEDDINGS__BASE_URL=https://openrouter.ai/api/v1
# AGENTSHIELD_EMBEDDINGS__API_KEY=sk-or-provider-key
# AGENTSHIELD_EMBEDDINGS__DEFAULT_HEADERS='{"HTTP-Referer":"https://your-app.example"}'

# Optional: explicit embedding dimensions, for OpenAI-compatible models whose dimensions are not already known
# AGENTSHIELD_EMBEDDINGS__DIMENSIONS=1536

# =============================================================================
# Text Cleaning Configuration
# =============================================================================

# Cleaning method - how AgentShield removes injection patterns before comparing
#
# Options:
#   - "heuristic"  : Free, fast, regex-based (~70% accuracy)
#   - "llm"        : Uses an LLM (e.g. gpt-4o-mini; see AGENTSHIELD_CLEANING__LLM_MODEL), ~$0.0003/doc (~90% accuracy) [RECOMMENDED]
#
# AGENTSHIELD_CLEANING__METHOD=llm

# LLM model for cleaning (when method=llm)
# gpt-4o-mini is cheapest and works great for this task
# AGENTSHIELD_CLEANING__LLM_MODEL=gpt-4o-mini

# OpenAI-compatible cleaning endpoint overrides
# AGENTSHIELD_CLEANING__BASE_URL=https://openrouter.ai/api/v1
# AGENTSHIELD_CLEANING__API_KEY=sk-or-provider-key
# AGENTSHIELD_CLEANING__DEFAULT_HEADERS='{"HTTP-Referer":"https://your-app.example"}'

# =============================================================================
# ZEDD Detection Threshold
# =============================================================================

# Threshold determines when text is flagged as suspicious
#
# How it works:
#   - ZEDD computes "drift": how much the text changes after cleaning
#   - If drift > threshold, the text is flagged as suspicious
#
# Options:
#   - Leave empty/null   : Threshold is auto-resolved/calibrated from the current pipeline fingerprint [RECOMMENDED]
#   - Set explicit value : e.g., 0.0083 (from your finetuned model)
#
# Higher threshold = fewer false positives, but may miss some attacks
# Lower threshold  = catches more attacks, but produces more false positives
#
# AGENTSHIELD_ZEDD__THRESHOLD=

# =============================================================================
# Behavior on Detection
# =============================================================================

# What to do when a prompt injection is detected
#
# Options:
#   - "block"  : Raise PromptInjectionDetected exception (strict)
#   - "filter" : Remove suspicious documents silently (recommended for RAG)
#   - "flag"   : Add metadata but pass through (for logging/monitoring)
#   - "warn"   : Log warning but pass through (permissive)
#
# AGENTSHIELD_BEHAVIOR__ON_DETECT=filter

# Minimum confidence to trigger the on_detect action (0.0 to 1.0)
# Higher = only act on high-confidence detections
# AGENTSHIELD_BEHAVIOR__CONFIDENCE_THRESHOLD=0.5

# =============================================================================
# Performance Settings
# =============================================================================

# Batch size for embedding operations (higher = faster but more memory)
# AGENTSHIELD_PERFORMANCE__BATCH_SIZE=32

# Cache embeddings to avoid recomputing for repeated texts
# AGENTSHIELD_PERFORMANCE__CACHE_EMBEDDINGS=true

# Optional: cache directory used for thresholds and dimensions_cache.json
# AGENTSHIELD_PERFORMANCE__CACHE_DIR=~/.agentshield

# =============================================================================
# Logging
# =============================================================================

# Log level: DEBUG, INFO, WARNING, ERROR
# AGENTSHIELD_LOGGING__LEVEL=INFO

# =============================================================================
# Finetuning Configuration (for scripts/finetune_local.py)
# =============================================================================

# These settings are used when running the finetuning script, not at AgentShield runtime

# Max samples to process (more = better model but higher API cost)
# Default: 5000 samples, which cost ~$3-5 in OpenAI API calls
# FINETUNE_MAX_SAMPLES=5000

# Training batch size (reduce if you get OOM errors)
# On a Mac with 16 GB of memory: use 8
# On a Mac with 8 GB of memory: use 4
# FINETUNE_BATCH_SIZE=8

# Output directory for finetuned model
# FINETUNE_OUTPUT_DIR=./agentshield-embeddings-finetuned