Skip to content

Commit c0a3c8f

Browse files
rayketchamclaude
andcommitted
security: SSRF protection + ruff format CI fix
- Block private/loopback/link-local IPs in URL ingestion (SSRF fix) - validate_url() now resolves hostnames and rejects private ranges - fetch_url_content() calls validate_url() before any HTTP request - Disabled follow_redirects to prevent redirect-chain SSRF bypass - Applied ruff format to all files (CI formatting check was failing) Tests: 356 → 362 (+6 SSRF protection tests) Co-Authored-By: Claude <noreply@anthropic.com>
1 parent 4c6a2dc commit c0a3c8f

19 files changed

Lines changed: 242 additions & 121 deletions

src/project_forge/cron/horizontal.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -132,9 +132,7 @@ async def run_horizontal_cycle(db: Database) -> list[Idea]:
132132

133133
if len(ideas) < 2:
134134
# Fallback: generate a second cross-category idea with a different pair
135-
cat_c, cat_d = await pick_cross_category_pair(
136-
db, exclude=[(cat_a, cat_b)]
137-
)
135+
cat_c, cat_d = await pick_cross_category_pair(db, exclude=[(cat_a, cat_b)])
138136
idea2 = await generate_cross_idea(db, cat_c, cat_d)
139137
await db.save_idea(idea2)
140138
await db.record_category_pair(cat_c.value, cat_d.value, idea2.id)

src/project_forge/engine/introspect.py

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -51,11 +51,17 @@ def gather_self_context() -> dict:
5151
# --- Open GitHub issues ---
5252
open_issues: list[dict] = []
5353
try:
54-
result = _run([
55-
"gh", "issue", "list",
56-
"--state", "open",
57-
"--json", "title,number,labels,url",
58-
])
54+
result = _run(
55+
[
56+
"gh",
57+
"issue",
58+
"list",
59+
"--state",
60+
"open",
61+
"--json",
62+
"title,number,labels,url",
63+
]
64+
)
5965
if result.returncode == 0 and result.stdout.strip():
6066
open_issues = json.loads(result.stdout)
6167
except (FileNotFoundError, subprocess.TimeoutExpired, json.JSONDecodeError) as exc:
@@ -164,8 +170,7 @@ def build_introspection_prompt(context: dict, recent_improvements: list[str]) ->
164170
issues = context.get("open_issues", [])
165171
if issues:
166172
issues_lines = "\n".join(
167-
f"- #{i.get('number', '?')}: {i.get('title', '(no title)')}{i.get('url', '')}"
168-
for i in issues
173+
f"- #{i.get('number', '?')}: {i.get('title', '(no title)')}{i.get('url', '')}" for i in issues
169174
)
170175
else:
171176
issues_lines = "(no open issues)"

src/project_forge/engine/url_ingest.py

Lines changed: 71 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
"""URL ingestion engine — fetch URLs, extract content, generate ideas."""
22

3+
import ipaddress
34
import re
5+
import socket
46
from dataclasses import dataclass
57
from urllib.parse import parse_qs, urlencode, urlparse
68

@@ -31,16 +33,70 @@ class UrlContent:
3133
text: str
3234

3335

36+
def _check_ssrf(hostname: str) -> None:
37+
"""Resolve hostname and raise ValueError if it resolves to a private/reserved address.
38+
39+
Protects against Server-Side Request Forgery (SSRF) by blocking requests
40+
to loopback, private, link-local, and other reserved IP ranges.
41+
42+
Raises:
43+
ValueError: If the hostname resolves to a non-public IP address.
44+
socket.gaierror: If the hostname cannot be resolved (propagated to caller).
45+
"""
46+
try:
47+
addrinfos = socket.getaddrinfo(hostname, None)
48+
except socket.gaierror:
49+
# DNS resolution failure — not a private IP issue; let caller handle
50+
raise
51+
52+
for addrinfo in addrinfos:
53+
raw_ip = addrinfo[4][0]
54+
try:
55+
addr = ipaddress.ip_address(raw_ip)
56+
except ValueError:
57+
continue
58+
if addr.is_loopback or addr.is_private or addr.is_link_local or addr.is_reserved:
59+
raise ValueError(f"Requests to private/reserved addresses are not allowed: {raw_ip}")
60+
61+
3462
def validate_url(url: str) -> bool:
35-
"""Check if URL is valid http(s)."""
63+
"""Check if URL is valid http(s) and does not point to a private/reserved address.
64+
65+
Returns:
66+
True if the URL is structurally valid and resolves to a public address.
67+
68+
Raises:
69+
ValueError: If the URL resolves to a private, loopback, or link-local IP (SSRF guard).
70+
"""
3671
if not url:
3772
return False
3873
try:
3974
parsed = urlparse(url)
40-
return parsed.scheme in ("http", "https") and bool(parsed.netloc)
75+
if not (parsed.scheme in ("http", "https") and bool(parsed.netloc)):
76+
return False
4177
except Exception:
4278
return False
4379

80+
# Strip port from netloc to get bare hostname for DNS resolution
81+
hostname = parsed.hostname
82+
if not hostname:
83+
return False
84+
85+
# For bare IP addresses, validate directly without a DNS lookup
86+
try:
87+
addr = ipaddress.ip_address(hostname)
88+
if addr.is_loopback or addr.is_private or addr.is_link_local or addr.is_reserved:
89+
raise ValueError(f"Requests to private/reserved addresses are not allowed: {hostname}")
90+
return True
91+
except ValueError as exc:
92+
# Re-raise only the SSRF guard errors; ignore the "not a valid IP" parse error
93+
if "not allowed" in str(exc):
94+
raise
95+
96+
# Hostname is not a bare IP — resolve via DNS
97+
_check_ssrf(hostname)
98+
return True
99+
44100

45101
def extract_domain(url: str) -> str:
46102
"""Extract clean domain from URL (strip www.)."""
@@ -63,8 +119,19 @@ def clean_url(url: str) -> str:
63119

64120

65121
async def fetch_url_content(url: str) -> UrlContent:
66-
"""Fetch URL and extract content."""
67-
async with httpx.AsyncClient(follow_redirects=True, timeout=30.0) as client:
122+
"""Fetch URL and extract content.
123+
124+
Validates the URL for SSRF safety before making any network request.
125+
Redirects are disabled to prevent redirect-based SSRF bypasses.
126+
127+
Raises:
128+
ValueError: If the URL resolves to a private/reserved address.
129+
UrlFetchError: If the HTTP response indicates an error (status >= 400).
130+
"""
131+
# SSRF guard — must run before any network I/O
132+
validate_url(url)
133+
134+
async with httpx.AsyncClient(follow_redirects=False, timeout=30.0) as client:
68135
response = await client.get(url)
69136

70137
if response.status_code >= 400:

tests/test_compare.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -277,9 +277,7 @@ async def test_compare_with_repo_from_list(self, client_with_idea):
277277
):
278278
# Simulate list_org_repos returning realistic gh output
279279
mock_run.return_value.returncode = 0
280-
mock_run.return_value.stdout = (
281-
"rayketcham-lab/pki-ca-engine\tPKI CA\tpublic\n"
282-
)
280+
mock_run.return_value.stdout = "rayketcham-lab/pki-ca-engine\tPKI CA\tpublic\n"
283281
mock_run.return_value.stderr = ""
284282

285283
resp = await client.get("/api/repos")

tests/test_csp_security.py

Lines changed: 7 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -31,15 +31,9 @@ async def test_csp_no_unsafe_inline_scripts(client):
3131
assert csp, "CSP header is missing entirely"
3232

3333
# Extract only the script-src directive value
34-
directives = {
35-
part.strip().split()[0]: part.strip()
36-
for part in csp.split(";")
37-
if part.strip()
38-
}
34+
directives = {part.strip().split()[0]: part.strip() for part in csp.split(";") if part.strip()}
3935
script_src = directives.get("script-src", "")
40-
assert "'unsafe-inline'" not in script_src, (
41-
f"'unsafe-inline' must not appear in script-src. Got: {script_src!r}"
42-
)
36+
assert "'unsafe-inline'" not in script_src, f"'unsafe-inline' must not appear in script-src. Got: {script_src!r}"
4337

4438

4539
def test_no_inline_script_blocks_in_templates():
@@ -57,10 +51,7 @@ def test_no_inline_script_blocks_in_templates():
5751
if stripped.startswith("<script") and "src=" not in stripped:
5852
violations.append(f"{path.name}:{lineno}: {line.strip()}")
5953

60-
assert not violations, (
61-
"Inline <script> blocks found in templates — move to app.js:\n"
62-
+ "\n".join(violations)
63-
)
54+
assert not violations, "Inline <script> blocks found in templates — move to app.js:\n" + "\n".join(violations)
6455

6556

6657
def test_no_onclick_handlers_in_templates():
@@ -76,9 +67,8 @@ def test_no_onclick_handlers_in_templates():
7667
if "onclick=" in line:
7768
violations.append(f"{path.name}:{lineno}: {line.strip()}")
7869

79-
assert not violations, (
80-
"onclick= attributes found in templates — move to addEventListener in app.js:\n"
81-
+ "\n".join(violations)
70+
assert not violations, "onclick= attributes found in templates — move to addEventListener in app.js:\n" + "\n".join(
71+
violations
8272
)
8373

8474

@@ -95,14 +85,10 @@ def test_app_js_has_promote_function():
9585
def test_app_js_has_reject_function():
9686
"""app.js must define the rejectProposal function."""
9787
content = APP_JS.read_text()
98-
assert "rejectProposal" in content, (
99-
"rejectProposal function not found in app.js"
100-
)
88+
assert "rejectProposal" in content, "rejectProposal function not found in app.js"
10189

10290

10391
def test_app_js_has_switch_tab():
10492
"""app.js must define the switchTab function (already present, must not be removed)."""
10593
content = APP_JS.read_text()
106-
assert "function switchTab" in content, (
107-
"switchTab function not found in app.js — must not be removed"
108-
)
94+
assert "function switchTab" in content, "switchTab function not found in app.js — must not be removed"

tests/test_dashboard_rendering.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,7 @@ async def test_super_ideas_stat_shows_count_not_score(self, client):
9191
number_line = [line for line in lines if "stat-number" in line][-1]
9292
# Extract the number
9393
import re
94+
9495
match = re.search(r">(\d+)<", number_line)
9596
assert match, f"Super Ideas stat should be an integer, got: {number_line}"
9697
count = int(match.group(1))

tests/test_governance_banner.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
"""Tests for governance banner — autonomous project under human authority."""
22

3-
43
import pytest
54
import pytest_asyncio
65
from httpx import ASGITransport, AsyncClient

tests/test_horizontal.py

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -106,19 +106,15 @@ async def test_exclude_parameter(self, db):
106106
class TestGenerateCrossIdea:
107107
@pytest.mark.asyncio
108108
async def test_generates_valid_idea(self, db):
109-
idea = await generate_cross_idea(
110-
db, IdeaCategory.PQC_CRYPTOGRAPHY, IdeaCategory.COMPLIANCE
111-
)
109+
idea = await generate_cross_idea(db, IdeaCategory.PQC_CRYPTOGRAPHY, IdeaCategory.COMPLIANCE)
112110
assert isinstance(idea, Idea)
113111
assert idea.name
114112
assert idea.description
115113
assert idea.feasibility_score > 0
116114

117115
@pytest.mark.asyncio
118116
async def test_cross_idea_has_crossover_content(self, db):
119-
idea = await generate_cross_idea(
120-
db, IdeaCategory.SECURITY_TOOL, IdeaCategory.PRIVACY
121-
)
117+
idea = await generate_cross_idea(db, IdeaCategory.SECURITY_TOOL, IdeaCategory.PRIVACY)
122118
# The description should reference cross-domain concepts
123119
assert "cross" in idea.description.lower() or "meets" in idea.description.lower()
124120

tests/test_introspect.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -95,8 +95,18 @@ class TestBuildIntrospectionPrompt:
9595
def _make_context(self) -> dict:
9696
return {
9797
"open_issues": [
98-
{"number": 42, "title": "Add rate limiting to API", "labels": [], "url": "https://github.com/x/y/issues/42"},
99-
{"number": 7, "title": "Tests missing for scorer module", "labels": [], "url": "https://github.com/x/y/issues/7"},
98+
{
99+
"number": 42,
100+
"title": "Add rate limiting to API",
101+
"labels": [],
102+
"url": "https://github.com/x/y/issues/42",
103+
},
104+
{
105+
"number": 7,
106+
"title": "Tests missing for scorer module",
107+
"labels": [],
108+
"url": "https://github.com/x/y/issues/7",
109+
},
100110
],
101111
"recent_commits": [
102112
"abc1234 feat: add dashboard tabs",

tests/test_introspect_runner.py

Lines changed: 23 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -49,10 +49,16 @@ async def test_run_introspect_cycle_generates_idea(self):
4949
)
5050
mock_generator.generate = AsyncMock(return_value=fake_idea)
5151

52-
with patch("project_forge.cron.introspect_runner.gather_self_context", return_value={
53-
"open_issues": [], "recent_commits": [], "test_count": 10,
54-
"lint_status": "clean", "code_stats": {"src": 1000, "tests": 500},
55-
}):
52+
with patch(
53+
"project_forge.cron.introspect_runner.gather_self_context",
54+
return_value={
55+
"open_issues": [],
56+
"recent_commits": [],
57+
"test_count": 10,
58+
"lint_status": "clean",
59+
"code_stats": {"src": 1000, "tests": 500},
60+
},
61+
):
5662
idea = await run_introspect_cycle(mock_db, mock_generator)
5763

5864
assert idea is not None
@@ -92,10 +98,19 @@ async def test_run_introspect_cycle_avoids_recent_names(self):
9298
)
9399
mock_generator.generate = AsyncMock(return_value=fake_idea)
94100

95-
with patch("project_forge.cron.introspect_runner.gather_self_context", return_value={
96-
"open_issues": [], "recent_commits": [], "test_count": 10,
97-
"lint_status": "clean", "code_stats": {},
98-
}), patch("project_forge.cron.introspect_runner.build_introspection_prompt") as mock_prompt:
101+
with (
102+
patch(
103+
"project_forge.cron.introspect_runner.gather_self_context",
104+
return_value={
105+
"open_issues": [],
106+
"recent_commits": [],
107+
"test_count": 10,
108+
"lint_status": "clean",
109+
"code_stats": {},
110+
},
111+
),
112+
patch("project_forge.cron.introspect_runner.build_introspection_prompt") as mock_prompt,
113+
):
99114
mock_prompt.return_value = "fake prompt"
100115
await run_introspect_cycle(mock_db, mock_generator)
101116

0 commit comments

Comments
 (0)