From d1d59c37a57b191c48f35dcb645ed34c07a872d6 Mon Sep 17 00:00:00 2001
From: zy84338719 <zy84338719@hotmail.com>
Date: Sun, 28 Jun 2026 16:48:43 +0800
Subject: [PATCH 1/2] fix: add configurable HTTP timeout for LLM API calls

The LLM HTTP client had no user-configurable request timeout. When the
LLM API becomes unresponsive (e.g., rate limiting, network issues), the
ocr process hangs indefinitely with 0% CPU, holding an ESTABLISHED TCP
connection that never completes.

Changes:
- Add Timeout field to ResolvedEndpoint struct
- Add OCR_LLM_TIMEOUT environment variable (value in seconds)
- Add timeout_sec field to config.json llm and provider sections
- Pass timeout from ResolvedEndpoint to ClientConfig in NewLLMClient

The default timeout remains 5 minutes when not explicitly configured.
Users can now set a shorter timeout via:
  - Environment: OCR_LLM_TIMEOUT=120
  - Config file: {"llm": {"timeout_sec": 120}}

Fixes #237
---
 internal/llm/client.go   |  1 +
 internal/llm/resolver.go | 32 ++++++++++++++++++++++++++++++--
 2 files changed, 31 insertions(+), 2 deletions(-)

diff --git a/internal/llm/client.go b/internal/llm/client.go
index afbec687..685c7170 100644
--- a/internal/llm/client.go
+++ b/internal/llm/client.go
@@ -198,6 +198,7 @@ func NewLLMClient(ep ResolvedEndpoint) LLMClient {
 		APIKey:       ep.Token,
 		Model:        ep.Model,
 		AuthHeader:   ep.AuthHeader,
+		Timeout:      ep.Timeout,
 		ExtraBody:    ep.ExtraBody,
 		ExtraHeaders: ep.ExtraHeaders,
 	}
diff --git a/internal/llm/resolver.go b/internal/llm/resolver.go
index 8ed011d8..47124e64 100644
--- a/internal/llm/resolver.go
+++ b/internal/llm/resolver.go
@@ -6,7 +6,9 @@ import (
 	"os"
 	"path/filepath"
 	"regexp"
+	"strconv"
 	"strings"
+	"time"
 )
 
 // ResolvedEndpoint holds the resolved LLM endpoint configuration.
@@ -19,6 +21,7 @@ type ResolvedEndpoint struct {
 	Source       string            // human-readable config source label
 	ExtraBody    map[string]any    // vendor-specific request body fields
 	ExtraHeaders map[string]string // extra HTTP headers for the LLM request
+	Timeout      time.Duration     // per-request HTTP timeout; 0 means use default (5 min)
 }
 
 // Environment variable names for OCR-specific configuration.
@@ -28,6 +31,7 @@ const (
 	envOCRLLMModel        = "OCR_LLM_MODEL"
 	envOCRLLMAuthHeader   = "OCR_LLM_AUTH_HEADER"
 	envOCRLLMExtraHeaders = "OCR_LLM_EXTRA_HEADERS"
+	envOCRLLMTimeout      = "OCR_LLM_TIMEOUT"
 	envOCRUseAnthropic    = "OCR_USE_ANTHROPIC"
 )
 
@@ -122,7 +126,18 @@ func tryOCREnv(modelOverride string) (ResolvedEndpoint, bool, error) {
 		}
 	}
 
-	return ResolvedEndpoint{URL: url, Token: token, Model: model, Protocol: protocol, AuthHeader: authHeader, Source: "OCR environment", ExtraHeaders: extraHeaders}, true, nil
+	var timeout time.Duration
+	if timeoutStr := os.Getenv(envOCRLLMTimeout); timeoutStr != "" {
+		timeoutSec, err := strconv.Atoi(strings.TrimSpace(timeoutStr))
+		if err != nil {
+			return ResolvedEndpoint{}, false, fmt.Errorf("OCR_LLM_TIMEOUT must be an integer (seconds): %w", err)
+		}
+		if timeoutSec > 0 {
+			timeout = time.Duration(timeoutSec) * time.Second
+		}
+	}
+
+	return ResolvedEndpoint{URL: url, Token: token, Model: model, Protocol: protocol, AuthHeader: authHeader, Source: "OCR environment", ExtraHeaders: extraHeaders, Timeout: timeout}, true, nil
 }
 
 // llmFileConfig represents the llm section in config.json.
@@ -132,6 +147,7 @@ type llmFileConfig struct {
 	AuthHeader   string            `json:"auth_header,omitempty"`
 	Model        string            `json:"model,omitempty"`
 	UseAnthropic *bool             `json:"use_anthropic,omitempty"` // pointer to distinguish unset from false
+	TimeoutSec   int               `json:"timeout_sec,omitempty"`  // per-request HTTP timeout in seconds
 	ExtraBody    map[string]any    `json:"extra_body,omitempty"`
 	ExtraHeaders map[string]string `json:"extra_headers,omitempty"`
 }
@@ -144,6 +160,7 @@ type providerEntryConfig struct {
 	Model        string            `json:"model,omitempty"`
 	Models       []string          `json:"models,omitempty"`
 	AuthHeader   string            `json:"auth_header,omitempty"`
+	TimeoutSec   int               `json:"timeout_sec,omitempty"` // per-request HTTP timeout in seconds
 	ExtraBody    map[string]any    `json:"extra_body,omitempty"`
 	ExtraHeaders map[string]string `json:"extra_headers,omitempty"`
 }
@@ -288,6 +305,11 @@ func tryProviderConfig(cfg configFile, modelOverride string) (ResolvedEndpoint,
 	extraBody = entry.ExtraBody
 	extraHeaders := entry.ExtraHeaders
 
+	var timeout time.Duration
+	if entry.TimeoutSec > 0 {
+		timeout = time.Duration(entry.TimeoutSec) * time.Second
+	}
+
 	if protocol == "anthropic" {
 		url = ensureMessagesSuffix(url)
 	}
@@ -301,6 +323,7 @@ func tryProviderConfig(cfg configFile, modelOverride string) (ResolvedEndpoint,
 		Source:       "provider:" + cfg.Provider,
 		ExtraBody:    extraBody,
 		ExtraHeaders: extraHeaders,
+		Timeout:      timeout,
 	}, true, nil
 }
 
@@ -336,7 +359,12 @@ func tryLegacyLlmConfig(cfg configFile, modelOverride string) (ResolvedEndpoint,
 		}
 	}
 
-	return ResolvedEndpoint{URL: cfg.Llm.URL, Token: cfg.Llm.AuthToken, Model: model, Protocol: protocol, AuthHeader: authHeader, Source: "OCR config file", ExtraBody: cfg.Llm.ExtraBody, ExtraHeaders: cfg.Llm.ExtraHeaders}, true, nil
+	var timeout time.Duration
+	if cfg.Llm.TimeoutSec > 0 {
+		timeout = time.Duration(cfg.Llm.TimeoutSec) * time.Second
+	}
+
+	return ResolvedEndpoint{URL: cfg.Llm.URL, Token: cfg.Llm.AuthToken, Model: model, Protocol: protocol, AuthHeader: authHeader, Source: "OCR config file", ExtraBody: cfg.Llm.ExtraBody, ExtraHeaders: cfg.Llm.ExtraHeaders, Timeout: timeout}, true, nil
 }
 
 // tryCCEnv reads Claude Code environment variables.

From 69e557574a0cdb79bcf6a3cb0c13b2ab39cd9a97 Mon Sep 17 00:00:00 2001
From: zy84338719 <zy84338719@hotmail.com>
Date: Sun, 28 Jun 2026 16:58:07 +0800
Subject: [PATCH 2/2] fix: address Copilot review feedback

- Make OCR_LLM_TIMEOUT a global override (works with any endpoint strategy)
- Add proper overflow detection for timeout values (check before multiply)
- Add validateTimeoutSec helper with negative/overflow guards
- Add comprehensive tests for timeout parsing, validation, and forwarding
- Reject negative timeout_sec in config files with explicit error
---
 internal/llm/resolver.go      |  73 +++++++++++----
 internal/llm/resolver_test.go | 165 ++++++++++++++++++++++++++++++++++
 2 files changed, 221 insertions(+), 17 deletions(-)

diff --git a/internal/llm/resolver.go b/internal/llm/resolver.go
index 47124e64..a1cfe161 100644
--- a/internal/llm/resolver.go
+++ b/internal/llm/resolver.go
@@ -3,6 +3,7 @@ package llm
 import (
 	"encoding/json"
 	"fmt"
+	"math"
 	"os"
 	"path/filepath"
 	"regexp"
@@ -75,6 +76,12 @@ func ResolveEndpointWithModelOverride(configPath, modelOverride string) (Resolve
 				ep.Source = s.name
 			}
 			ep.Model = stripModelSuffix(ep.Model)
+			// OCR_LLM_TIMEOUT is a global override: applies regardless of
+			// which strategy resolved the endpoint, and takes precedence
+			// over config-file values when set.
+			if envTimeout, ok := parseTimeoutEnv(); ok {
+				ep.Timeout = envTimeout
+			}
 			return ep, nil
 		}
 	}
@@ -82,6 +89,46 @@ func ResolveEndpointWithModelOverride(configPath, modelOverride string) (Resolve
 	return ResolvedEndpoint{}, fmt.Errorf("no valid LLM endpoint configured; one of OCR_LLM_URL/OCR_LLM_TOKEN/OCR_LLM_MODEL, ~/.opencodereview/config.json, or ANTHROPIC_BASE_URL/ANTHROPIC_AUTH_TOKEN/ANTHROPIC_MODEL must be set")
 }
 
+// parseTimeoutEnv reads and validates the OCR_LLM_TIMEOUT environment variable.
+// Returns the parsed duration and true if set, or 0 and false if unset/empty.
+// Rejects negative values and values that would overflow time.Duration.
+func parseTimeoutEnv() (time.Duration, bool) {
+	raw := strings.TrimSpace(os.Getenv(envOCRLLMTimeout))
+	if raw == "" {
+		return 0, false
+	}
+	sec, err := strconv.Atoi(raw)
+	if err != nil {
+		return 0, false
+	}
+	if sec < 0 {
+		return 0, false
+	}
+	// Guard against overflow: time.Duration is int64 nanoseconds.
+	maxSec := int64(math.MaxInt64 / int64(time.Second))
+	if int64(sec) > maxSec {
+		return 0, false
+	}
+	return time.Duration(sec) * time.Second, true
+}
+
+// validateTimeoutSec converts a config-file timeout (in seconds) to time.Duration.
+// Returns 0 for zero input (use default). Rejects negative values and overflow.
+func validateTimeoutSec(sec int) (time.Duration, error) {
+	if sec == 0 {
+		return 0, nil
+	}
+	if sec < 0 {
+		return 0, fmt.Errorf("timeout_sec must be non-negative, got %d", sec)
+	}
+	// Guard against overflow: time.Duration is int64 nanoseconds.
+	maxSec := int64(math.MaxInt64 / int64(time.Second))
+	if int64(sec) > maxSec {
+		return 0, fmt.Errorf("timeout_sec %d overflows time.Duration (max %d)", sec, maxSec)
+	}
+	return time.Duration(sec) * time.Second, nil
+}
+
 // tryOCREnv reads OCR-specific environment variables.
 func tryOCREnv(modelOverride string) (ResolvedEndpoint, bool, error) {
 	url := os.Getenv(envOCRLLMURL)
@@ -126,18 +173,10 @@ func tryOCREnv(modelOverride string) (ResolvedEndpoint, bool, error) {
 		}
 	}
 
-	var timeout time.Duration
-	if timeoutStr := os.Getenv(envOCRLLMTimeout); timeoutStr != "" {
-		timeoutSec, err := strconv.Atoi(strings.TrimSpace(timeoutStr))
-		if err != nil {
-			return ResolvedEndpoint{}, false, fmt.Errorf("OCR_LLM_TIMEOUT must be an integer (seconds): %w", err)
-		}
-		if timeoutSec > 0 {
-			timeout = time.Duration(timeoutSec) * time.Second
-		}
-	}
+	// Note: OCR_LLM_TIMEOUT is applied globally in ResolveEndpointWithModelOverride,
+	// not here, so it works regardless of which strategy resolves the endpoint.
 
-	return ResolvedEndpoint{URL: url, Token: token, Model: model, Protocol: protocol, AuthHeader: authHeader, Source: "OCR environment", ExtraHeaders: extraHeaders, Timeout: timeout}, true, nil
+	return ResolvedEndpoint{URL: url, Token: token, Model: model, Protocol: protocol, AuthHeader: authHeader, Source: "OCR environment", ExtraHeaders: extraHeaders}, true, nil
 }
 
 // llmFileConfig represents the llm section in config.json.
@@ -305,9 +344,9 @@ func tryProviderConfig(cfg configFile, modelOverride string) (ResolvedEndpoint,
 	extraBody = entry.ExtraBody
 	extraHeaders := entry.ExtraHeaders
 
-	var timeout time.Duration
-	if entry.TimeoutSec > 0 {
-		timeout = time.Duration(entry.TimeoutSec) * time.Second
+	timeout, err := validateTimeoutSec(entry.TimeoutSec)
+	if err != nil {
+		return ResolvedEndpoint{}, false, fmt.Errorf("provider %q: %w", cfg.Provider, err)
 	}
 
 	if protocol == "anthropic" {
@@ -359,9 +398,9 @@ func tryLegacyLlmConfig(cfg configFile, modelOverride string) (ResolvedEndpoint,
 		}
 	}
 
-	var timeout time.Duration
-	if cfg.Llm.TimeoutSec > 0 {
-		timeout = time.Duration(cfg.Llm.TimeoutSec) * time.Second
+	timeout, err := validateTimeoutSec(cfg.Llm.TimeoutSec)
+	if err != nil {
+		return ResolvedEndpoint{}, false, fmt.Errorf("OCR config file: %w", err)
 	}
 
 	return ResolvedEndpoint{URL: cfg.Llm.URL, Token: cfg.Llm.AuthToken, Model: model, Protocol: protocol, AuthHeader: authHeader, Source: "OCR config file", ExtraBody: cfg.Llm.ExtraBody, ExtraHeaders: cfg.Llm.ExtraHeaders, Timeout: timeout}, true, nil
diff --git a/internal/llm/resolver_test.go b/internal/llm/resolver_test.go
index 7e5d6afa..48841c8a 100644
--- a/internal/llm/resolver_test.go
+++ b/internal/llm/resolver_test.go
@@ -6,6 +6,7 @@ import (
 	"path/filepath"
 	"strings"
 	"testing"
+	"time"
 )
 
 func TestStripModelSuffix(t *testing.T) {
@@ -1123,3 +1124,167 @@ func TestResolveEndpoint_LegacyLlmExtraHeaders(t *testing.T) {
 		t.Errorf("ExtraHeaders[\"X-Legacy\"] = %q, want %q", v, "yes")
 	}
 }
+
+func TestParseTimeoutEnv(t *testing.T) {
+	tests := []struct {
+		name    string
+		value   string
+		want    time.Duration
+		wantOK  bool
+	}{
+		{"empty", "", 0, false},
+		{"valid 120", "120", 120 * time.Second, true},
+		{"valid 60", "60", 60 * time.Second, true},
+		{"zero", "0", 0, true},
+		{"negative", "-5", 0, false},
+		{"non-integer", "abc", 0, false},
+		{"with spaces", " 90 ", 90 * time.Second, true},
+		{"overflow", "99999999999999", 0, false},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			t.Setenv("OCR_LLM_TIMEOUT", tt.value)
+			got, ok := parseTimeoutEnv()
+			if ok != tt.wantOK {
+				t.Errorf("parseTimeoutEnv() ok = %v, want %v", ok, tt.wantOK)
+			}
+			if got != tt.want {
+				t.Errorf("parseTimeoutEnv() = %v, want %v", got, tt.want)
+			}
+		})
+	}
+}
+
+func TestValidateTimeoutSec(t *testing.T) {
+	tests := []struct {
+		name    string
+		input   int
+		want    time.Duration
+		wantErr bool
+	}{
+		{"zero", 0, 0, false},
+		{"positive 60", 60, 60 * time.Second, false},
+		{"positive 300", 300, 300 * time.Second, false},
+		{"negative -1", -1, 0, true},
+		{"negative -100", -100, 0, true},
+		{"max safe", 9223372036, 9223372036 * time.Second, false},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			got, err := validateTimeoutSec(tt.input)
+			if (err != nil) != tt.wantErr {
+				t.Errorf("validateTimeoutSec(%d) error = %v, wantErr %v", tt.input, err, tt.wantErr)
+			}
+			if got != tt.want {
+				t.Errorf("validateTimeoutSec(%d) = %v, want %v", tt.input, got, tt.want)
+			}
+		})
+	}
+}
+
+func TestResolveEndpoint_EnvTimeoutGlobalOverride(t *testing.T) {
+	clearAllEnv(t)
+	t.Setenv("OCR_LLM_URL", "https://api.example.com/v1")
+	t.Setenv("OCR_LLM_TOKEN", "test-token")
+	t.Setenv("OCR_LLM_MODEL", "mimo-v2.5-pro")
+	t.Setenv("OCR_USE_ANTHROPIC", "false")
+	t.Setenv("OCR_LLM_TIMEOUT", "90")
+
+	ep, err := ResolveEndpoint(filepath.Join(t.TempDir(), "nonexistent.json"))
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if ep.Timeout != 90*time.Second {
+		t.Errorf("Timeout = %v, want %v", ep.Timeout, 90*time.Second)
+	}
+}
+
+func TestResolveEndpoint_ConfigTimeoutSec(t *testing.T) {
+	clearAllEnv(t)
+
+	cfg := configFile{
+		Llm: llmFileConfig{
+			URL:        "https://api.example.com/v1/messages",
+			AuthToken:  "test-token",
+			Model:      "claude-opus-4-6",
+			TimeoutSec: 120,
+		},
+	}
+	data, _ := json.Marshal(cfg)
+	cfgPath := filepath.Join(t.TempDir(), "config.json")
+	os.WriteFile(cfgPath, data, 0644)
+
+	ep, err := ResolveEndpoint(cfgPath)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if ep.Timeout != 120*time.Second {
+		t.Errorf("Timeout = %v, want %v", ep.Timeout, 120*time.Second)
+	}
+}
+
+func TestResolveEndpoint_NegativeConfigTimeoutSec(t *testing.T) {
+	clearAllEnv(t)
+
+	cfg := configFile{
+		Llm: llmFileConfig{
+			URL:        "https://api.example.com/v1/messages",
+			AuthToken:  "test-token",
+			Model:      "claude-opus-4-6",
+			TimeoutSec: -5,
+		},
+	}
+	data, _ := json.Marshal(cfg)
+	cfgPath := filepath.Join(t.TempDir(), "config.json")
+	os.WriteFile(cfgPath, data, 0644)
+
+	_, err := ResolveEndpoint(cfgPath)
+	if err == nil {
+		t.Fatal("expected error for negative timeout_sec, got nil")
+	}
+	if !strings.Contains(err.Error(), "non-negative") {
+		t.Errorf("error %q should mention non-negative", err.Error())
+	}
+}
+
+func TestNewLLMClient_TimeoutForwarded(t *testing.T) {
+	ep := ResolvedEndpoint{
+		URL:     "https://api.example.com/v1",
+		Token:   "test-token",
+		Model:   "test-model",
+		Timeout: 2 * time.Minute,
+	}
+
+	client := NewLLMClient(ep)
+	if client == nil {
+		t.Fatal("NewLLMClient returned nil")
+	}
+
+	// Verify the client was created (we can't easily inspect the internal timeout,
+	// but we can verify the client is functional and was constructed without error).
+	if oc, ok := client.(*OpenAIClient); ok {
+		if oc.cfg.Timeout != 2*time.Minute {
+			t.Errorf("OpenAIClient cfg.Timeout = %v, want %v", oc.cfg.Timeout, 2*time.Minute)
+		}
+	} else {
+		t.Errorf("expected *OpenAIClient, got %T", client)
+	}
+}
+
+func TestNewLLMClient_DefaultTimeout(t *testing.T) {
+	ep := ResolvedEndpoint{
+		URL:   "https://api.example.com/v1",
+		Token: "test-token",
+		Model: "test-model",
+		// Timeout not set — should default to 5 minutes
+	}
+
+	client := NewLLMClient(ep)
+	if oc, ok := client.(*OpenAIClient); ok {
+		if oc.cfg.Timeout != 5*time.Minute {
+			t.Errorf("OpenAIClient cfg.Timeout = %v, want default %v", oc.cfg.Timeout, 5*time.Minute)
+		}
+	}
+}