-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathrun.py
More file actions
executable file
·123 lines (103 loc) · 4.2 KB
/
run.py
File metadata and controls
executable file
·123 lines (103 loc) · 4.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
#!/usr/bin/env python3
"""
LLM Feedback Chat Application - Startup Script
Usage:
python run.py [--host HOST] [--port PORT] [--reload] [--workers N]
Configuration is loaded from .env file. See .env.example for all options.
Command-line arguments override .env settings.
"""
import argparse
import logging
import sys
import uvicorn
from app.config import settings
def setup_logging():
    """Initialise root logging from application settings.

    Resolves ``settings.log_level`` (case-insensitive) to a ``logging``
    level constant, falling back to ``INFO`` for unrecognised names, and
    sends all records to stdout.
    """
    level_name = settings.log_level.upper()
    resolved_level = getattr(logging, level_name, logging.INFO)
    stdout_handler = logging.StreamHandler(sys.stdout)
    logging.basicConfig(
        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
        level=resolved_level,
        handlers=[stdout_handler],
    )
def _build_parser() -> argparse.ArgumentParser:
    """Build the CLI parser; defaults come from the .env-backed settings."""
    parser = argparse.ArgumentParser(
        description="Run the LLM Feedback Chat server",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  python run.py                 # Run with defaults from .env
  python run.py --port 9000     # Override port
  python run.py --reload        # Development mode with auto-reload
  python run.py --workers 2     # Multiple workers (CPU inference only)

Configuration:
  Copy .env.example to .env and edit as needed.
  Environment variables override .env settings.
  Command-line arguments override environment variables.
""",
    )
    parser.add_argument(
        "--host",
        default=settings.host,
        help=f"Host to bind to (default: {settings.host})",
    )
    parser.add_argument(
        "--port",
        type=int,
        default=settings.port,
        help=f"Port to bind to (default: {settings.port})",
    )
    parser.add_argument(
        "--reload",
        action="store_true",
        default=settings.debug,
        help="Enable auto-reload for development",
    )
    parser.add_argument(
        "--workers",
        type=int,
        default=settings.workers,
        help=f"Number of worker processes (default: {settings.workers})",
    )
    parser.add_argument(
        "--debug",
        action="store_true",
        default=settings.debug,
        help="Enable debug mode (forces debug-level server logging)",
    )
    return parser


def _print_banner(args) -> None:
    """Print a startup banner summarising provider, model, device and server.

    Rows and borders are computed against a single interior width so the
    box stays aligned regardless of value lengths.
    """
    model_source = str(settings.model_path) if settings.model_path else settings.model_name
    provider_info = settings.llm_provider.value
    if settings.llm_provider.value == "openai" and settings.openai_base_url:
        provider_info += f" ({settings.openai_base_url})"
    device_info = settings.model_device or "auto-detect"
    if settings.llm_provider.value == "openai":
        device_info = "API (remote)"
    elif settings.cuda_visible_devices:
        device_info += f" (CUDA devices: {settings.cuda_visible_devices})"

    width = 74  # interior width between the ║ border characters
    rows = [
        ("Provider", provider_info),
        ("Model", model_source),
        ("Device", device_info),
        # Pad the whole URL, not just the port, so the right border lines up.
        ("Server", f"http://{args.host}:{args.port}"),
        ("Workers", str(args.workers)),
    ]
    lines = [
        "╔" + "═" * width + "╗",
        f"║ {settings.app_name:^{width - 2}} ║",
        "╠" + "═" * width + "╣",
    ]
    for label, value in rows:
        lines.append(f"║   {label + ':':<10}{value:<{width - 14}} ║")
    lines.append("╚" + "═" * width + "╝")
    print("\n" + "\n".join(lines) + "\n")


def main():
    """Parse CLI arguments, configure logging, and launch the uvicorn server.

    Precedence: command-line arguments override environment variables,
    which override the .env file (see module docstring).
    """
    args = _build_parser().parse_args()
    setup_logging()
    _print_banner(args)

    # Wire up --debug (previously parsed but unused): debug mode forces
    # verbose server logs; otherwise honour the configured log level.
    log_level = "debug" if args.debug else settings.log_level.lower()

    uvicorn_kwargs = {
        "host": args.host,
        "port": args.port,
        "reload": args.reload,
        "log_level": log_level,
    }
    # uvicorn's reload mode is incompatible with multiple workers; tell the
    # user when their --workers value is being ignored instead of dropping
    # it silently.
    if args.workers > 1:
        if args.reload:
            print(f"Warning: --reload is enabled; ignoring --workers {args.workers}.")
        else:
            uvicorn_kwargs["workers"] = args.workers
            print(f"Warning: Using {args.workers} workers. Ensure you have enough GPU memory!")

    uvicorn.run("app.main:app", **uvicorn_kwargs)


if __name__ == "__main__":
    main()