From 44423a2aad80943bc7624ecd8913672a6c95ea48 Mon Sep 17 00:00:00 2001 From: Shiwani Mishra Date: Tue, 31 Mar 2026 17:20:25 +0530 Subject: [PATCH] feat: validate critical config at startup and abort with clear messages Add application/validate_config.py with three focused validators: - validate_web_config(mode): checks GOOGLE_CLIENT_SECRET and GOOGLE_CLIENT_ID at web server startup. Exits with a clear error if either is missing and NO_LOGIN is not set. Logs warnings when NEO4J_URL or REDIS_URL are absent so optional-service failures are visible immediately rather than surfacing deep in a request. Skipped in test/testing mode. - validate_embeddings_config(): called in cre_main.run() before --generate_embeddings. Exits if none of OPENAI_API_KEY, GEMINI_API_KEY, or GCP_NATIVE is configured. - validate_neo4j_config(): called in cre_main.run() before --populate_neo4j_db. Exits if NEO4J_URL is not set. The web validator is wired into create_app() and skipped when a custom conf object is provided (CLI db_connect path) or when running in test mode, so neither the CLI nor the test suite is affected. --- application/__init__.py | 6 + application/cmd/cre_main.py | 6 + application/tests/validate_config_test.py | 150 ++++++++++++++++++++++ application/validate_config.py | 83 ++++++++++++ 4 files changed, 245 insertions(+) create mode 100644 application/tests/validate_config_test.py create mode 100644 application/validate_config.py diff --git a/application/__init__.py b/application/__init__.py index 454ca7c38..95bb6e8b8 100644 --- a/application/__init__.py +++ b/application/__init__.py @@ -37,6 +37,12 @@ def create_app(mode: str = "production", conf: any = None) -> Any: letters = string.ascii_lowercase app.secret_key = "".join(random.choice(letters) for i in range(20)) + # conf is only provided by CLI db_connect(); skip web validation in that path. 
+ if conf is None: + from application.validate_config import validate_web_config + + validate_web_config(mode) + # config[mode].init_app(app) sqla.init_app(app=app) from application.web.web_main import app as app_blueprint diff --git a/application/cmd/cre_main.py b/application/cmd/cre_main.py index ead5a4281..490ca7392 100644 --- a/application/cmd/cre_main.py +++ b/application/cmd/cre_main.py @@ -656,8 +656,14 @@ def run(args: argparse.Namespace) -> None: # pragma: no cover BaseParser().call_importers(db_connection_str=args.cache_file) if args.generate_embeddings: + from application.validate_config import validate_embeddings_config + + validate_embeddings_config() generate_embeddings(args.cache_file) if args.populate_neo4j_db: + from application.validate_config import validate_neo4j_config + + validate_neo4j_config() populate_neo4j_db(args.cache_file) if args.start_worker: from application.worker import start_worker diff --git a/application/tests/validate_config_test.py b/application/tests/validate_config_test.py new file mode 100644 index 000000000..5c0d0b072 --- /dev/null +++ b/application/tests/validate_config_test.py @@ -0,0 +1,150 @@ +import os +import unittest +from unittest.mock import patch + +from application.validate_config import ( + validate_embeddings_config, + validate_neo4j_config, + validate_web_config, +) + + +class TestValidateWebConfig(unittest.TestCase): + def test_skips_in_test_mode(self): + """Should not call sys.exit in test or testing mode regardless of env vars.""" + with patch("sys.exit") as mock_exit, patch.dict( + os.environ, {"GOOGLE_CLIENT_SECRET": "", "GOOGLE_CLIENT_ID": ""} + ): + os.environ.pop("NO_LOGIN", None) + validate_web_config("test") + validate_web_config("testing") + mock_exit.assert_not_called() + + def test_exits_when_auth_credentials_missing(self): + """Should exit when NO_LOGIN is not set and Google credentials are absent.""" + with patch("sys.exit") as mock_exit, patch.dict( + os.environ, {"GOOGLE_CLIENT_SECRET": "", 
"GOOGLE_CLIENT_ID": ""} + ): + os.environ.pop("NO_LOGIN", None) + validate_web_config("production") + mock_exit.assert_called_once_with(1) + + def test_exits_when_only_secret_missing(self): + """Should exit when GOOGLE_CLIENT_SECRET is absent even if GOOGLE_CLIENT_ID is set.""" + with patch("sys.exit") as mock_exit, patch.dict( + os.environ, + {"GOOGLE_CLIENT_SECRET": "", "GOOGLE_CLIENT_ID": "client-id"}, + ): + os.environ.pop("NO_LOGIN", None) + validate_web_config("production") + mock_exit.assert_called_once_with(1) + + def test_skips_auth_check_when_no_login_set(self): + """Should not exit when NO_LOGIN is set, even without Google credentials.""" + with patch("sys.exit") as mock_exit, patch.dict( + os.environ, + { + "NO_LOGIN": "True", + "GOOGLE_CLIENT_SECRET": "", + "GOOGLE_CLIENT_ID": "", + }, + ): + validate_web_config("production") + mock_exit.assert_not_called() + + def test_passes_with_full_credentials(self): + """Should not exit when all required credentials are present.""" + with patch("sys.exit") as mock_exit, patch.dict( + os.environ, + { + "GOOGLE_CLIENT_SECRET": "secret", + "GOOGLE_CLIENT_ID": "client-id", + "NEO4J_URL": "neo4j://localhost:7687", + "REDIS_URL": "redis://localhost:6379", + }, + ): + os.environ.pop("NO_LOGIN", None) + validate_web_config("production") + mock_exit.assert_not_called() + + def test_warns_but_does_not_exit_when_neo4j_missing(self): + """Should warn but not exit when NEO4J_URL is absent.""" + with patch("sys.exit") as mock_exit, patch.dict( + os.environ, + { + "NO_LOGIN": "True", + "REDIS_URL": "redis://localhost:6379", + }, + ): + os.environ.pop("NEO4J_URL", None) + validate_web_config("production") + mock_exit.assert_not_called() + + def test_warns_but_does_not_exit_when_redis_missing(self): + """Should warn but not exit when REDIS_URL and REDIS_HOST are absent.""" + with patch("sys.exit") as mock_exit, patch.dict( + os.environ, + { + "NO_LOGIN": "True", + "NEO4J_URL": "neo4j://localhost:7687", + }, + ): + 
os.environ.pop("REDIS_URL", None) + os.environ.pop("REDIS_HOST", None) + validate_web_config("production") + mock_exit.assert_not_called() + + +class TestValidateEmbeddingsConfig(unittest.TestCase): + def test_exits_when_no_ai_provider_configured(self): + """Should exit when none of OPENAI_API_KEY, GEMINI_API_KEY, or GCP_NATIVE is set.""" + with patch("sys.exit") as mock_exit, patch.dict( + os.environ, + {"OPENAI_API_KEY": "", "GEMINI_API_KEY": "", "GCP_NATIVE": ""}, + ): + validate_embeddings_config() + mock_exit.assert_called_once_with(1) + + def test_passes_with_openai_key(self): + """Should not exit when OPENAI_API_KEY is set.""" + with patch("sys.exit") as mock_exit, patch.dict( + os.environ, {"OPENAI_API_KEY": "sk-test"} + ): + validate_embeddings_config() + mock_exit.assert_not_called() + + def test_passes_with_gemini_key(self): + """Should not exit when GEMINI_API_KEY is set.""" + with patch("sys.exit") as mock_exit, patch.dict( + os.environ, {"GEMINI_API_KEY": "key"} + ): + validate_embeddings_config() + mock_exit.assert_not_called() + + def test_passes_with_gcp_native(self): + """Should not exit when GCP_NATIVE is set.""" + with patch("sys.exit") as mock_exit, patch.dict( + os.environ, {"GCP_NATIVE": "true"} + ): + validate_embeddings_config() + mock_exit.assert_not_called() + + +class TestValidateNeo4jConfig(unittest.TestCase): + def test_exits_when_neo4j_url_missing(self): + """Should exit when NEO4J_URL is not set.""" + with patch("sys.exit") as mock_exit, patch.dict(os.environ, {"NEO4J_URL": ""}): + validate_neo4j_config() + mock_exit.assert_called_once_with(1) + + def test_passes_when_neo4j_url_set(self): + """Should not exit when NEO4J_URL is present.""" + with patch("sys.exit") as mock_exit, patch.dict( + os.environ, {"NEO4J_URL": "neo4j://localhost:7687"} + ): + validate_neo4j_config() + mock_exit.assert_not_called() + + +if __name__ == "__main__": + unittest.main() diff --git a/application/validate_config.py b/application/validate_config.py new 
# application/validate_config.py
"""Startup validation of environment-driven configuration.

Each validator reads ``os.environ`` directly. When a required variable is
missing it logs one ``[startup]``-tagged error per problem and terminates the
process with exit status 1, so misconfiguration is reported immediately
instead of surfacing later.
"""
import logging
import os
import sys

logger = logging.getLogger(__name__)

# Upper-cased mode names for which web validation is skipped entirely.
_TEST_MODES = ("TESTING", "TEST")


def _abort(messages: list) -> None:
    """Log every message at ERROR level with a "[startup]" tag, then exit(1).

    Args:
        messages: human-readable descriptions of the fatal problems.
    """
    for message in messages:
        logger.error("[startup] %s", message)
    # Resolved as ``sys.exit`` at call time (not ``raise SystemExit``) so that
    # code patching ``sys.exit`` observes exactly one call with status 1.
    sys.exit(1)


def validate_web_config(mode: str) -> None:
    """Validate the configuration needed to start the web server.

    Unless NO_LOGIN is set (any non-empty value), both GOOGLE_CLIENT_SECRET
    and GOOGLE_CLIENT_ID must be non-empty; otherwise every missing variable
    is logged and the process exits with status 1. A missing NEO4J_URL, or
    the absence of both REDIS_URL and REDIS_HOST, only produces a warning.

    Args:
        mode: the Flask configuration mode (e.g. "production", "development").
            "test" / "testing" (case-insensitive) skips validation entirely.
            ``None`` is treated as a non-test mode and therefore validated.
    """
    # ``mode or ""`` keeps a None mode from raising AttributeError on .upper().
    if (mode or "").upper() in _TEST_MODES:
        return

    errors = []

    if not os.environ.get("NO_LOGIN"):
        if not os.environ.get("GOOGLE_CLIENT_SECRET"):
            errors.append(
                "GOOGLE_CLIENT_SECRET is not set. "
                "Flask requires a secret key to sign sessions securely. "
                "Set GOOGLE_CLIENT_SECRET or set NO_LOGIN=True to disable authentication."
            )
        if not os.environ.get("GOOGLE_CLIENT_ID"):
            errors.append(
                "GOOGLE_CLIENT_ID is not set. "
                "Google OAuth login will not work. "
                "Set GOOGLE_CLIENT_ID or set NO_LOGIN=True to disable authentication."
            )

    # Optional services: warn (before any fatal errors are reported) but
    # never abort because of them.
    if not os.environ.get("NEO4J_URL"):
        logger.warning(
            "NEO4J_URL is not set. Gap analysis features will be unavailable."
        )

    if not (os.environ.get("REDIS_URL") or os.environ.get("REDIS_HOST")):
        # Either variable satisfies the check, so the message names both.
        logger.warning(
            "Neither REDIS_URL nor REDIS_HOST is set. "
            "Background job processing will be unavailable."
        )

    if errors:
        _abort(errors)


def validate_embeddings_config() -> None:
    """Ensure an AI provider is configured before generating embeddings.

    Exits with status 1 (after logging an error) when none of OPENAI_API_KEY,
    GEMINI_API_KEY, or GCP_NATIVE holds a non-empty value.
    """
    providers = ("OPENAI_API_KEY", "GEMINI_API_KEY", "GCP_NATIVE")
    if not any(os.environ.get(name) for name in providers):
        _abort(
            [
                "No AI provider configured. "
                "Set OPENAI_API_KEY, GEMINI_API_KEY, or GCP_NATIVE before generating embeddings."
            ]
        )


def validate_neo4j_config() -> None:
    """Ensure NEO4J_URL is set before attempting Neo4j database operations.

    Exits with status 1 (after logging an error) when NEO4J_URL is missing
    or empty.
    """
    if not os.environ.get("NEO4J_URL"):
        _abort(
            [
                "NEO4J_URL is not set. "
                "Cannot populate the Neo4j database. "
                "Set NEO4J_URL to your Neo4j instance URL and retry."
            ]
        )