class MetadataResolver:
    """
    Singleton resolver for Osemosys variable metadata.

    The first construction reads ``Config.DATA_STORAGE / "Variables.json"``
    and indexes every entry by its ``"name"`` field; every later
    ``MetadataResolver()`` call returns that same populated instance.

    Instance creation and initialisation are both guarded by one class-level
    lock using a check / acquire / re-check sequence, so concurrent first
    calls parse the file only once. ``reset()`` drops the cached instance so
    the next construction re-reads the file.
    """

    _instance: "MetadataResolver | None" = None
    _lock: threading.Lock = threading.Lock()

    # Upper bound on the number of known names echoed in a "not found" error.
    _MAX_NAMES_IN_ERROR: int = 10

    def __new__(cls) -> "MetadataResolver":
        """Return the shared instance, creating an uninitialised one if needed."""
        # Work on a local reference: re-reading ``cls._instance`` for the
        # return value could yield None if ``reset()`` runs in between.
        instance = cls._instance
        if instance is None:
            with cls._lock:
                instance = cls._instance
                if instance is None:
                    instance = super().__new__(cls)
                    instance._initialized = False
                    cls._instance = instance
        return instance

    def __init__(self) -> None:
        """
        Load and index Variables.json on the first call; no-op afterwards.

        Raises:
            FileNotFoundError: Variables.json does not exist.
            ValueError: the file's top-level JSON value is not an object.
        """
        if self._initialized:
            return

        with self._lock:
            if self._initialized:
                return

            variables_path: Path = Config.DATA_STORAGE / "Variables.json"
            raw = self._load(variables_path)
            by_name = self._build_index(raw)

            # Publish only after both steps succeeded, so a failed load never
            # leaves a half-built index behind for the retry.
            self._raw: dict[str, list[dict[str, Any]]] = raw
            self._by_name: dict[str, dict[str, Any]] = by_name
            self._initialized = True

    @staticmethod
    def _load(path: Path) -> dict[str, Any]:
        """
        Parse *path* as UTF-8 JSON and return its top-level object.

        Raises:
            FileNotFoundError: *path* is not an existing file.
            ValueError: the document is valid JSON but not an object.
        """
        if not path.is_file():
            raise FileNotFoundError(
                f"MetadataResolver: Variables.json not found at '{path}'"
            )
        with path.open(encoding="utf-8") as fh:
            data = json.load(fh)
        if not isinstance(data, dict):
            raise ValueError(
                f"MetadataResolver: expected a JSON object at the top level "
                f"of '{path}', got {type(data).__name__}"
            )
        return data

    @staticmethod
    def _build_index(raw: dict[str, Any]) -> dict[str, dict[str, Any]]:
        """
        Flatten ``{group: [entry, ...]}`` into ``{name: entry + "_group"}``.

        Groups that are not lists, entries that are not dicts, and entries
        whose ``"name"`` is missing, empty or not a string are skipped. When
        a name occurs more than once, the last occurrence wins.
        """
        by_name: dict[str, dict[str, Any]] = {}
        for group, entries in raw.items():
            if not isinstance(entries, list):
                continue
            for entry in entries:
                if not isinstance(entry, dict):
                    continue
                name = entry.get("name", "")
                if isinstance(name, str) and name:
                    by_name[name] = {**entry, "_group": group}
        return by_name

    def get_var_metadata(self, var_name: str) -> dict[str, Any]:
        """
        Return the metadata record for *var_name*.

        The record has the keys ``dimensions`` (copied from
        ``Config.VARIABLES_C`` or, failing that, ``Config.DUALS``; empty when
        the name is in neither), ``unit_rule``, ``type`` (``"DUAL"`` when the
        name is in ``Config.DUALS``, otherwise ``"PRIMAL"``), ``group``,
        ``id`` and ``value``.

        Raises:
            VariableNotFoundError: *var_name* is not in Variables.json.
        """
        try:
            entry = self._by_name[var_name]
        except KeyError:
            # Callers probe with every symbol found in a solver file, so keep
            # the miss path cheap: show a bounded sample, not the whole index.
            names = iter(self._by_name)
            preview = [
                name for _, name in zip(range(self._MAX_NAMES_IN_ERROR), names)
            ]
            remaining = len(self._by_name) - len(preview)
            suffix = f" (+{remaining} more)" if remaining > 0 else ""
            raise VariableNotFoundError(
                f"Variable '{var_name}' not found in Variables.json. "
                f"Known variables: {preview}{suffix}"
            ) from None

        is_dual = var_name in Config.DUALS
        if var_name in Config.VARIABLES_C:
            dimensions = list(Config.VARIABLES_C[var_name])
        elif is_dual:
            dimensions = list(Config.DUALS[var_name])
        else:
            dimensions = []

        return {
            "dimensions": dimensions,
            "unit_rule": entry.get("unitRule", {}),
            "type": "DUAL" if is_dual else "PRIMAL",
            "group": entry.get("_group", ""),
            "id": entry.get("id", ""),
            "value": entry.get("value", ""),
        }

    def all_variable_names(self) -> list[str]:
        """Return every indexed variable name, in index order."""
        return list(self._by_name)

    def group_for(self, var_name: str) -> str:
        """Return the Variables.json group that *var_name* was listed under."""
        return self.get_var_metadata(var_name)["group"]

    @classmethod
    def reset(cls) -> None:
        """Forget the cached instance; the next construction reloads the file."""
        with cls._lock:
            cls._instance = None
class ResultAdapter:
    """
    Convert a solver results file into one CSV per known variable.

    Variable names found in the results file are looked up through
    ``MetadataResolver``; the dimension list returned for each name decides
    how its comma-separated index is split into columns, so no variable is
    special-cased here.
    """

    def __init__(self) -> None:
        self._resolver = MetadataResolver()

    def extract(
        self,
        results_file: Path,
        data_file: Path,
        output_path: Path,
        start_year: int,
        discount_rate_series: Optional[pd.DataFrame] = None,
    ) -> list[Path]:
        """
        Write ``<output_path>/csv/<variable>.csv`` for every resolvable variable.

        Args:
            results_file: solver output to parse; must be a ``Path`` inside
                ``Config.DATA_STORAGE``.
            data_file: model data file, read only when
                *discount_rate_series* is ``None``.
            output_path: run directory; must be a ``Path`` inside
                ``Config.DATA_STORAGE``.
            start_year: base year of the dual discount exponent.
            discount_rate_series: table with columns ``r`` and
                ``DiscountRate``; derived from *data_file* when omitted.

        Returns:
            The CSV paths written, in the order the variables first appear
            in the results file. Empty when the file holds no result rows.

        Raises:
            TypeError: a path argument is not a ``Path``.
            SecurityError: *output_path* or *results_file* is outside
                ``Config.DATA_STORAGE``.
            ExtractionError: the results file is missing or unreadable.
        """
        _assert_path(results_file, "results_file")
        _assert_path(data_file, "data_file")
        _assert_path(output_path, "output_path")

        _assert_within_storage(output_path)
        _assert_within_storage(results_file)

        csv_dir = output_path / "csv"
        csv_dir.mkdir(parents=True, exist_ok=True)

        raw_df = self._read_results(results_file)
        if raw_df is None or raw_df.empty:
            log.warning("ResultAdapter: results file is empty — no CSVs written.")
            return []

        if discount_rate_series is None:
            discount_rate_series = self._read_discount_rate(data_file)

        written: list[Path] = []
        # Series.unique() keeps first-appearance order, which makes both the
        # write order and the returned list reproducible (a set would not).
        for var_name in raw_df["parameter"].unique():
            try:
                meta = self._resolver.get_var_metadata(var_name)
            except VariableNotFoundError:
                # Symbols that Variables.json does not list are not exported.
                continue

            try:
                csv_path = self._process_variable(
                    var_name=var_name,
                    meta=meta,
                    raw_df=raw_df,
                    csv_dir=csv_dir,
                    start_year=start_year,
                    discount_rate_df=discount_rate_series,
                )
            except Exception as exc:  # noqa: BLE001
                # Best effort per variable: one bad variable must not abort
                # the rest, but keep the traceback for diagnosis.
                log.error(
                    "ResultAdapter: failed to process '%s': %s",
                    var_name,
                    exc,
                    exc_info=True,
                )
                continue

            if csv_path is not None:
                written.append(csv_path)

        return written

    def _read_results(self, results_file: Path) -> Optional[pd.DataFrame]:
        """
        Parse ``NAME(index) value [dual]`` rows out of *results_file*.

        Each line is read as plain text, so tabs, quotes and blank lines in
        the file cannot disturb the split. After stripping leading spaces,
        tabs and ``*`` markers, a line counts as a result row when it holds a
        ``(`` followed by a ``)``:

        * ``parameter`` is the last whitespace-separated token before ``(``;
        * ``id`` is the text between the first ``(`` and the first ``)``;
        * ``value`` and ``dual`` are the first and second tokens after ``)``,
          converted to floats rounded to 6 decimals (NaN when absent or not
          numeric).

        Returns:
            A frame with columns ``parameter``, ``id``, ``value``, ``dual``,
            or ``None`` when the file contains no result row.

        Raises:
            ExtractionError: the file does not exist or cannot be read.
        """
        if not results_file.is_file():
            raise ExtractionError(
                f"ResultAdapter: results file not found: '{results_file}'"
            )

        rows: list[tuple] = []
        try:
            with results_file.open(encoding="utf-8") as fh:
                for line in fh:
                    lhs, closing, rhs = line.lstrip(" *\n\t").partition(")")
                    head, opening, index_text = lhs.partition("(")
                    if not (opening and closing):
                        continue  # header line, or a row without an index
                    name_tokens = head.split()
                    if not name_tokens:
                        continue  # "(" with no name in front of it
                    numbers = rhs.split()
                    rows.append(
                        (
                            name_tokens[-1],
                            index_text.strip(),
                            numbers[0] if numbers else None,
                            numbers[1] if len(numbers) > 1 else None,
                        )
                    )
        except (OSError, UnicodeDecodeError) as exc:
            raise ExtractionError(
                f"ResultAdapter: cannot read '{results_file}': {exc}"
            ) from exc

        if not rows:
            return None

        df = pd.DataFrame(rows, columns=["parameter", "id", "value", "dual"])
        for column in ("value", "dual"):
            df[column] = pd.to_numeric(df[column], errors="coerce").round(6)
        return df

    def _read_discount_rate(self, data_file: Path) -> pd.DataFrame:
        """
        Derive the ``r`` / ``DiscountRate`` table from *data_file*.

        Any failure is logged as a warning and an empty table with the same
        two columns is returned, in which case dual values are exported
        without the discount adjustment.
        """
        try:
            from Classes.Case.DataFileClass import DataFile  # noqa: PLC0415
            parsed = DataFile.parseDataFile(None, data_file)  # type: ignore[arg-type]
            from Classes.Base.Config import PARAMETERS_C_full  # noqa: PLC0415
            dr = pd.DataFrame(
                parsed["DiscountRate"],
                columns=PARAMETERS_C_full["DiscountRate"],
            )
            dr["DiscountRate"] = dr["DiscountRate"].astype(float)
            return dr[["r", "DiscountRate"]]
        except Exception as exc:  # noqa: BLE001
            log.warning(
                "ResultAdapter: could not derive DiscountRate from data file "
                "(%s) — DUAL adjustments may be inaccurate.", exc
            )
            return pd.DataFrame(columns=["r", "DiscountRate"])

    def _process_variable(
        self,
        var_name: str,
        meta: dict,
        raw_df: pd.DataFrame,
        csv_dir: Path,
        start_year: int,
        discount_rate_df: pd.DataFrame,
    ) -> Optional[Path]:
        """
        Write the rows of *var_name* to ``<csv_dir>/<var_name>.csv``.

        The ``id`` text of each row is split on commas into the columns named
        by ``meta["dimensions"]``; the last column is named after the
        variable and holds ``dual`` for ``meta["type"] == "DUAL"`` and
        ``value`` otherwise.

        For DUAL variables that have both ``r`` and ``y`` dimensions, and
        when *discount_rate_df* is not empty, each dual is multiplied by
        ``(1 + DiscountRate[r]) ** (y - start_year + 0.5)``. Regions with no
        rate keep their raw dual (a warning is logged), matching what happens
        when the whole rate table is unavailable.

        Returns:
            The CSV path, or ``None`` when the variable has no rows or its
            index width does not match the dimension list.
        """
        dimensions: list[str] = meta["dimensions"]
        var_type: str = meta["type"]

        df_var = raw_df[raw_df["parameter"] == var_name]
        if df_var.empty:
            return None

        dim_cols = df_var["id"].str.split(",", expand=True)
        if dim_cols.shape[1] != len(dimensions):
            log.debug(
                "ResultAdapter: skipping '%s' — index has %d field(s) but %d "
                "dimension(s) are configured.",
                var_name,
                dim_cols.shape[1],
                len(dimensions),
            )
            return None

        # Build the output frame from scratch so a dimension that happens to
        # be called "id", "value" or "dual" cannot clobber a source column.
        df_out = pd.DataFrame(
            {dim: dim_cols[i].str.strip() for i, dim in enumerate(dimensions)},
            index=df_var.index,
        )
        value_col = "dual" if var_type == "DUAL" else "value"
        df_out[var_name] = df_var[value_col]

        if var_type == "DUAL" and not discount_rate_df.empty and "y" in dimensions:
            if "r" in dimensions:
                # One rate per region: a lookup (unlike a merge) can neither
                # multiply result rows nor reorder them.
                rates = (
                    discount_rate_df.drop_duplicates(subset="r")
                    .set_index("r")["DiscountRate"]
                    .astype(float)
                )
                rate = df_out["r"].map(rates)
                if rate.isna().any():
                    log.warning(
                        "ResultAdapter: no DiscountRate for some regions of "
                        "'%s' — those duals are written unadjusted.",
                        var_name,
                    )
                    rate = rate.fillna(0.0)
                years = df_out["y"].astype(int)
                df_out["y"] = years
                df_out[var_name] = df_out[var_name] * (
                    (1 + rate) ** (years - start_year + 0.5)
                )
            else:
                log.warning(
                    "ResultAdapter: '%s' has no 'r' dimension — duals are "
                    "written unadjusted.",
                    var_name,
                )

        out_path = csv_dir / f"{var_name}.csv"
        df_out.to_csv(out_path, index=False)
        return out_path
A **DUAL variable** receives the present-value discount adjustment. +4. A **TypeError** is raised when ``str`` paths are passed instead of + ``Path`` objects. + +Run +--- +:: + + # from the muiogo repo root + python -m API.Integration.test_result_adapter + +Exit code 0 → all assertions passed. +Exit code 1 → one or more assertions failed (details printed). +""" + +import sys +import json +import tempfile +import textwrap +import traceback +from pathlib import Path + +# --------------------------------------------------------------------------- +# Bootstrap: make sure the API directory is in sys.path so that the relative +# imports inside API.* work when this script is run directly. +# --------------------------------------------------------------------------- +API_ROOT = Path(__file__).resolve().parents[1] # …/muiogo/API/ +if str(API_ROOT) not in sys.path: + sys.path.insert(0, str(API_ROOT)) + +# Now patch Config.DATA_STORAGE to point at our temp directory BEFORE the +# first import of anything that references it. +import importlib +import Classes.Base.Config as _Config # noqa: E402 + +# We will override DATA_STORAGE per-test using a tmp dir. 
from contextlib import contextmanager  # noqa: E402

from Classes.Base.MetadataResolver import MetadataResolver  # noqa: E402
from Integration.ResultAdapter import ResultAdapter, SecurityError  # noqa: E402


# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------

def _build_variables_json(tmp_dir: Path, extra_group: str, entry: dict) -> Path:
    """
    Write ``tmp_dir/Variables.json`` containing *entry* under *extra_group*.

    The content starts from ``Config.DATA_STORAGE / "Variables.json"`` when
    that file exists, so this must be called BEFORE ``DATA_STORAGE`` is
    redirected to the temporary directory; otherwise it starts from ``{}``.
    """
    real_vars_path = _Config.DATA_STORAGE / "Variables.json"
    if real_vars_path.is_file():
        with real_vars_path.open(encoding="utf-8") as fh:
            data = json.load(fh)
    else:
        data = {}

    data.setdefault(extra_group, []).append(entry)

    out = tmp_dir / "Variables.json"
    out.write_text(json.dumps(data, indent=2), encoding="utf-8")
    return out


def _seed_variables_json(data_storage: Path) -> None:
    """
    Copy the current Variables.json into *data_storage* (``{}`` if absent).

    Like ``_build_variables_json`` this reads ``Config.DATA_STORAGE``, so it
    must run before the storage root is patched.
    """
    real = _Config.DATA_STORAGE / "Variables.json"
    (data_storage / "Variables.json").write_bytes(
        real.read_bytes() if real.is_file() else b"{}"
    )


def _build_results_txt(tmp_dir: Path, rows: list[str]) -> Path:
    """Write ``tmp_dir/results.txt``: one header line followed by *rows*."""
    header = "Optimal - objective value 9999.0000"
    out = tmp_dir / "results.txt"
    out.write_text(header + "\n" + "\n".join(rows), encoding="utf-8")
    return out


def _build_discount_rate_df():
    """Return a tiny DiscountRate DataFrame for DUAL tests."""
    import pandas as pd
    return pd.DataFrame({"r": ["RE1"], "DiscountRate": [0.05]})


@contextmanager
def _patched_storage(data_storage: Path, extra_variables: "dict | None" = None):
    """
    Redirect ``Config.DATA_STORAGE`` to *data_storage* inside a ``with`` block.

    *extra_variables* maps variable names to dimension lists that are added
    to ``Config.VARIABLES_C`` for the same duration. On exit (normal or via
    an exception) the storage root and every touched ``VARIABLES_C`` key are
    restored, and the ``MetadataResolver`` singleton is reset so that no test
    observes another test's state.
    """
    added = dict(extra_variables or {})
    original_ds = _Config.DATA_STORAGE
    previous = {
        name: _Config.VARIABLES_C[name]
        for name in added
        if name in _Config.VARIABLES_C
    }

    _Config.DATA_STORAGE = data_storage
    _Config.VARIABLES_C.update(added)
    MetadataResolver.reset()  # force the singleton to re-read from the new root
    try:
        yield
    finally:
        _Config.DATA_STORAGE = original_ds
        for name in added:
            _Config.VARIABLES_C.pop(name, None)
        _Config.VARIABLES_C.update(previous)
        MetadataResolver.reset()


# ---------------------------------------------------------------------------
# Individual test cases
# ---------------------------------------------------------------------------

PASS = "PASS"
FAIL = "FAIL"


def test_4d_variable_extraction() -> str:
    """
    Injects a synthetic 4D variable (r, y, t, m) → MockVar4D.
    Verifies output CSV has exactly those four dimension columns plus
    a fifth column named 'MockVar4D'.
    """
    import pandas as pd

    var_entry = {
        "id": "MV4",
        "value": "Mock Variable 4D",
        "name": "MockVar4D",
        "unitRule": {"cat": [{"var": "ActUnitId"}]},
    }
    results_rows = [
        " 1 MockVar4D(RE1,2025,SOLAR,1) 3.1400 0.0",
        " 2 MockVar4D(RE1,2026,WIND,2) 1.5900 0.0",
    ]

    with tempfile.TemporaryDirectory() as _tmp:
        data_storage = Path(_tmp) / "DataStorage"
        data_storage.mkdir()

        # Fixtures are built before patching: the builder reads the real
        # Variables.json through Config.DATA_STORAGE.
        _build_variables_json(data_storage, "RYTM", var_entry)
        results_txt = _build_results_txt(data_storage, results_rows)
        output_path = data_storage / "case01" / "res" / "run01"
        output_path.mkdir(parents=True)

        with _patched_storage(data_storage, {"MockVar4D": ["r", "y", "t", "m"]}):
            written = ResultAdapter().extract(
                results_file=results_txt,
                data_file=results_txt,  # data_file unused in this test
                output_path=output_path,
                start_year=2025,
                discount_rate_series=pd.DataFrame(columns=["r", "DiscountRate"]),
            )

            csv_path = output_path / "csv" / "MockVar4D.csv"
            assert csv_path.is_file(), f"CSV not written: {csv_path}"
            assert csv_path in written, f"CSV missing from return value: {written}"

            df = pd.read_csv(csv_path)
            expected_cols = {"r", "y", "t", "m", "MockVar4D"}
            actual_cols = set(df.columns)
            assert actual_cols == expected_cols, (
                f"Column mismatch.\n Expected: {expected_cols}\n Got: {actual_cols}"
            )
            assert len(df) == 2, f"Expected 2 rows, got {len(df)}"

    return PASS


def test_security_error_on_out_of_root_write() -> str:
    """
    Passes an output_path that lives outside DATA_STORAGE.
    results_file is legitimately inside DATA_STORAGE; only output_path escapes.
    Expects SecurityError to be raised.
    """
    import pandas as pd

    with tempfile.TemporaryDirectory() as _tmp, \
            tempfile.TemporaryDirectory() as _escape:
        data_storage = Path(_tmp) / "DataStorage"
        data_storage.mkdir()
        _seed_variables_json(data_storage)

        # results_file is inside DATA_STORAGE ✓; output_path is not ✗
        results_txt = data_storage / "results.txt"
        results_txt.write_text("Optimal - objective value 0\n", encoding="utf-8")
        escape_path = Path(_escape) / "attacker_dir"
        escape_path.mkdir()

        with _patched_storage(data_storage):
            raised = False
            try:
                ResultAdapter().extract(
                    results_file=results_txt,
                    data_file=results_txt,
                    output_path=escape_path,  # ← outside DATA_STORAGE
                    start_year=2020,
                    discount_rate_series=pd.DataFrame(
                        columns=["r", "DiscountRate"]
                    ),
                )
            except SecurityError:
                raised = True

            assert raised, "SecurityError was NOT raised for an out-of-root path."

    return PASS


def test_dual_variable_discount_applied() -> str:
    """
    Verifies that EBb4_EnergyBalanceEachYear4_ICR (a DUAL) has its
    present-value discount formula applied: dual × (1+r)^(y−start+0.5).

    Relies on the real Variables.json and Config.DUALS knowing this variable.
    """
    import math
    import pandas as pd

    DUAL_VAR = "EBb4_EnergyBalanceEachYear4_ICR"
    raw_dual = 2.0
    start_year = 2020
    year = 2025
    rate = 0.10
    expected_pv = raw_dual * math.pow(1 + rate, year - start_year + 0.5)

    with tempfile.TemporaryDirectory() as _tmp:
        data_storage = Path(_tmp) / "DataStorage"
        data_storage.mkdir()
        _seed_variables_json(data_storage)

        # results.txt must be inside data_storage (root-protection check)
        results_txt = _build_results_txt(
            data_storage,
            [f" 1 {DUAL_VAR}(RE1,FUEL1,{year}) {raw_dual:.4f} {raw_dual:.4f}"],
        )
        output_path = data_storage / "case01" / "res" / "run01"
        output_path.mkdir(parents=True)
        dr_df = pd.DataFrame({"r": ["RE1"], "DiscountRate": [rate]})

        with _patched_storage(data_storage):
            ResultAdapter().extract(
                results_file=results_txt,
                data_file=results_txt,
                output_path=output_path,
                start_year=start_year,
                discount_rate_series=dr_df,
            )

            csv_path = output_path / "csv" / f"{DUAL_VAR}.csv"
            assert csv_path.is_file(), f"DUAL CSV not written: {csv_path}"

            df = pd.read_csv(csv_path)
            assert len(df) == 1
            actual_pv = df[DUAL_VAR].iloc[0]
            assert abs(actual_pv - expected_pv) < 1e-3, (
                f"PV mismatch: expected {expected_pv:.4f}, got {actual_pv:.4f}"
            )

    return PASS


def test_type_error_on_string_path() -> str:
    """Passing a raw string instead of a Path should raise TypeError."""
    import pandas as pd

    with tempfile.TemporaryDirectory() as _tmp:
        # Use a throw-away storage root so constructing the adapter does not
        # depend on the real data directory being present.
        data_storage = Path(_tmp) / "DataStorage"
        data_storage.mkdir()
        _seed_variables_json(data_storage)

        with _patched_storage(data_storage):
            raised = False
            try:
                ResultAdapter().extract(
                    results_file="/some/string/path",  # ← not a Path
                    data_file=Path("/placeholder"),
                    output_path=data_storage,
                    start_year=2020,
                    discount_rate_series=pd.DataFrame(
                        columns=["r", "DiscountRate"]
                    ),
                )
            except TypeError:
                raised = True

            assert raised, "TypeError was NOT raised when a str path was passed."

    return PASS


# ---------------------------------------------------------------------------
# Test runner
# ---------------------------------------------------------------------------

TESTS = [
    ("4D variable extraction", test_4d_variable_extraction),
    ("SecurityError on out-of-root", test_security_error_on_out_of_root_write),
    ("DUAL discount applied", test_dual_variable_discount_applied),
    ("TypeError on str path", test_type_error_on_string_path),
]


def main() -> int:
    """
    Run every entry of TESTS and print one status line per test.

    A test fails when it raises or when it returns anything other than PASS.

    Returns:
        0 when every test passed, 1 otherwise (suitable for ``sys.exit``).
    """
    print("\n" + "═" * 60)
    print(" MUIOGO ResultAdapter — Mock Test Suite")
    print("═" * 60)

    failures = 0
    for name, fn in TESTS:
        try:
            status = fn()
        except AssertionError as exc:
            failures += 1
            print(f" [ FAIL ] {name}")
            print(f" AssertionError: {exc}")
            continue
        except Exception:  # noqa: BLE001
            failures += 1
            print(f" [ FAIL ] {name}")
            traceback.print_exc()
            continue

        if status == PASS:
            print(f" [ ✓ ] {name}")
        else:
            # A non-PASS return is a failure too; it must affect the exit code.
            failures += 1
            print(f" [ ✗ ] {name}")

    print("─" * 60)
    if failures == 0:
        print(f" All {len(TESTS)} tests passed.\n")
        return 0
    print(f" {failures}/{len(TESTS)} test(s) FAILED.\n")
    return 1


if __name__ == "__main__":
    sys.exit(main())