From a3ea1a2a6be6f9428cbdd63c2b4e66b023b936cf Mon Sep 17 00:00:00 2001 From: gklajer Date: Sun, 22 Mar 2026 20:39:18 +0000 Subject: [PATCH 1/4] feat(profiling): add Mosaic memory analysis workflow --- pyproject.toml | 22 ++ scripts/lora_memory_analysis.py | 347 +++++++++++++++++ uv.lock | 654 +++++++++++++++++++++++++++++++- 3 files changed, 1012 insertions(+), 11 deletions(-) create mode 100644 scripts/lora_memory_analysis.py diff --git a/pyproject.toml b/pyproject.toml index ef1e3e3..c807a65 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -23,6 +23,10 @@ dependencies = [ [tool.uv.sources] stella = { git = "https://github.com/SonyResearch/stella" } +# Mosaic's latest upstream commit is currently broken for this workflow, so we pin +# to a previous commit. +mosaic = { git = "https://github.com/facebookresearch/mosaic.git", rev = "27a16228ae22222cf2b834d31afbf64f2dff66e0" } + [build-system] # https://docs.astral.sh/uv/concepts/build-backend/ requires = ["uv_build>=0.9.18,<0.11.0"] build-backend = "uv_build" @@ -56,6 +60,24 @@ dev = [ "typeguard (>=4.4.1)", ] +mosaic = [ + # Keep the runtime deps here because the pinned git package is not pulling + # them into this project environment reliably on its own. 
+ "altair>=5.5.0", + "click>=8.3.0", + "mcp>=1.0.0", + "mosaic", + "omegaconf>=2.3.0", + "pandas>=2.3.3", + "tabulate>=0.9.0", +] + +profiling = [ + { include-group = "mosaic" }, + "tensorboard>=2.20.0", + "torch-tb-profiler>=0.4.3", +] + [tool.codespell] # https://github.com/codespell-project/codespell builtin = "en-GB_to_en-US,clear,code,rare" check-filenames = true diff --git a/scripts/lora_memory_analysis.py b/scripts/lora_memory_analysis.py new file mode 100644 index 0000000..1da5240 --- /dev/null +++ b/scripts/lora_memory_analysis.py @@ -0,0 +1,347 @@ +"""Profile a dense-vs-LoRA linear layer and generate optional Mosaic reports.""" + +from __future__ import annotations + +import argparse +import json +import sys +from contextlib import contextmanager, redirect_stdout +from io import StringIO +from pathlib import Path +from typing import TYPE_CHECKING, Any, Protocol + +import torch +from torch import nn +from torch.profiler import ( + ProfilerActivity, + profile, + record_function, + schedule, + tensorboard_trace_handler, +) + +if TYPE_CHECKING: + from collections.abc import Callable, Iterator + + +class MemorySnapshotProtocol(Protocol): + """Minimal snapshot view needed for peak-memory summaries.""" + + dynamic_memory_peak: float + static_memory: float + + +class MemoryAbstractProtocol(Protocol): + """Minimal Mosaic memory abstraction used by this script.""" + + memory_snapshot: MemorySnapshotProtocol + + +ROOT = Path(__file__).resolve().parents[1] +SNAPSHOT_DIR = ROOT / "deliverables" / "single_layer_lora_outputs" +OUTPUT_DIR = SNAPSHOT_DIR / "mosaic" +ANNOTATIONS = ("## forward ##", "## backward ##", "## optimizer ##") +SNAPSHOT_NAMES = ("dense", "frozen_lora") + +STEPS = 5 +BATCH_SIZE = 16 +IN_FEATURES = 4096 +OUT_FEATURES = 4096 +RANK = 16 + +DEVICE = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") + + +def parse_args() -> argparse.Namespace: + """Parse command-line arguments for profiling and post-processing.""" + parser = 
argparse.ArgumentParser() + parser.add_argument( + "--mosaic-only", + action="store_true", + help="Skip profiling and only run Mosaic analysis on existing snapshots.", + ) + return parser.parse_args() + + +class LoRALinear(nn.Module): + """Linear layer with a frozen base projection and trainable LoRA adapters.""" + + def __init__(self, base: nn.Linear, rank: int) -> None: + super().__init__() + self.base = base + self.base.requires_grad_(requires_grad=False) + self.lora_a = nn.Linear(base.in_features, rank, bias=False) + self.lora_b = nn.Linear(rank, base.out_features, bias=False) + + def forward(self, inputs: torch.Tensor) -> torch.Tensor: + """Apply the frozen base projection and the LoRA update.""" + return self.base(inputs) + self.lora_b(self.lora_a(inputs)) + + +def build_inputs() -> tuple[torch.Tensor, torch.Tensor]: + """Create deterministic inputs and labels for the profiling run.""" + generator = torch.Generator(device="cpu").manual_seed(7) + inputs = torch.randn(BATCH_SIZE, IN_FEATURES, generator=generator) + labels = torch.randn(BATCH_SIZE, OUT_FEATURES, generator=generator) + return inputs.to(DEVICE), labels.to(DEVICE) + + +def make_dense_model() -> nn.Module: + """Construct the dense linear baseline model.""" + torch.manual_seed(7) + return nn.Linear(IN_FEATURES, OUT_FEATURES, bias=False) + + +def make_lora_model() -> nn.Module: + """Construct the frozen-base LoRA variant used for comparison.""" + torch.manual_seed(7) + return LoRALinear(nn.Linear(IN_FEATURES, OUT_FEATURES, bias=False), rank=RANK) + + +def profiler_activities() -> list[ProfilerActivity]: + """Return the profiler activity set supported by the current device.""" + if DEVICE.type == "cuda": + return [ProfilerActivity.CPU, ProfilerActivity.CUDA] + return [ProfilerActivity.CPU] + + +def bytes_to_gib(num_bytes: float) -> float: + """Convert bytes to gibibytes.""" + return num_bytes / 1024**3 + + +@contextmanager +def capture_snapshot(snapshot_path: Path) -> Iterator[None]: + """Record a CUDA 
allocator snapshot around a profiling region.""" + if DEVICE.type != "cuda": + yield + return + + torch.cuda.memory._record_memory_history(max_entries=100000) # noqa: SLF001 + try: + yield + finally: + torch.cuda.memory._dump_snapshot(str(snapshot_path)) # noqa: SLF001 + torch.cuda.memory._record_memory_history(enabled=None) # noqa: SLF001 + + +def run_profile(name: str, model: nn.Module, inputs: torch.Tensor, labels: torch.Tensor) -> None: + """Run the profiler and snapshot capture for one model variant.""" + model = model.to(DEVICE) + loss_fn = nn.MSELoss() + optimizer = torch.optim.AdamW( + [parameter for parameter in model.parameters() if parameter.requires_grad], lr=0.05 + ) + + snapshot_path = SNAPSHOT_DIR / f"{name}_snapshot.pickle" + trace_dir = SNAPSHOT_DIR / f"{name}_traces" + + with ( + capture_snapshot(snapshot_path), + profile( + activities=profiler_activities(), + schedule=schedule(wait=0, warmup=0, active=STEPS, repeat=1), # codespell:ignore warmup + record_shapes=True, + profile_memory=True, + with_stack=True, + on_trace_ready=tensorboard_trace_handler(str(trace_dir)), + ) as profiler, + ): + for _ in range(STEPS): + profiler.step() + + with record_function("## forward ##"): + pred = model(inputs) + + with record_function("## backward ##"): + loss_fn(pred, labels).backward() + + with record_function("## optimizer ##"): + optimizer.step() + optimizer.zero_grad(set_to_none=True) + + sys.stdout.write(f"{name}\n") + sys.stdout.write(f" snapshot: {snapshot_path}\n") + sys.stdout.write(f" trace_dir: {trace_dir}\n") + + +def load_mosaic() -> tuple[Any, Any, Any, Any]: + """Import the Mosaic entry points required for report generation.""" + try: + from mosaic.cmd.entry_point import ( # noqa: PLC0415 + get_memory_profile, + get_memory_usage_by_annotation_stage, + get_memory_usage_peak, + ) + from mosaic.libmosaic.analyzer.memory_abstract import MemoryAbstract # noqa: PLC0415 + except ImportError as exc: + message = ( + "Mosaic is not importable in the 
current environment. Run `uv sync --group mosaic` " + "to install the configured Mosaic dependency and its runtime requirements." + ) + raise ImportError(message) from exc + + return ( + get_memory_profile, + get_memory_usage_by_annotation_stage, + get_memory_usage_peak, + MemoryAbstract, + ) + + +def capture_stdout(callback: Callable[[], Any]) -> tuple[Any, str]: + """Run a callback and return both its result and captured standard output.""" + buffer = StringIO() + with redirect_stdout(buffer): + result = callback() + return result, buffer.getvalue() + + +def build_peak_summary(name: str, memory_abstract: MemoryAbstractProtocol) -> dict[str, Any]: + """Build a JSON-serializable peak-memory summary for a snapshot.""" + dynamic_peak_bytes = float(memory_abstract.memory_snapshot.dynamic_memory_peak) + static_memory_bytes = float(memory_abstract.memory_snapshot.static_memory) + overall_peak_bytes = dynamic_peak_bytes + static_memory_bytes + return { + "name": name, + "dynamic_peak_bytes": dynamic_peak_bytes, + "dynamic_peak_gib": bytes_to_gib(dynamic_peak_bytes), + "static_memory_bytes": static_memory_bytes, + "static_memory_gib": bytes_to_gib(static_memory_bytes), + "overall_peak_bytes": overall_peak_bytes, + "overall_peak_gib": bytes_to_gib(overall_peak_bytes), + } + + +def analyze_snapshot(name: str) -> dict[str, Any]: + """Generate Mosaic reports and a summary JSON file for one snapshot.""" + snapshot_path = SNAPSHOT_DIR / f"{name}_snapshot.pickle" + if not snapshot_path.exists(): + message = f"Snapshot not found: {snapshot_path}" + raise FileNotFoundError(message) + + ( + get_memory_profile, + get_memory_usage_by_annotation_stage, + get_memory_usage_peak, + _memory_abstract, + ) = load_mosaic() + + OUTPUT_DIR.mkdir(parents=True, exist_ok=True) + + categories_path = OUTPUT_DIR / f"{name}_categories.html" + annotations_path = OUTPUT_DIR / f"{name}_annotations.html" + peak_report_path = OUTPUT_DIR / f"{name}_peak.txt" + summary_path = OUTPUT_DIR / 
f"{name}_summary.json" + + get_memory_profile( + snapshot=str(snapshot_path), + out_path=str(categories_path), + profile="categories", + sampling_rate=1, + preserve_allocation_order=True, + ) + get_memory_profile( + snapshot=str(snapshot_path), + out_path=str(annotations_path), + profile="annotations", + sampling_rate=1, + preserve_allocation_order=True, + ) + + annotation_usage, annotation_output = capture_stdout( + lambda: get_memory_usage_by_annotation_stage( + snapshot=str(snapshot_path), annotation=ANNOTATIONS, paste=False + ) + ) + peak_memory_abstract, peak_output = capture_stdout( + lambda: get_memory_usage_peak( + snapshot=str(snapshot_path), + trace="", + allocation="", + action="alloc", + paste=False, + print_stack=True, + upload_result=False, + ) + ) + + annotation_summary = { + stage: { + "annotation": metadata, + "memory_bytes": float(memory_bytes), + "memory_gib": bytes_to_gib(float(memory_bytes)), + } + for stage, (metadata, memory_bytes) in annotation_usage.items() + } + peak_summary = build_peak_summary(name, peak_memory_abstract) + peak_summary["annotation_memory"] = annotation_summary + peak_summary["files"] = { + "snapshot": str(snapshot_path), + "categories_html": str(categories_path), + "annotations_html": str(annotations_path), + "peak_report": str(peak_report_path), + } + + peak_report_path.write_text(peak_output + "\n" + annotation_output, encoding="utf-8") + summary_path.write_text(json.dumps(peak_summary, indent=2), encoding="utf-8") + + sys.stdout.write(f"{name}\n") + sys.stdout.write(f" categories_html: {categories_path}\n") + sys.stdout.write(f" annotations_html: {annotations_path}\n") + sys.stdout.write(f" peak_report: {peak_report_path}\n") + sys.stdout.write(f" summary: {summary_path}\n") + + return peak_summary + + +def write_comparison(summaries: list[dict[str, Any]]) -> Path: + """Write a dense-vs-LoRA comparison JSON file.""" + summary_by_name = {summary["name"]: summary for summary in summaries} + dense = 
summary_by_name["dense"] + frozen_lora = summary_by_name["frozen_lora"] + + comparison = { + "dense": dense, + "frozen_lora": frozen_lora, + "delta": { + "dynamic_peak_bytes": frozen_lora["dynamic_peak_bytes"] - dense["dynamic_peak_bytes"], + "dynamic_peak_gib": frozen_lora["dynamic_peak_gib"] - dense["dynamic_peak_gib"], + "overall_peak_bytes": frozen_lora["overall_peak_bytes"] - dense["overall_peak_bytes"], + "overall_peak_gib": frozen_lora["overall_peak_gib"] - dense["overall_peak_gib"], + }, + } + + comparison_path = OUTPUT_DIR / "comparison.json" + comparison_path.write_text(json.dumps(comparison, indent=2), encoding="utf-8") + return comparison_path + + +def run_mosaic_analysis() -> None: + """Analyze all known snapshots and emit a comparison summary.""" + summaries = [analyze_snapshot(name) for name in SNAPSHOT_NAMES] + comparison_path = write_comparison(summaries) + sys.stdout.write(f"comparison: {comparison_path}\n") + + +def main() -> None: + """Run profiling and, when available, Mosaic post-processing.""" + args = parse_args() + + SNAPSHOT_DIR.mkdir(parents=True, exist_ok=True) + + if args.mosaic_only: + run_mosaic_analysis() + return + + inputs, labels = build_inputs() + + run_profile("dense", make_dense_model(), inputs, labels) + run_profile("frozen_lora", make_lora_model(), inputs, labels) + + if DEVICE.type == "cuda": + run_mosaic_analysis() + + +if __name__ == "__main__": + main() diff --git a/uv.lock b/uv.lock index 91c4ae6..3c85427 100644 --- a/uv.lock +++ b/uv.lock @@ -1,6 +1,23 @@ version = 1 revision = 3 requires-python = ">=3.12, <4.0" +resolution-markers = [ + "python_full_version >= '3.14' and sys_platform == 'win32'", + "python_full_version >= '3.14' and sys_platform == 'emscripten'", + "python_full_version >= '3.14' and sys_platform != 'emscripten' and sys_platform != 'win32'", + "python_full_version < '3.14' and sys_platform == 'win32'", + "python_full_version < '3.14' and sys_platform == 'emscripten'", + "python_full_version < '3.14' and 
sys_platform != 'emscripten' and sys_platform != 'win32'", +] + +[[package]] +name = "absl-py" +version = "2.4.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/64/c7/8de93764ad66968d19329a7e0c147a2bb3c7054c554d4a119111b8f9440f/absl_py-2.4.0.tar.gz", hash = "sha256:8c6af82722b35cf71e0f4d1d47dcaebfff286e27110a99fc359349b247dfb5d4", size = 116543, upload-time = "2026-01-28T10:17:05.322Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/18/a6/907a406bb7d359e6a63f99c313846d9eec4f7e6f7437809e03aa00fa3074/absl_py-2.4.0-py3-none-any.whl", hash = "sha256:88476fd881ca8aab94ffa78b7b6c632a782ab3ba1cd19c9bd423abc4fb4cd28d", size = 135750, upload-time = "2026-01-28T10:17:04.19Z" }, +] [[package]] name = "accelerate" @@ -20,6 +37,50 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/7e/46/02ac5e262d4af18054b3e922b2baedbb2a03289ee792162de60a865defc5/accelerate-1.13.0-py3-none-any.whl", hash = "sha256:cf1a3efb96c18f7b152eb0fa7490f3710b19c3f395699358f08decca2b8b62e0", size = 383744, upload-time = "2026-03-04T19:34:10.313Z" }, ] +[[package]] +name = "altair" +version = "6.0.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "jinja2" }, + { name = "jsonschema" }, + { name = "narwhals" }, + { name = "packaging" }, + { name = "typing-extensions", marker = "python_full_version < '3.15'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/f7/c0/184a89bd5feba14ff3c41cfaf1dd8a82c05f5ceedbc92145e17042eb08a4/altair-6.0.0.tar.gz", hash = "sha256:614bf5ecbe2337347b590afb111929aa9c16c9527c4887d96c9bc7f6640756b4", size = 763834, upload-time = "2025-11-12T08:59:11.519Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/db/33/ef2f2409450ef6daa61459d5de5c08128e7d3edb773fefd0a324d1310238/altair-6.0.0-py3-none-any.whl", hash = "sha256:09ae95b53d5fe5b16987dccc785a7af8588f2dca50de1e7a156efa8a461515f8", size = 795410, upload-time = 
"2025-11-12T08:59:09.804Z" }, +] + +[[package]] +name = "annotated-types" +version = "0.7.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ee/67/531ea369ba64dcff5ec9c3402f9f51bf748cec26dde048a2f973a4eea7f5/annotated_types-0.7.0.tar.gz", hash = "sha256:aff07c09a53a08bc8cfccb9c85b05f1aa9a2a6f23728d790723543408344ce89", size = 16081, upload-time = "2024-05-20T21:33:25.928Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/78/b6/6307fbef88d9b5ee7421e68d78a9f162e0da4900bc5f5793f6d3d0e34fb8/annotated_types-0.7.0-py3-none-any.whl", hash = "sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53", size = 13643, upload-time = "2024-05-20T21:33:24.1Z" }, +] + +[[package]] +name = "antlr4-python3-runtime" +version = "4.9.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/3e/38/7859ff46355f76f8d19459005ca000b6e7012f2f1ca597746cbcd1fbfe5e/antlr4-python3-runtime-4.9.3.tar.gz", hash = "sha256:f224469b4168294902bb1efa80a8bf7855f24c99aef99cbefc1bcd3cce77881b", size = 117034, upload-time = "2021-11-06T17:52:23.524Z" } + +[[package]] +name = "anyio" +version = "4.12.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "idna" }, + { name = "typing-extensions", marker = "python_full_version < '3.13'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/96/f0/5eb65b2bb0d09ac6776f2eb54adee6abe8228ea05b20a5ad0e4945de8aac/anyio-4.12.1.tar.gz", hash = "sha256:41cfcc3a4c85d3f05c932da7c26d0201ac36f72abd4435ba90d0464a3ffed703", size = 228685, upload-time = "2026-01-06T11:45:21.246Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/38/0e/27be9fdef66e72d64c0cdc3cc2823101b80585f8119b5c112c2e8f5f7dab/anyio-4.12.1-py3-none-any.whl", hash = "sha256:d405828884fc140aa80a3c667b8beed277f1dfedec42ba031bd6ac3db606ab6c", size = 113592, upload-time = "2026-01-06T11:45:19.497Z" }, +] + [[package]] 
name = "appnope" version = "0.1.4" @@ -439,12 +500,65 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/9e/ee/a4cf96b8ce1e566ed238f0659ac2d3f007ed1d14b181bcb684e19561a69a/coverage-7.13.5-py3-none-any.whl", hash = "sha256:34b02417cf070e173989b3db962f7ed56d2f644307b2cf9d5a0f258e13084a61", size = 211346, upload-time = "2026-03-17T10:33:15.691Z" }, ] +[[package]] +name = "cryptography" +version = "46.0.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "cffi", marker = "platform_python_implementation != 'PyPy'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/60/04/ee2a9e8542e4fa2773b81771ff8349ff19cdd56b7258a0cc442639052edb/cryptography-46.0.5.tar.gz", hash = "sha256:abace499247268e3757271b2f1e244b36b06f8515cf27c4d49468fc9eb16e93d", size = 750064, upload-time = "2026-02-10T19:18:38.255Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f7/81/b0bb27f2ba931a65409c6b8a8b358a7f03c0e46eceacddff55f7c84b1f3b/cryptography-46.0.5-cp311-abi3-macosx_10_9_universal2.whl", hash = "sha256:351695ada9ea9618b3500b490ad54c739860883df6c1f555e088eaf25b1bbaad", size = 7176289, upload-time = "2026-02-10T19:17:08.274Z" }, + { url = "https://files.pythonhosted.org/packages/ff/9e/6b4397a3e3d15123de3b1806ef342522393d50736c13b20ec4c9ea6693a6/cryptography-46.0.5-cp311-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:c18ff11e86df2e28854939acde2d003f7984f721eba450b56a200ad90eeb0e6b", size = 4275637, upload-time = "2026-02-10T19:17:10.53Z" }, + { url = "https://files.pythonhosted.org/packages/63/e7/471ab61099a3920b0c77852ea3f0ea611c9702f651600397ac567848b897/cryptography-46.0.5-cp311-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:4d7e3d356b8cd4ea5aff04f129d5f66ebdc7b6f8eae802b93739ed520c47c79b", size = 4424742, upload-time = "2026-02-10T19:17:12.388Z" }, + { url = 
"https://files.pythonhosted.org/packages/37/53/a18500f270342d66bf7e4d9f091114e31e5ee9e7375a5aba2e85a91e0044/cryptography-46.0.5-cp311-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:50bfb6925eff619c9c023b967d5b77a54e04256c4281b0e21336a130cd7fc263", size = 4277528, upload-time = "2026-02-10T19:17:13.853Z" }, + { url = "https://files.pythonhosted.org/packages/22/29/c2e812ebc38c57b40e7c583895e73c8c5adb4d1e4a0cc4c5a4fdab2b1acc/cryptography-46.0.5-cp311-abi3-manylinux_2_28_ppc64le.whl", hash = "sha256:803812e111e75d1aa73690d2facc295eaefd4439be1023fefc4995eaea2af90d", size = 4947993, upload-time = "2026-02-10T19:17:15.618Z" }, + { url = "https://files.pythonhosted.org/packages/6b/e7/237155ae19a9023de7e30ec64e5d99a9431a567407ac21170a046d22a5a3/cryptography-46.0.5-cp311-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:3ee190460e2fbe447175cda91b88b84ae8322a104fc27766ad09428754a618ed", size = 4456855, upload-time = "2026-02-10T19:17:17.221Z" }, + { url = "https://files.pythonhosted.org/packages/2d/87/fc628a7ad85b81206738abbd213b07702bcbdada1dd43f72236ef3cffbb5/cryptography-46.0.5-cp311-abi3-manylinux_2_31_armv7l.whl", hash = "sha256:f145bba11b878005c496e93e257c1e88f154d278d2638e6450d17e0f31e558d2", size = 3984635, upload-time = "2026-02-10T19:17:18.792Z" }, + { url = "https://files.pythonhosted.org/packages/84/29/65b55622bde135aedf4565dc509d99b560ee4095e56989e815f8fd2aa910/cryptography-46.0.5-cp311-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:e9251e3be159d1020c4030bd2e5f84d6a43fe54b6c19c12f51cde9542a2817b2", size = 4277038, upload-time = "2026-02-10T19:17:20.256Z" }, + { url = "https://files.pythonhosted.org/packages/bc/36/45e76c68d7311432741faf1fbf7fac8a196a0a735ca21f504c75d37e2558/cryptography-46.0.5-cp311-abi3-manylinux_2_34_ppc64le.whl", hash = "sha256:47fb8a66058b80e509c47118ef8a75d14c455e81ac369050f20ba0d23e77fee0", size = 4912181, upload-time = "2026-02-10T19:17:21.825Z" }, + { url = 
"https://files.pythonhosted.org/packages/6d/1a/c1ba8fead184d6e3d5afcf03d569acac5ad063f3ac9fb7258af158f7e378/cryptography-46.0.5-cp311-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:4c3341037c136030cb46e4b1e17b7418ea4cbd9dd207e4a6f3b2b24e0d4ac731", size = 4456482, upload-time = "2026-02-10T19:17:25.133Z" }, + { url = "https://files.pythonhosted.org/packages/f9/e5/3fb22e37f66827ced3b902cf895e6a6bc1d095b5b26be26bd13c441fdf19/cryptography-46.0.5-cp311-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:890bcb4abd5a2d3f852196437129eb3667d62630333aacc13dfd470fad3aaa82", size = 4405497, upload-time = "2026-02-10T19:17:26.66Z" }, + { url = "https://files.pythonhosted.org/packages/1a/df/9d58bb32b1121a8a2f27383fabae4d63080c7ca60b9b5c88be742be04ee7/cryptography-46.0.5-cp311-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:80a8d7bfdf38f87ca30a5391c0c9ce4ed2926918e017c29ddf643d0ed2778ea1", size = 4667819, upload-time = "2026-02-10T19:17:28.569Z" }, + { url = "https://files.pythonhosted.org/packages/ea/ed/325d2a490c5e94038cdb0117da9397ece1f11201f425c4e9c57fe5b9f08b/cryptography-46.0.5-cp311-abi3-win32.whl", hash = "sha256:60ee7e19e95104d4c03871d7d7dfb3d22ef8a9b9c6778c94e1c8fcc8365afd48", size = 3028230, upload-time = "2026-02-10T19:17:30.518Z" }, + { url = "https://files.pythonhosted.org/packages/e9/5a/ac0f49e48063ab4255d9e3b79f5def51697fce1a95ea1370f03dc9db76f6/cryptography-46.0.5-cp311-abi3-win_amd64.whl", hash = "sha256:38946c54b16c885c72c4f59846be9743d699eee2b69b6988e0a00a01f46a61a4", size = 3480909, upload-time = "2026-02-10T19:17:32.083Z" }, + { url = "https://files.pythonhosted.org/packages/00/13/3d278bfa7a15a96b9dc22db5a12ad1e48a9eb3d40e1827ef66a5df75d0d0/cryptography-46.0.5-cp314-cp314t-macosx_10_9_universal2.whl", hash = "sha256:94a76daa32eb78d61339aff7952ea819b1734b46f73646a07decb40e5b3448e2", size = 7119287, upload-time = "2026-02-10T19:17:33.801Z" }, + { url = 
"https://files.pythonhosted.org/packages/67/c8/581a6702e14f0898a0848105cbefd20c058099e2c2d22ef4e476dfec75d7/cryptography-46.0.5-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:5be7bf2fb40769e05739dd0046e7b26f9d4670badc7b032d6ce4db64dddc0678", size = 4265728, upload-time = "2026-02-10T19:17:35.569Z" }, + { url = "https://files.pythonhosted.org/packages/dd/4a/ba1a65ce8fc65435e5a849558379896c957870dd64fecea97b1ad5f46a37/cryptography-46.0.5-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:fe346b143ff9685e40192a4960938545c699054ba11d4f9029f94751e3f71d87", size = 4408287, upload-time = "2026-02-10T19:17:36.938Z" }, + { url = "https://files.pythonhosted.org/packages/f8/67/8ffdbf7b65ed1ac224d1c2df3943553766914a8ca718747ee3871da6107e/cryptography-46.0.5-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:c69fd885df7d089548a42d5ec05be26050ebcd2283d89b3d30676eb32ff87dee", size = 4270291, upload-time = "2026-02-10T19:17:38.748Z" }, + { url = "https://files.pythonhosted.org/packages/f8/e5/f52377ee93bc2f2bba55a41a886fd208c15276ffbd2569f2ddc89d50e2c5/cryptography-46.0.5-cp314-cp314t-manylinux_2_28_ppc64le.whl", hash = "sha256:8293f3dea7fc929ef7240796ba231413afa7b68ce38fd21da2995549f5961981", size = 4927539, upload-time = "2026-02-10T19:17:40.241Z" }, + { url = "https://files.pythonhosted.org/packages/3b/02/cfe39181b02419bbbbcf3abdd16c1c5c8541f03ca8bda240debc467d5a12/cryptography-46.0.5-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:1abfdb89b41c3be0365328a410baa9df3ff8a9110fb75e7b52e66803ddabc9a9", size = 4442199, upload-time = "2026-02-10T19:17:41.789Z" }, + { url = "https://files.pythonhosted.org/packages/c0/96/2fcaeb4873e536cf71421a388a6c11b5bc846e986b2b069c79363dc1648e/cryptography-46.0.5-cp314-cp314t-manylinux_2_31_armv7l.whl", hash = "sha256:d66e421495fdb797610a08f43b05269e0a5ea7f5e652a89bfd5a7d3c1dee3648", size = 3960131, upload-time = "2026-02-10T19:17:43.379Z" }, + { url = 
"https://files.pythonhosted.org/packages/d8/d2/b27631f401ddd644e94c5cf33c9a4069f72011821cf3dc7309546b0642a0/cryptography-46.0.5-cp314-cp314t-manylinux_2_34_aarch64.whl", hash = "sha256:4e817a8920bfbcff8940ecfd60f23d01836408242b30f1a708d93198393a80b4", size = 4270072, upload-time = "2026-02-10T19:17:45.481Z" }, + { url = "https://files.pythonhosted.org/packages/f4/a7/60d32b0370dae0b4ebe55ffa10e8599a2a59935b5ece1b9f06edb73abdeb/cryptography-46.0.5-cp314-cp314t-manylinux_2_34_ppc64le.whl", hash = "sha256:68f68d13f2e1cb95163fa3b4db4bf9a159a418f5f6e7242564fc75fcae667fd0", size = 4892170, upload-time = "2026-02-10T19:17:46.997Z" }, + { url = "https://files.pythonhosted.org/packages/d2/b9/cf73ddf8ef1164330eb0b199a589103c363afa0cf794218c24d524a58eab/cryptography-46.0.5-cp314-cp314t-manylinux_2_34_x86_64.whl", hash = "sha256:a3d1fae9863299076f05cb8a778c467578262fae09f9dc0ee9b12eb4268ce663", size = 4441741, upload-time = "2026-02-10T19:17:48.661Z" }, + { url = "https://files.pythonhosted.org/packages/5f/eb/eee00b28c84c726fe8fa0158c65afe312d9c3b78d9d01daf700f1f6e37ff/cryptography-46.0.5-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:c4143987a42a2397f2fc3b4d7e3a7d313fbe684f67ff443999e803dd75a76826", size = 4396728, upload-time = "2026-02-10T19:17:50.058Z" }, + { url = "https://files.pythonhosted.org/packages/65/f4/6bc1a9ed5aef7145045114b75b77c2a8261b4d38717bd8dea111a63c3442/cryptography-46.0.5-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:7d731d4b107030987fd61a7f8ab512b25b53cef8f233a97379ede116f30eb67d", size = 4652001, upload-time = "2026-02-10T19:17:51.54Z" }, + { url = "https://files.pythonhosted.org/packages/86/ef/5d00ef966ddd71ac2e6951d278884a84a40ffbd88948ef0e294b214ae9e4/cryptography-46.0.5-cp314-cp314t-win32.whl", hash = "sha256:c3bcce8521d785d510b2aad26ae2c966092b7daa8f45dd8f44734a104dc0bc1a", size = 3003637, upload-time = "2026-02-10T19:17:52.997Z" }, + { url = 
"https://files.pythonhosted.org/packages/b7/57/f3f4160123da6d098db78350fdfd9705057aad21de7388eacb2401dceab9/cryptography-46.0.5-cp314-cp314t-win_amd64.whl", hash = "sha256:4d8ae8659ab18c65ced284993c2265910f6c9e650189d4e3f68445ef82a810e4", size = 3469487, upload-time = "2026-02-10T19:17:54.549Z" }, + { url = "https://files.pythonhosted.org/packages/e2/fa/a66aa722105ad6a458bebd64086ca2b72cdd361fed31763d20390f6f1389/cryptography-46.0.5-cp38-abi3-macosx_10_9_universal2.whl", hash = "sha256:4108d4c09fbbf2789d0c926eb4152ae1760d5a2d97612b92d508d96c861e4d31", size = 7170514, upload-time = "2026-02-10T19:17:56.267Z" }, + { url = "https://files.pythonhosted.org/packages/0f/04/c85bdeab78c8bc77b701bf0d9bdcf514c044e18a46dcff330df5448631b0/cryptography-46.0.5-cp38-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:7d1f30a86d2757199cb2d56e48cce14deddf1f9c95f1ef1b64ee91ea43fe2e18", size = 4275349, upload-time = "2026-02-10T19:17:58.419Z" }, + { url = "https://files.pythonhosted.org/packages/5c/32/9b87132a2f91ee7f5223b091dc963055503e9b442c98fc0b8a5ca765fab0/cryptography-46.0.5-cp38-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:039917b0dc418bb9f6edce8a906572d69e74bd330b0b3fea4f79dab7f8ddd235", size = 4420667, upload-time = "2026-02-10T19:18:00.619Z" }, + { url = "https://files.pythonhosted.org/packages/a1/a6/a7cb7010bec4b7c5692ca6f024150371b295ee1c108bdc1c400e4c44562b/cryptography-46.0.5-cp38-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:ba2a27ff02f48193fc4daeadf8ad2590516fa3d0adeeb34336b96f7fa64c1e3a", size = 4276980, upload-time = "2026-02-10T19:18:02.379Z" }, + { url = "https://files.pythonhosted.org/packages/8e/7c/c4f45e0eeff9b91e3f12dbd0e165fcf2a38847288fcfd889deea99fb7b6d/cryptography-46.0.5-cp38-abi3-manylinux_2_28_ppc64le.whl", hash = "sha256:61aa400dce22cb001a98014f647dc21cda08f7915ceb95df0c9eaf84b4b6af76", size = 4939143, upload-time = "2026-02-10T19:18:03.964Z" }, + { url = 
"https://files.pythonhosted.org/packages/37/19/e1b8f964a834eddb44fa1b9a9976f4e414cbb7aa62809b6760c8803d22d1/cryptography-46.0.5-cp38-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:3ce58ba46e1bc2aac4f7d9290223cead56743fa6ab94a5d53292ffaac6a91614", size = 4453674, upload-time = "2026-02-10T19:18:05.588Z" }, + { url = "https://files.pythonhosted.org/packages/db/ed/db15d3956f65264ca204625597c410d420e26530c4e2943e05a0d2f24d51/cryptography-46.0.5-cp38-abi3-manylinux_2_31_armv7l.whl", hash = "sha256:420d0e909050490d04359e7fdb5ed7e667ca5c3c402b809ae2563d7e66a92229", size = 3978801, upload-time = "2026-02-10T19:18:07.167Z" }, + { url = "https://files.pythonhosted.org/packages/41/e2/df40a31d82df0a70a0daf69791f91dbb70e47644c58581d654879b382d11/cryptography-46.0.5-cp38-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:582f5fcd2afa31622f317f80426a027f30dc792e9c80ffee87b993200ea115f1", size = 4276755, upload-time = "2026-02-10T19:18:09.813Z" }, + { url = "https://files.pythonhosted.org/packages/33/45/726809d1176959f4a896b86907b98ff4391a8aa29c0aaaf9450a8a10630e/cryptography-46.0.5-cp38-abi3-manylinux_2_34_ppc64le.whl", hash = "sha256:bfd56bb4b37ed4f330b82402f6f435845a5f5648edf1ad497da51a8452d5d62d", size = 4901539, upload-time = "2026-02-10T19:18:11.263Z" }, + { url = "https://files.pythonhosted.org/packages/99/0f/a3076874e9c88ecb2ecc31382f6e7c21b428ede6f55aafa1aa272613e3cd/cryptography-46.0.5-cp38-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:a3d507bb6a513ca96ba84443226af944b0f7f47dcc9a399d110cd6146481d24c", size = 4452794, upload-time = "2026-02-10T19:18:12.914Z" }, + { url = "https://files.pythonhosted.org/packages/02/ef/ffeb542d3683d24194a38f66ca17c0a4b8bf10631feef44a7ef64e631b1a/cryptography-46.0.5-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:9f16fbdf4da055efb21c22d81b89f155f02ba420558db21288b3d0035bafd5f4", size = 4404160, upload-time = "2026-02-10T19:18:14.375Z" }, + { url = 
"https://files.pythonhosted.org/packages/96/93/682d2b43c1d5f1406ed048f377c0fc9fc8f7b0447a478d5c65ab3d3a66eb/cryptography-46.0.5-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:ced80795227d70549a411a4ab66e8ce307899fad2220ce5ab2f296e687eacde9", size = 4667123, upload-time = "2026-02-10T19:18:15.886Z" }, + { url = "https://files.pythonhosted.org/packages/45/2d/9c5f2926cb5300a8eefc3f4f0b3f3df39db7f7ce40c8365444c49363cbda/cryptography-46.0.5-cp38-abi3-win32.whl", hash = "sha256:02f547fce831f5096c9a567fd41bc12ca8f11df260959ecc7c3202555cc47a72", size = 3010220, upload-time = "2026-02-10T19:18:17.361Z" }, + { url = "https://files.pythonhosted.org/packages/48/ef/0c2f4a8e31018a986949d34a01115dd057bf536905dca38897bacd21fac3/cryptography-46.0.5-cp38-abi3-win_amd64.whl", hash = "sha256:556e106ee01aa13484ce9b0239bca667be5004efb0aabbed28d353df86445595", size = 3467050, upload-time = "2026-02-10T19:18:18.899Z" }, +] + [[package]] name = "cuda-bindings" version = "12.9.4" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "cuda-pathfinder" }, + { name = "cuda-pathfinder", marker = "sys_platform != 'emscripten' and sys_platform != 'win32'" }, ] wheels = [ { url = "https://files.pythonhosted.org/packages/a9/c1/dabe88f52c3e3760d861401bb994df08f672ec893b8f7592dc91626adcf3/cuda_bindings-12.9.4-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:fda147a344e8eaeca0c6ff113d2851ffca8f7dfc0a6c932374ee5c47caa649c8", size = 12151019, upload-time = "2025-10-21T14:51:43.167Z" }, @@ -653,6 +767,56 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/4b/4c/cc8c68196db727cfc1432f2ad5de50aa6707e630d44b2e6361dc06d8f134/griffelib-2.0.1-py3-none-any.whl", hash = "sha256:b769eed581c0e857d362fc8fcd8e57ecd2330c124b6104ac8b4c1c86d76970aa", size = 142377, upload-time = "2026-03-23T21:04:01.116Z" }, ] +[[package]] +name = "grpcio" +version = "1.78.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = 
"typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/06/8a/3d098f35c143a89520e568e6539cc098fcd294495910e359889ce8741c84/grpcio-1.78.0.tar.gz", hash = "sha256:7382b95189546f375c174f53a5fa873cef91c4b8005faa05cc5b3beea9c4f1c5", size = 12852416, upload-time = "2026-02-06T09:57:18.093Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/4e/f4/7384ed0178203d6074446b3c4f46c90a22ddf7ae0b3aee521627f54cfc2a/grpcio-1.78.0-cp312-cp312-linux_armv7l.whl", hash = "sha256:f9ab915a267fc47c7e88c387a3a28325b58c898e23d4995f765728f4e3dedb97", size = 5913985, upload-time = "2026-02-06T09:55:26.832Z" }, + { url = "https://files.pythonhosted.org/packages/81/ed/be1caa25f06594463f685b3790b320f18aea49b33166f4141bfdc2bfb236/grpcio-1.78.0-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:3f8904a8165ab21e07e58bf3e30a73f4dffc7a1e0dbc32d51c61b5360d26f43e", size = 11811853, upload-time = "2026-02-06T09:55:29.224Z" }, + { url = "https://files.pythonhosted.org/packages/24/a7/f06d151afc4e64b7e3cc3e872d331d011c279aaab02831e40a81c691fb65/grpcio-1.78.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:859b13906ce098c0b493af92142ad051bf64c7870fa58a123911c88606714996", size = 6475766, upload-time = "2026-02-06T09:55:31.825Z" }, + { url = "https://files.pythonhosted.org/packages/8a/a8/4482922da832ec0082d0f2cc3a10976d84a7424707f25780b82814aafc0a/grpcio-1.78.0-cp312-cp312-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:b2342d87af32790f934a79c3112641e7b27d63c261b8b4395350dad43eff1dc7", size = 7170027, upload-time = "2026-02-06T09:55:34.7Z" }, + { url = "https://files.pythonhosted.org/packages/54/bf/f4a3b9693e35d25b24b0b39fa46d7d8a3c439e0a3036c3451764678fec20/grpcio-1.78.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:12a771591ae40bc65ba67048fa52ef4f0e6db8279e595fd349f9dfddeef571f9", size = 6690766, upload-time = "2026-02-06T09:55:36.902Z" }, + { url = 
"https://files.pythonhosted.org/packages/c7/b9/521875265cc99fe5ad4c5a17010018085cae2810a928bf15ebe7d8bcd9cc/grpcio-1.78.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:185dea0d5260cbb2d224c507bf2a5444d5abbb1fa3594c1ed7e4c709d5eb8383", size = 7266161, upload-time = "2026-02-06T09:55:39.824Z" }, + { url = "https://files.pythonhosted.org/packages/05/86/296a82844fd40a4ad4a95f100b55044b4f817dece732bf686aea1a284147/grpcio-1.78.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:51b13f9aed9d59ee389ad666b8c2214cc87b5de258fa712f9ab05f922e3896c6", size = 8253303, upload-time = "2026-02-06T09:55:42.353Z" }, + { url = "https://files.pythonhosted.org/packages/f3/e4/ea3c0caf5468537f27ad5aab92b681ed7cc0ef5f8c9196d3fd42c8c2286b/grpcio-1.78.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:fd5f135b1bd58ab088930b3c613455796dfa0393626a6972663ccdda5b4ac6ce", size = 7698222, upload-time = "2026-02-06T09:55:44.629Z" }, + { url = "https://files.pythonhosted.org/packages/d7/47/7f05f81e4bb6b831e93271fb12fd52ba7b319b5402cbc101d588f435df00/grpcio-1.78.0-cp312-cp312-win32.whl", hash = "sha256:94309f498bcc07e5a7d16089ab984d42ad96af1d94b5a4eb966a266d9fcabf68", size = 4066123, upload-time = "2026-02-06T09:55:47.644Z" }, + { url = "https://files.pythonhosted.org/packages/ad/e7/d6914822c88aa2974dbbd10903d801a28a19ce9cd8bad7e694cbbcf61528/grpcio-1.78.0-cp312-cp312-win_amd64.whl", hash = "sha256:9566fe4ababbb2610c39190791e5b829869351d14369603702e890ef3ad2d06e", size = 4797657, upload-time = "2026-02-06T09:55:49.86Z" }, + { url = "https://files.pythonhosted.org/packages/05/a9/8f75894993895f361ed8636cd9237f4ab39ef87fd30db17467235ed1c045/grpcio-1.78.0-cp313-cp313-linux_armv7l.whl", hash = "sha256:ce3a90455492bf8bfa38e56fbbe1dbd4f872a3d8eeaf7337dc3b1c8aa28c271b", size = 5920143, upload-time = "2026-02-06T09:55:52.035Z" }, + { url = 
"https://files.pythonhosted.org/packages/55/06/0b78408e938ac424100100fd081189451b472236e8a3a1f6500390dc4954/grpcio-1.78.0-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:2bf5e2e163b356978b23652c4818ce4759d40f4712ee9ec5a83c4be6f8c23a3a", size = 11803926, upload-time = "2026-02-06T09:55:55.494Z" }, + { url = "https://files.pythonhosted.org/packages/88/93/b59fe7832ff6ae3c78b813ea43dac60e295fa03606d14d89d2e0ec29f4f3/grpcio-1.78.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:8f2ac84905d12918e4e55a16da17939eb63e433dc11b677267c35568aa63fc84", size = 6478628, upload-time = "2026-02-06T09:55:58.533Z" }, + { url = "https://files.pythonhosted.org/packages/ed/df/e67e3734527f9926b7d9c0dde6cd998d1d26850c3ed8eeec81297967ac67/grpcio-1.78.0-cp313-cp313-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:b58f37edab4a3881bc6c9bca52670610e0c9ca14e2ea3cf9debf185b870457fb", size = 7173574, upload-time = "2026-02-06T09:56:01.786Z" }, + { url = "https://files.pythonhosted.org/packages/a6/62/cc03fffb07bfba982a9ec097b164e8835546980aec25ecfa5f9c1a47e022/grpcio-1.78.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:735e38e176a88ce41840c21bb49098ab66177c64c82426e24e0082500cc68af5", size = 6692639, upload-time = "2026-02-06T09:56:04.529Z" }, + { url = "https://files.pythonhosted.org/packages/bf/9a/289c32e301b85bdb67d7ec68b752155e674ee3ba2173a1858f118e399ef3/grpcio-1.78.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:2045397e63a7a0ee7957c25f7dbb36ddc110e0cfb418403d110c0a7a68a844e9", size = 7268838, upload-time = "2026-02-06T09:56:08.397Z" }, + { url = "https://files.pythonhosted.org/packages/0e/79/1be93f32add280461fa4773880196572563e9c8510861ac2da0ea0f892b6/grpcio-1.78.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:a9f136fbafe7ccf4ac7e8e0c28b31066e810be52d6e344ef954a3a70234e1702", size = 8251878, upload-time = "2026-02-06T09:56:10.914Z" }, + { url = 
"https://files.pythonhosted.org/packages/65/65/793f8e95296ab92e4164593674ae6291b204bb5f67f9d4a711489cd30ffa/grpcio-1.78.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:748b6138585379c737adc08aeffd21222abbda1a86a0dca2a39682feb9196c20", size = 7695412, upload-time = "2026-02-06T09:56:13.593Z" }, + { url = "https://files.pythonhosted.org/packages/1c/9f/1e233fe697ecc82845942c2822ed06bb522e70d6771c28d5528e4c50f6a4/grpcio-1.78.0-cp313-cp313-win32.whl", hash = "sha256:271c73e6e5676afe4fc52907686670c7cea22ab2310b76a59b678403ed40d670", size = 4064899, upload-time = "2026-02-06T09:56:15.601Z" }, + { url = "https://files.pythonhosted.org/packages/4d/27/d86b89e36de8a951501fb06a0f38df19853210f341d0b28f83f4aa0ffa08/grpcio-1.78.0-cp313-cp313-win_amd64.whl", hash = "sha256:f2d4e43ee362adfc05994ed479334d5a451ab7bc3f3fee1b796b8ca66895acb4", size = 4797393, upload-time = "2026-02-06T09:56:17.882Z" }, + { url = "https://files.pythonhosted.org/packages/29/f2/b56e43e3c968bfe822fa6ce5bca10d5c723aa40875b48791ce1029bb78c7/grpcio-1.78.0-cp314-cp314-linux_armv7l.whl", hash = "sha256:e87cbc002b6f440482b3519e36e1313eb5443e9e9e73d6a52d43bd2004fcfd8e", size = 5920591, upload-time = "2026-02-06T09:56:20.758Z" }, + { url = "https://files.pythonhosted.org/packages/5d/81/1f3b65bd30c334167bfa8b0d23300a44e2725ce39bba5b76a2460d85f745/grpcio-1.78.0-cp314-cp314-macosx_11_0_universal2.whl", hash = "sha256:c41bc64626db62e72afec66b0c8a0da76491510015417c127bfc53b2fe6d7f7f", size = 11813685, upload-time = "2026-02-06T09:56:24.315Z" }, + { url = "https://files.pythonhosted.org/packages/0e/1c/bbe2f8216a5bd3036119c544d63c2e592bdf4a8ec6e4a1867592f4586b26/grpcio-1.78.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:8dfffba826efcf366b1e3ccc37e67afe676f290e13a3b48d31a46739f80a8724", size = 6487803, upload-time = "2026-02-06T09:56:27.367Z" }, + { url = 
"https://files.pythonhosted.org/packages/16/5c/a6b2419723ea7ddce6308259a55e8e7593d88464ce8db9f4aa857aba96fa/grpcio-1.78.0-cp314-cp314-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:74be1268d1439eaaf552c698cdb11cd594f0c49295ae6bb72c34ee31abbe611b", size = 7173206, upload-time = "2026-02-06T09:56:29.876Z" }, + { url = "https://files.pythonhosted.org/packages/df/1e/b8801345629a415ea7e26c83d75eb5dbe91b07ffe5210cc517348a8d4218/grpcio-1.78.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:be63c88b32e6c0f1429f1398ca5c09bc64b0d80950c8bb7807d7d7fb36fb84c7", size = 6693826, upload-time = "2026-02-06T09:56:32.305Z" }, + { url = "https://files.pythonhosted.org/packages/34/84/0de28eac0377742679a510784f049738a80424b17287739fc47d63c2439e/grpcio-1.78.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:3c586ac70e855c721bda8f548d38c3ca66ac791dc49b66a8281a1f99db85e452", size = 7277897, upload-time = "2026-02-06T09:56:34.915Z" }, + { url = "https://files.pythonhosted.org/packages/ca/9c/ad8685cfe20559a9edb66f735afdcb2b7d3de69b13666fdfc542e1916ebd/grpcio-1.78.0-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:35eb275bf1751d2ffbd8f57cdbc46058e857cf3971041521b78b7db94bdaf127", size = 8252404, upload-time = "2026-02-06T09:56:37.553Z" }, + { url = "https://files.pythonhosted.org/packages/3c/05/33a7a4985586f27e1de4803887c417ec7ced145ebd069bc38a9607059e2b/grpcio-1.78.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:207db540302c884b8848036b80db352a832b99dfdf41db1eb554c2c2c7800f65", size = 7696837, upload-time = "2026-02-06T09:56:40.173Z" }, + { url = "https://files.pythonhosted.org/packages/73/77/7382241caf88729b106e49e7d18e3116216c778e6a7e833826eb96de22f7/grpcio-1.78.0-cp314-cp314-win32.whl", hash = "sha256:57bab6deef2f4f1ca76cc04565df38dc5713ae6c17de690721bdf30cb1e0545c", size = 4142439, upload-time = "2026-02-06T09:56:43.258Z" }, + { url = 
"https://files.pythonhosted.org/packages/48/b2/b096ccce418882fbfda4f7496f9357aaa9a5af1896a9a7f60d9f2b275a06/grpcio-1.78.0-cp314-cp314-win_amd64.whl", hash = "sha256:dce09d6116df20a96acfdbf85e4866258c3758180e8c49845d6ba8248b6d0bbb", size = 4929852, upload-time = "2026-02-06T09:56:45.885Z" }, +] + +[[package]] +name = "h11" +version = "0.16.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/01/ee/02a2c011bdab74c6fb3c75474d40b3052059d95df7e73351460c8588d963/h11-0.16.0.tar.gz", hash = "sha256:4e35b956cf45792e4caa5885e69fba00bdbc6ffafbfa020300e549b208ee5ff1", size = 101250, upload-time = "2025-04-24T03:35:25.427Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/04/4b/29cac41a4d98d144bf5f6d33995617b185d14b22401f75ca86f384e87ff1/h11-0.16.0-py3-none-any.whl", hash = "sha256:63cf8bbe7522de3bf65932fda1d9c2772064ffb3dae62d55932da54b31cb6c86", size = 37515, upload-time = "2025-04-24T03:35:24.344Z" }, +] + [[package]] name = "hf-xet" version = "1.4.2" @@ -685,6 +849,43 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b4/7e/ccf239da366b37ba7f0b36095450efae4a64980bdc7ec2f51354205fdf39/hf_xet-1.4.2-cp37-abi3-win_arm64.whl", hash = "sha256:32c012286b581f783653e718c1862aea5b9eb140631685bb0c5e7012c8719a87", size = 3533426, upload-time = "2026-03-13T06:58:55.46Z" }, ] +[[package]] +name = "httpcore" +version = "1.0.9" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "certifi" }, + { name = "h11" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/06/94/82699a10bca87a5556c9c59b5963f2d039dbd239f25bc2a63907a05a14cb/httpcore-1.0.9.tar.gz", hash = "sha256:6e34463af53fd2ab5d807f399a9b45ea31c3dfa2276f15a2c3f00afff6e176e8", size = 85484, upload-time = "2025-04-24T22:06:22.219Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7e/f5/f66802a942d491edb555dd61e3a9961140fd64c90bce1eafd741609d334d/httpcore-1.0.9-py3-none-any.whl", hash = 
"sha256:2d400746a40668fc9dec9810239072b40b4484b640a8c38fd654a024c7a1bf55", size = 78784, upload-time = "2025-04-24T22:06:20.566Z" }, +] + +[[package]] +name = "httpx" +version = "0.28.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "anyio" }, + { name = "certifi" }, + { name = "httpcore" }, + { name = "idna" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b1/df/48c586a5fe32a0f01324ee087459e112ebb7224f646c0b5023f5e79e9956/httpx-0.28.1.tar.gz", hash = "sha256:75e98c5f16b0f35b567856f597f06ff2270a374470a5c2392242528e3e3e42fc", size = 141406, upload-time = "2024-12-06T15:37:23.222Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2a/39/e50c7c3a983047577ee07d2a9e53faf5a69493943ec3f6a384bdc792deb2/httpx-0.28.1-py3-none-any.whl", hash = "sha256:d909fcccc110f8c7faf814ca82a9a4d816bc5a6dbfea25d6591d6985b8ba59ad", size = 73517, upload-time = "2024-12-06T15:37:21.509Z" }, +] + +[[package]] +name = "httpx-sse" +version = "0.4.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/0f/4c/751061ffa58615a32c31b2d82e8482be8dd4a89154f003147acee90f2be9/httpx_sse-0.4.3.tar.gz", hash = "sha256:9b1ed0127459a66014aec3c56bebd93da3c1bc8bb6618c8082039a44889a755d", size = 15943, upload-time = "2025-10-10T21:48:22.271Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d2/fd/6668e5aec43ab844de6fc74927e155a3b37bf40d7c3790e49fc0406b6578/httpx_sse-0.4.3-py3-none-any.whl", hash = "sha256:0ac1c9fe3c0afad2e0ebb25a934a59f4c7823b60792691f779fad2c5568830fc", size = 8960, upload-time = "2025-10-10T21:48:21.158Z" }, +] + [[package]] name = "huggingface-hub" version = "0.36.2" @@ -1129,6 +1330,31 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/af/33/ee4519fa02ed11a94aef9559552f3b17bb863f2ecfe1a35dc7f548cde231/matplotlib_inline-0.2.1-py3-none-any.whl", hash = "sha256:d56ce5156ba6085e00a9d54fead6ed29a9c47e215cd1bba2e976ef39f5710a76", size = 9516, 
upload-time = "2025-10-23T09:00:20.675Z" }, ] +[[package]] +name = "mcp" +version = "1.26.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "anyio" }, + { name = "httpx" }, + { name = "httpx-sse" }, + { name = "jsonschema" }, + { name = "pydantic" }, + { name = "pydantic-settings" }, + { name = "pyjwt", extra = ["crypto"] }, + { name = "python-multipart" }, + { name = "pywin32", marker = "sys_platform == 'win32'" }, + { name = "sse-starlette" }, + { name = "starlette" }, + { name = "typing-extensions" }, + { name = "typing-inspection" }, + { name = "uvicorn", marker = "sys_platform != 'emscripten'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/fc/6d/62e76bbb8144d6ed86e202b5edd8a4cb631e7c8130f3f4893c3f90262b10/mcp-1.26.0.tar.gz", hash = "sha256:db6e2ef491eecc1a0d93711a76f28dec2e05999f93afd48795da1c1137142c66", size = 608005, upload-time = "2026-01-24T19:40:32.468Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/fd/d9/eaa1f80170d2b7c5ba23f3b59f766f3a0bb41155fbc32a69adfa1adaaef9/mcp-1.26.0-py3-none-any.whl", hash = "sha256:904a21c33c25aa98ddbeb47273033c435e595bbacfdb177f4bd87f6dceebe1ca", size = 233615, upload-time = "2026-01-24T19:40:30.652Z" }, +] + [[package]] name = "mdurl" version = "0.1.2" @@ -1266,6 +1492,11 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/32/28/79f0f8de97cce916d5ae88a7bee1ad724855e83e6019c0b4d5b3fabc80f3/mkdocstrings_python-2.0.3-py3-none-any.whl", hash = "sha256:0b83513478bdfd803ff05aa43e9b1fca9dd22bcd9471f09ca6257f009bc5ee12", size = 104779, upload-time = "2026-02-20T10:38:34.517Z" }, ] +[[package]] +name = "mosaic" +version = "0.1.0" +source = { git = "https://github.com/facebookresearch/mosaic.git?rev=27a16228ae22222cf2b834d31afbf64f2dff66e0#27a16228ae22222cf2b834d31afbf64f2dff66e0" } + [[package]] name = "mpmath" version = "1.3.0" @@ -1275,6 +1506,15 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/43/e3/7d92a15f894aa0c9c4b49b8ee9ac9850d6e63b03c9c32c0367a13ae62209/mpmath-1.3.0-py3-none-any.whl", hash = "sha256:a0b2b9fe80bbcd81a6647ff13108738cfb482d481d826cc0e02f5b35e5c88d2c", size = 536198, upload-time = "2023-03-07T16:47:09.197Z" }, ] +[[package]] +name = "narwhals" +version = "2.18.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/47/b4/02a8add181b8d2cd5da3b667cd102ae536e8c9572ab1a130816d70a89edb/narwhals-2.18.0.tar.gz", hash = "sha256:1de5cee338bc17c338c6278df2c38c0dd4290499fcf70d75e0a51d5f22a6e960", size = 620222, upload-time = "2026-03-10T15:51:27.14Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/fe/75/0b4a10da17a44cf13567d08a9c7632a285297e46253263f1ae119129d10a/narwhals-2.18.0-py3-none-any.whl", hash = "sha256:68378155ee706ac9c5b25868ef62ecddd62947b6df7801a0a156bc0a615d2d0d", size = 444865, upload-time = "2026-03-10T15:51:24.085Z" }, +] + [[package]] name = "nbformat" version = "5.10.4" @@ -1427,7 +1667,7 @@ name = "nvidia-cudnn-cu12" version = "9.10.2.21" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "nvidia-cublas-cu12" }, + { name = "nvidia-cublas-cu12", marker = "sys_platform != 'emscripten' and sys_platform != 'win32'" }, ] wheels = [ { url = "https://files.pythonhosted.org/packages/ba/51/e123d997aa098c61d029f76663dedbfb9bc8dcf8c60cbd6adbe42f76d049/nvidia_cudnn_cu12-9.10.2.21-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:949452be657fa16687d0930933f032835951ef0892b37d2d53824d1a84dc97a8", size = 706758467, upload-time = "2025-06-06T21:54:08.597Z" }, @@ -1438,7 +1678,7 @@ name = "nvidia-cufft-cu12" version = "11.3.3.83" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "nvidia-nvjitlink-cu12" }, + { name = "nvidia-nvjitlink-cu12", marker = "sys_platform != 'emscripten' and sys_platform != 'win32'" }, ] wheels = [ { url = 
"https://files.pythonhosted.org/packages/1f/13/ee4e00f30e676b66ae65b4f08cb5bcbb8392c03f54f2d5413ea99a5d1c80/nvidia_cufft_cu12-11.3.3.83-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:4d2dd21ec0b88cf61b62e6b43564355e5222e4a3fb394cac0db101f2dd0d4f74", size = 193118695, upload-time = "2025-03-07T01:45:27.821Z" }, @@ -1465,9 +1705,9 @@ name = "nvidia-cusolver-cu12" version = "11.7.3.90" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "nvidia-cublas-cu12" }, - { name = "nvidia-cusparse-cu12" }, - { name = "nvidia-nvjitlink-cu12" }, + { name = "nvidia-cublas-cu12", marker = "sys_platform != 'emscripten' and sys_platform != 'win32'" }, + { name = "nvidia-cusparse-cu12", marker = "sys_platform != 'emscripten' and sys_platform != 'win32'" }, + { name = "nvidia-nvjitlink-cu12", marker = "sys_platform != 'emscripten' and sys_platform != 'win32'" }, ] wheels = [ { url = "https://files.pythonhosted.org/packages/85/48/9a13d2975803e8cf2777d5ed57b87a0b6ca2cc795f9a4f59796a910bfb80/nvidia_cusolver_cu12-11.7.3.90-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:4376c11ad263152bd50ea295c05370360776f8c3427b30991df774f9fb26c450", size = 267506905, upload-time = "2025-03-07T01:47:16.273Z" }, @@ -1478,7 +1718,7 @@ name = "nvidia-cusparse-cu12" version = "12.5.8.93" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "nvidia-nvjitlink-cu12" }, + { name = "nvidia-nvjitlink-cu12", marker = "sys_platform != 'emscripten' and sys_platform != 'win32'" }, ] wheels = [ { url = "https://files.pythonhosted.org/packages/c2/f5/e1854cb2f2bcd4280c44736c93550cc300ff4b8c95ebe370d0aa7d2b473d/nvidia_cusparse_cu12-12.5.8.93-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1ec05d76bbbd8b61b06a80e1eaf8cf4959c3d4ce8e711b65ebd0443bb0ebb13b", size = 288216466, upload-time = "2025-03-07T01:48:13.779Z" }, @@ -1524,6 +1764,19 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/a2/eb/86626c1bbc2edb86323022371c39aa48df6fd8b0a1647bc274577f72e90b/nvidia_nvtx_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:5b17e2001cc0d751a5bc2c6ec6d26ad95913324a4adb86788c944f8ce9ba441f", size = 89954, upload-time = "2025-03-07T01:42:44.131Z" }, ] +[[package]] +name = "omegaconf" +version = "2.3.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "antlr4-python3-runtime" }, + { name = "pyyaml" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/09/48/6388f1bb9da707110532cb70ec4d2822858ddfb44f1cdf1233c20a80ea4b/omegaconf-2.3.0.tar.gz", hash = "sha256:d5d4b6d29955cc50ad50c46dc269bcd92c6e00f5f90d23ab5fee7bfca4ba4cc7", size = 3298120, upload-time = "2022-12-08T20:59:22.753Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e3/94/1843518e420fa3ed6919835845df698c7e27e183cb997394e4a670973a65/omegaconf-2.3.0-py3-none-any.whl", hash = "sha256:7b4df175cdb08ba400f45cae3bdcae7ba8365db4d165fc65fd04b050ab63b46b", size = 79500, upload-time = "2022-12-08T20:59:19.686Z" }, +] + [[package]] name = "packaging" version = "26.0" @@ -1542,6 +1795,58 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/90/96/04b8e52da071d28f5e21a805b19cb9390aa17a47462ac87f5e2696b9566d/paginate-0.5.7-py2.py3-none-any.whl", hash = "sha256:b885e2af73abcf01d9559fd5216b57ef722f8c42affbb63942377668e35c7591", size = 13746, upload-time = "2024-08-25T14:17:22.55Z" }, ] +[[package]] +name = "pandas" +version = "3.0.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "numpy" }, + { name = "python-dateutil" }, + { name = "tzdata", marker = "sys_platform == 'emscripten' or sys_platform == 'win32'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/2e/0c/b28ed414f080ee0ad153f848586d61d1878f91689950f037f976ce15f6c8/pandas-3.0.1.tar.gz", hash = "sha256:4186a699674af418f655dbd420ed87f50d56b4cd6603784279d9eef6627823c8", size = 
4641901, upload-time = "2026-02-17T22:20:16.434Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/37/51/b467209c08dae2c624873d7491ea47d2b47336e5403309d433ea79c38571/pandas-3.0.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:476f84f8c20c9f5bc47252b66b4bb25e1a9fc2fa98cead96744d8116cb85771d", size = 10344357, upload-time = "2026-02-17T22:18:38.262Z" }, + { url = "https://files.pythonhosted.org/packages/7c/f1/e2567ffc8951ab371db2e40b2fe068e36b81d8cf3260f06ae508700e5504/pandas-3.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:0ab749dfba921edf641d4036c4c21c0b3ea70fea478165cb98a998fb2a261955", size = 9884543, upload-time = "2026-02-17T22:18:41.476Z" }, + { url = "https://files.pythonhosted.org/packages/d7/39/327802e0b6d693182403c144edacbc27eb82907b57062f23ef5a4c4a5ea7/pandas-3.0.1-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b8e36891080b87823aff3640c78649b91b8ff6eea3c0d70aeabd72ea43ab069b", size = 10396030, upload-time = "2026-02-17T22:18:43.822Z" }, + { url = "https://files.pythonhosted.org/packages/3d/fe/89d77e424365280b79d99b3e1e7d606f5165af2f2ecfaf0c6d24c799d607/pandas-3.0.1-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:532527a701281b9dd371e2f582ed9094f4c12dd9ffb82c0c54ee28d8ac9520c4", size = 10876435, upload-time = "2026-02-17T22:18:45.954Z" }, + { url = "https://files.pythonhosted.org/packages/b5/a6/2a75320849dd154a793f69c951db759aedb8d1dd3939eeacda9bdcfa1629/pandas-3.0.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:356e5c055ed9b0da1580d465657bc7d00635af4fd47f30afb23025352ba764d1", size = 11405133, upload-time = "2026-02-17T22:18:48.533Z" }, + { url = "https://files.pythonhosted.org/packages/58/53/1d68fafb2e02d7881df66aa53be4cd748d25cbe311f3b3c85c93ea5d30ca/pandas-3.0.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:9d810036895f9ad6345b8f2a338dd6998a74e8483847403582cab67745bff821", size = 11932065, upload-time = "2026-02-17T22:18:50.837Z" }, + { url = 
"https://files.pythonhosted.org/packages/75/08/67cc404b3a966b6df27b38370ddd96b3b023030b572283d035181854aac5/pandas-3.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:536232a5fe26dd989bd633e7a0c450705fdc86a207fec7254a55e9a22950fe43", size = 9741627, upload-time = "2026-02-17T22:18:53.905Z" }, + { url = "https://files.pythonhosted.org/packages/86/4f/caf9952948fb00d23795f09b893d11f1cacb384e666854d87249530f7cbe/pandas-3.0.1-cp312-cp312-win_arm64.whl", hash = "sha256:0f463ebfd8de7f326d38037c7363c6dacb857c5881ab8961fb387804d6daf2f7", size = 9052483, upload-time = "2026-02-17T22:18:57.31Z" }, + { url = "https://files.pythonhosted.org/packages/0b/48/aad6ec4f8d007534c091e9a7172b3ec1b1ee6d99a9cbb936b5eab6c6cf58/pandas-3.0.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:5272627187b5d9c20e55d27caf5f2cd23e286aba25cadf73c8590e432e2b7262", size = 10317509, upload-time = "2026-02-17T22:18:59.498Z" }, + { url = "https://files.pythonhosted.org/packages/a8/14/5990826f779f79148ae9d3a2c39593dc04d61d5d90541e71b5749f35af95/pandas-3.0.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:661e0f665932af88c7877f31da0dc743fe9c8f2524bdffe23d24fdcb67ef9d56", size = 9860561, upload-time = "2026-02-17T22:19:02.265Z" }, + { url = "https://files.pythonhosted.org/packages/fa/80/f01ff54664b6d70fed71475543d108a9b7c888e923ad210795bef04ffb7d/pandas-3.0.1-cp313-cp313-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:75e6e292ff898679e47a2199172593d9f6107fd2dd3617c22c2946e97d5df46e", size = 10365506, upload-time = "2026-02-17T22:19:05.017Z" }, + { url = "https://files.pythonhosted.org/packages/f2/85/ab6d04733a7d6ff32bfc8382bf1b07078228f5d6ebec5266b91bfc5c4ff7/pandas-3.0.1-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1ff8cf1d2896e34343197685f432450ec99a85ba8d90cce2030c5eee2ef98791", size = 10873196, upload-time = "2026-02-17T22:19:07.204Z" }, + { url = 
"https://files.pythonhosted.org/packages/48/a9/9301c83d0b47c23ac5deab91c6b39fd98d5b5db4d93b25df8d381451828f/pandas-3.0.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:eca8b4510f6763f3d37359c2105df03a7a221a508f30e396a51d0713d462e68a", size = 11370859, upload-time = "2026-02-17T22:19:09.436Z" }, + { url = "https://files.pythonhosted.org/packages/59/fe/0c1fc5bd2d29c7db2ab372330063ad555fb83e08422829c785f5ec2176ca/pandas-3.0.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:06aff2ad6f0b94a17822cf8b83bbb563b090ed82ff4fe7712db2ce57cd50d9b8", size = 11924584, upload-time = "2026-02-17T22:19:11.562Z" }, + { url = "https://files.pythonhosted.org/packages/d6/7d/216a1588b65a7aa5f4535570418a599d943c85afb1d95b0876fc00aa1468/pandas-3.0.1-cp313-cp313-win_amd64.whl", hash = "sha256:9fea306c783e28884c29057a1d9baa11a349bbf99538ec1da44c8476563d1b25", size = 9742769, upload-time = "2026-02-17T22:19:13.926Z" }, + { url = "https://files.pythonhosted.org/packages/c4/cb/810a22a6af9a4e97c8ab1c946b47f3489c5bca5adc483ce0ffc84c9cc768/pandas-3.0.1-cp313-cp313-win_arm64.whl", hash = "sha256:a8d37a43c52917427e897cb2e429f67a449327394396a81034a4449b99afda59", size = 9043855, upload-time = "2026-02-17T22:19:16.09Z" }, + { url = "https://files.pythonhosted.org/packages/92/fa/423c89086cca1f039cf1253c3ff5b90f157b5b3757314aa635f6bf3e30aa/pandas-3.0.1-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:d54855f04f8246ed7b6fc96b05d4871591143c46c0b6f4af874764ed0d2d6f06", size = 10752673, upload-time = "2026-02-17T22:19:18.304Z" }, + { url = "https://files.pythonhosted.org/packages/22/23/b5a08ec1f40020397f0faba72f1e2c11f7596a6169c7b3e800abff0e433f/pandas-3.0.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:4e1b677accee34a09e0dc2ce5624e4a58a1870ffe56fc021e9caf7f23cd7668f", size = 10404967, upload-time = "2026-02-17T22:19:20.726Z" }, + { url = 
"https://files.pythonhosted.org/packages/5c/81/94841f1bb4afdc2b52a99daa895ac2c61600bb72e26525ecc9543d453ebc/pandas-3.0.1-cp313-cp313t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a9cabbdcd03f1b6cd254d6dda8ae09b0252524be1592594c00b7895916cb1324", size = 10320575, upload-time = "2026-02-17T22:19:24.919Z" }, + { url = "https://files.pythonhosted.org/packages/0a/8b/2ae37d66a5342a83adadfd0cb0b4bf9c3c7925424dd5f40d15d6cfaa35ee/pandas-3.0.1-cp313-cp313t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5ae2ab1f166668b41e770650101e7090824fd34d17915dd9cd479f5c5e0065e9", size = 10710921, upload-time = "2026-02-17T22:19:27.181Z" }, + { url = "https://files.pythonhosted.org/packages/a2/61/772b2e2757855e232b7ccf7cb8079a5711becb3a97f291c953def15a833f/pandas-3.0.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:6bf0603c2e30e2cafac32807b06435f28741135cb8697eae8b28c7d492fc7d76", size = 11334191, upload-time = "2026-02-17T22:19:29.411Z" }, + { url = "https://files.pythonhosted.org/packages/1b/08/b16c6df3ef555d8495d1d265a7963b65be166785d28f06a350913a4fac78/pandas-3.0.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:6c426422973973cae1f4a23e51d4ae85974f44871b24844e4f7de752dd877098", size = 11782256, upload-time = "2026-02-17T22:19:32.34Z" }, + { url = "https://files.pythonhosted.org/packages/55/80/178af0594890dee17e239fca96d3d8670ba0f5ff59b7d0439850924a9c09/pandas-3.0.1-cp313-cp313t-win_amd64.whl", hash = "sha256:b03f91ae8c10a85c1613102c7bef5229b5379f343030a3ccefeca8a33414cf35", size = 10485047, upload-time = "2026-02-17T22:19:34.605Z" }, + { url = "https://files.pythonhosted.org/packages/bb/8b/4bb774a998b97e6c2fd62a9e6cfdaae133b636fd1c468f92afb4ae9a447a/pandas-3.0.1-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:99d0f92ed92d3083d140bf6b97774f9f13863924cf3f52a70711f4e7588f9d0a", size = 10322465, upload-time = "2026-02-17T22:19:36.803Z" }, + { url = 
"https://files.pythonhosted.org/packages/72/3a/5b39b51c64159f470f1ca3b1c2a87da290657ca022f7cd11442606f607d1/pandas-3.0.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:3b66857e983208654294bb6477b8a63dee26b37bdd0eb34d010556e91261784f", size = 9910632, upload-time = "2026-02-17T22:19:39.001Z" }, + { url = "https://files.pythonhosted.org/packages/4e/f7/b449ffb3f68c11da12fc06fbf6d2fa3a41c41e17d0284d23a79e1c13a7e4/pandas-3.0.1-cp314-cp314-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:56cf59638bf24dc9bdf2154c81e248b3289f9a09a6d04e63608c159022352749", size = 10440535, upload-time = "2026-02-17T22:19:41.157Z" }, + { url = "https://files.pythonhosted.org/packages/55/77/6ea82043db22cb0f2bbfe7198da3544000ddaadb12d26be36e19b03a2dc5/pandas-3.0.1-cp314-cp314-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c1a9f55e0f46951874b863d1f3906dcb57df2d9be5c5847ba4dfb55b2c815249", size = 10893940, upload-time = "2026-02-17T22:19:43.493Z" }, + { url = "https://files.pythonhosted.org/packages/03/30/f1b502a72468c89412c1b882a08f6eed8a4ee9dc033f35f65d0663df6081/pandas-3.0.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:1849f0bba9c8a2fb0f691d492b834cc8dadf617e29015c66e989448d58d011ee", size = 11442711, upload-time = "2026-02-17T22:19:46.074Z" }, + { url = "https://files.pythonhosted.org/packages/0d/f0/ebb6ddd8fc049e98cabac5c2924d14d1dda26a20adb70d41ea2e428d3ec4/pandas-3.0.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:c3d288439e11b5325b02ae6e9cc83e6805a62c40c5a6220bea9beb899c073b1c", size = 11963918, upload-time = "2026-02-17T22:19:48.838Z" }, + { url = "https://files.pythonhosted.org/packages/09/f8/8ce132104074f977f907442790eaae24e27bce3b3b454e82faa3237ff098/pandas-3.0.1-cp314-cp314-win_amd64.whl", hash = "sha256:93325b0fe372d192965f4cca88d97667f49557398bbf94abdda3bf1b591dbe66", size = 9862099, upload-time = "2026-02-17T22:19:51.081Z" }, + { url = 
"https://files.pythonhosted.org/packages/e6/b7/6af9aac41ef2456b768ef0ae60acf8abcebb450a52043d030a65b4b7c9bd/pandas-3.0.1-cp314-cp314-win_arm64.whl", hash = "sha256:97ca08674e3287c7148f4858b01136f8bdfe7202ad25ad04fec602dd1d29d132", size = 9185333, upload-time = "2026-02-17T22:19:53.266Z" }, + { url = "https://files.pythonhosted.org/packages/66/fc/848bb6710bc6061cb0c5badd65b92ff75c81302e0e31e496d00029fe4953/pandas-3.0.1-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:58eeb1b2e0fb322befcf2bbc9ba0af41e616abadb3d3414a6bc7167f6cbfce32", size = 10772664, upload-time = "2026-02-17T22:19:55.806Z" }, + { url = "https://files.pythonhosted.org/packages/69/5c/866a9bbd0f79263b4b0db6ec1a341be13a1473323f05c122388e0f15b21d/pandas-3.0.1-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:cd9af1276b5ca9e298bd79a26bda32fa9cc87ed095b2a9a60978d2ca058eaf87", size = 10421286, upload-time = "2026-02-17T22:19:58.091Z" }, + { url = "https://files.pythonhosted.org/packages/51/a4/2058fb84fb1cfbfb2d4a6d485e1940bb4ad5716e539d779852494479c580/pandas-3.0.1-cp314-cp314t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:94f87a04984d6b63788327cd9f79dda62b7f9043909d2440ceccf709249ca988", size = 10342050, upload-time = "2026-02-17T22:20:01.376Z" }, + { url = "https://files.pythonhosted.org/packages/22/1b/674e89996cc4be74db3c4eb09240c4bb549865c9c3f5d9b086ff8fcfbf00/pandas-3.0.1-cp314-cp314t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:85fe4c4df62e1e20f9db6ebfb88c844b092c22cd5324bdcf94bfa2fc1b391221", size = 10740055, upload-time = "2026-02-17T22:20:04.328Z" }, + { url = "https://files.pythonhosted.org/packages/d0/f8/e954b750764298c22fa4614376531fe63c521ef517e7059a51f062b87dca/pandas-3.0.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:331ca75a2f8672c365ae25c0b29e46f5ac0c6551fdace8eec4cd65e4fac271ff", size = 11357632, upload-time = "2026-02-17T22:20:06.647Z" }, + { url = 
"https://files.pythonhosted.org/packages/6d/02/c6e04b694ffd68568297abd03588b6d30295265176a5c01b7459d3bc35a3/pandas-3.0.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:15860b1fdb1973fffade772fdb931ccf9b2f400a3f5665aef94a00445d7d8dd5", size = 11810974, upload-time = "2026-02-17T22:20:08.946Z" }, + { url = "https://files.pythonhosted.org/packages/89/41/d7dfb63d2407f12055215070c42fc6ac41b66e90a2946cdc5e759058398b/pandas-3.0.1-cp314-cp314t-win_amd64.whl", hash = "sha256:44f1364411d5670efa692b146c748f4ed013df91ee91e9bec5677fb1fd58b937", size = 10884622, upload-time = "2026-02-17T22:20:11.711Z" }, + { url = "https://files.pythonhosted.org/packages/68/b0/34937815889fa982613775e4b97fddd13250f11012d769949c5465af2150/pandas-3.0.1-cp314-cp314t-win_arm64.whl", hash = "sha256:108dd1790337a494aa80e38def654ca3f0968cf4f362c85f44c15e471667102d", size = 9452085, upload-time = "2026-02-17T22:20:14.331Z" }, +] + [[package]] name = "parso" version = "0.8.6" @@ -1595,7 +1900,7 @@ name = "pexpect" version = "4.9.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "ptyprocess" }, + { name = "ptyprocess", marker = "sys_platform != 'emscripten' and sys_platform != 'win32'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/42/92/cc564bf6381ff43ce1f4d06852fc19a2f11d180f23dc32d9588bee2f149d/pexpect-4.9.0.tar.gz", hash = "sha256:ee7d41123f3c9911050ea2c2dac107568dc43b2d3b0c7557a33212c398ead30f", size = 166450, upload-time = "2023-11-25T09:07:26.339Z" } wheels = [ @@ -1730,6 +2035,21 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ce/4f/5249960887b1fbe561d9ff265496d170b55a735b76724f10ef19f9e40716/prompt_toolkit-3.0.51-py3-none-any.whl", hash = "sha256:52742911fde84e2d423e2f9a4cf1de7d7ac4e51958f648d9540e0fb8db077b07", size = 387810, upload-time = "2025-04-15T09:18:44.753Z" }, ] +[[package]] +name = "protobuf" +version = "7.34.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/6b/6b/a0e95cad1ad7cc3f2c6821fcab91671bd5b78bd42afb357bb4765f29bc41/protobuf-7.34.1.tar.gz", hash = "sha256:9ce42245e704cc5027be797c1db1eb93184d44d1cdd71811fb2d9b25ad541280", size = 454708, upload-time = "2026-03-20T17:34:47.036Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ec/11/3325d41e6ee15bf1125654301211247b042563bcc898784351252549a8ad/protobuf-7.34.1-cp310-abi3-macosx_10_9_universal2.whl", hash = "sha256:d8b2cc79c4d8f62b293ad9b11ec3aebce9af481fa73e64556969f7345ebf9fc7", size = 429247, upload-time = "2026-03-20T17:34:37.024Z" }, + { url = "https://files.pythonhosted.org/packages/eb/9d/aa69df2724ff63efa6f72307b483ce0827f4347cc6d6df24b59e26659fef/protobuf-7.34.1-cp310-abi3-manylinux2014_aarch64.whl", hash = "sha256:5185e0e948d07abe94bb76ec9b8416b604cfe5da6f871d67aad30cbf24c3110b", size = 325753, upload-time = "2026-03-20T17:34:38.751Z" }, + { url = "https://files.pythonhosted.org/packages/92/e8/d174c91fd48e50101943f042b09af9029064810b734e4160bbe282fa1caa/protobuf-7.34.1-cp310-abi3-manylinux2014_s390x.whl", hash = "sha256:403b093a6e28a960372b44e5eb081775c9b056e816a8029c61231743d63f881a", size = 340198, upload-time = "2026-03-20T17:34:39.871Z" }, + { url = "https://files.pythonhosted.org/packages/53/1b/3b431694a4dc6d37b9f653f0c64b0a0d9ec074ee810710c0c3da21d67ba7/protobuf-7.34.1-cp310-abi3-manylinux2014_x86_64.whl", hash = "sha256:8ff40ce8cd688f7265326b38d5a1bed9bfdf5e6723d49961432f83e21d5713e4", size = 324267, upload-time = "2026-03-20T17:34:41.1Z" }, + { url = "https://files.pythonhosted.org/packages/85/29/64de04a0ac142fb685fd09999bc3d337943fb386f3a0ec57f92fd8203f97/protobuf-7.34.1-cp310-abi3-win32.whl", hash = "sha256:34b84ce27680df7cca9f231043ada0daa55d0c44a2ddfaa58ec1d0d89d8bf60a", size = 426628, upload-time = "2026-03-20T17:34:42.536Z" }, + { url = "https://files.pythonhosted.org/packages/4d/87/cb5e585192a22b8bd457df5a2c16a75ea0db9674c3a0a39fc9347d84e075/protobuf-7.34.1-cp310-abi3-win_amd64.whl", 
hash = "sha256:e97b55646e6ce5cbb0954a8c28cd39a5869b59090dfaa7df4598a7fba869468c", size = 437901, upload-time = "2026-03-20T17:34:44.112Z" }, + { url = "https://files.pythonhosted.org/packages/88/95/608f665226bca68b736b79e457fded9a2a38c4f4379a4a7614303d9db3bc/protobuf-7.34.1-py3-none-any.whl", hash = "sha256:bb3812cd53aefea2b028ef42bd780f5b96407247f20c6ef7c679807e9d188f11", size = 170715, upload-time = "2026-03-20T17:34:45.384Z" }, +] + [[package]] name = "psutil" version = "7.2.2" @@ -1785,6 +2105,106 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/0c/c3/44f3fbbfa403ea2a7c779186dc20772604442dde72947e7d01069cbe98e3/pycparser-3.0-py3-none-any.whl", hash = "sha256:b727414169a36b7d524c1c3e31839a521725078d7b2ff038656844266160a992", size = 48172, upload-time = "2026-01-21T14:26:50.693Z" }, ] +[[package]] +name = "pydantic" +version = "2.12.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "annotated-types" }, + { name = "pydantic-core" }, + { name = "typing-extensions" }, + { name = "typing-inspection" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/69/44/36f1a6e523abc58ae5f928898e4aca2e0ea509b5aa6f6f392a5d882be928/pydantic-2.12.5.tar.gz", hash = "sha256:4d351024c75c0f085a9febbb665ce8c0c6ec5d30e903bdb6394b7ede26aebb49", size = 821591, upload-time = "2025-11-26T15:11:46.471Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5a/87/b70ad306ebb6f9b585f114d0ac2137d792b48be34d732d60e597c2f8465a/pydantic-2.12.5-py3-none-any.whl", hash = "sha256:e561593fccf61e8a20fc46dfc2dfe075b8be7d0188df33f221ad1f0139180f9d", size = 463580, upload-time = "2025-11-26T15:11:44.605Z" }, +] + +[[package]] +name = "pydantic-core" +version = "2.41.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/71/70/23b021c950c2addd24ec408e9ab05d59b035b39d97cdc1130e1bce647bb6/pydantic_core-2.41.5.tar.gz", hash = 
"sha256:08daa51ea16ad373ffd5e7606252cc32f07bc72b28284b6bc9c6df804816476e", size = 460952, upload-time = "2025-11-04T13:43:49.098Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5f/5d/5f6c63eebb5afee93bcaae4ce9a898f3373ca23df3ccaef086d0233a35a7/pydantic_core-2.41.5-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:f41a7489d32336dbf2199c8c0a215390a751c5b014c2c1c5366e817202e9cdf7", size = 2110990, upload-time = "2025-11-04T13:39:58.079Z" }, + { url = "https://files.pythonhosted.org/packages/aa/32/9c2e8ccb57c01111e0fd091f236c7b371c1bccea0fa85247ac55b1e2b6b6/pydantic_core-2.41.5-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:070259a8818988b9a84a449a2a7337c7f430a22acc0859c6b110aa7212a6d9c0", size = 1896003, upload-time = "2025-11-04T13:39:59.956Z" }, + { url = "https://files.pythonhosted.org/packages/68/b8/a01b53cb0e59139fbc9e4fda3e9724ede8de279097179be4ff31f1abb65a/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e96cea19e34778f8d59fe40775a7a574d95816eb150850a85a7a4c8f4b94ac69", size = 1919200, upload-time = "2025-11-04T13:40:02.241Z" }, + { url = "https://files.pythonhosted.org/packages/38/de/8c36b5198a29bdaade07b5985e80a233a5ac27137846f3bc2d3b40a47360/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ed2e99c456e3fadd05c991f8f437ef902e00eedf34320ba2b0842bd1c3ca3a75", size = 2052578, upload-time = "2025-11-04T13:40:04.401Z" }, + { url = "https://files.pythonhosted.org/packages/00/b5/0e8e4b5b081eac6cb3dbb7e60a65907549a1ce035a724368c330112adfdd/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:65840751b72fbfd82c3c640cff9284545342a4f1eb1586ad0636955b261b0b05", size = 2208504, upload-time = "2025-11-04T13:40:06.072Z" }, + { url = 
"https://files.pythonhosted.org/packages/77/56/87a61aad59c7c5b9dc8caad5a41a5545cba3810c3e828708b3d7404f6cef/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e536c98a7626a98feb2d3eaf75944ef6f3dbee447e1f841eae16f2f0a72d8ddc", size = 2335816, upload-time = "2025-11-04T13:40:07.835Z" }, + { url = "https://files.pythonhosted.org/packages/0d/76/941cc9f73529988688a665a5c0ecff1112b3d95ab48f81db5f7606f522d3/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eceb81a8d74f9267ef4081e246ffd6d129da5d87e37a77c9bde550cb04870c1c", size = 2075366, upload-time = "2025-11-04T13:40:09.804Z" }, + { url = "https://files.pythonhosted.org/packages/d3/43/ebef01f69baa07a482844faaa0a591bad1ef129253ffd0cdaa9d8a7f72d3/pydantic_core-2.41.5-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d38548150c39b74aeeb0ce8ee1d8e82696f4a4e16ddc6de7b1d8823f7de4b9b5", size = 2171698, upload-time = "2025-11-04T13:40:12.004Z" }, + { url = "https://files.pythonhosted.org/packages/b1/87/41f3202e4193e3bacfc2c065fab7706ebe81af46a83d3e27605029c1f5a6/pydantic_core-2.41.5-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:c23e27686783f60290e36827f9c626e63154b82b116d7fe9adba1fda36da706c", size = 2132603, upload-time = "2025-11-04T13:40:13.868Z" }, + { url = "https://files.pythonhosted.org/packages/49/7d/4c00df99cb12070b6bccdef4a195255e6020a550d572768d92cc54dba91a/pydantic_core-2.41.5-cp312-cp312-musllinux_1_1_armv7l.whl", hash = "sha256:482c982f814460eabe1d3bb0adfdc583387bd4691ef00b90575ca0d2b6fe2294", size = 2329591, upload-time = "2025-11-04T13:40:15.672Z" }, + { url = "https://files.pythonhosted.org/packages/cc/6a/ebf4b1d65d458f3cda6a7335d141305dfa19bdc61140a884d165a8a1bbc7/pydantic_core-2.41.5-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:bfea2a5f0b4d8d43adf9d7b8bf019fb46fdd10a2e5cde477fbcb9d1fa08c68e1", size = 2319068, upload-time = "2025-11-04T13:40:17.532Z" }, + { url = 
"https://files.pythonhosted.org/packages/49/3b/774f2b5cd4192d5ab75870ce4381fd89cf218af999515baf07e7206753f0/pydantic_core-2.41.5-cp312-cp312-win32.whl", hash = "sha256:b74557b16e390ec12dca509bce9264c3bbd128f8a2c376eaa68003d7f327276d", size = 1985908, upload-time = "2025-11-04T13:40:19.309Z" }, + { url = "https://files.pythonhosted.org/packages/86/45/00173a033c801cacf67c190fef088789394feaf88a98a7035b0e40d53dc9/pydantic_core-2.41.5-cp312-cp312-win_amd64.whl", hash = "sha256:1962293292865bca8e54702b08a4f26da73adc83dd1fcf26fbc875b35d81c815", size = 2020145, upload-time = "2025-11-04T13:40:21.548Z" }, + { url = "https://files.pythonhosted.org/packages/f9/22/91fbc821fa6d261b376a3f73809f907cec5ca6025642c463d3488aad22fb/pydantic_core-2.41.5-cp312-cp312-win_arm64.whl", hash = "sha256:1746d4a3d9a794cacae06a5eaaccb4b8643a131d45fbc9af23e353dc0a5ba5c3", size = 1976179, upload-time = "2025-11-04T13:40:23.393Z" }, + { url = "https://files.pythonhosted.org/packages/87/06/8806241ff1f70d9939f9af039c6c35f2360cf16e93c2ca76f184e76b1564/pydantic_core-2.41.5-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:941103c9be18ac8daf7b7adca8228f8ed6bb7a1849020f643b3a14d15b1924d9", size = 2120403, upload-time = "2025-11-04T13:40:25.248Z" }, + { url = "https://files.pythonhosted.org/packages/94/02/abfa0e0bda67faa65fef1c84971c7e45928e108fe24333c81f3bfe35d5f5/pydantic_core-2.41.5-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:112e305c3314f40c93998e567879e887a3160bb8689ef3d2c04b6cc62c33ac34", size = 1896206, upload-time = "2025-11-04T13:40:27.099Z" }, + { url = "https://files.pythonhosted.org/packages/15/df/a4c740c0943e93e6500f9eb23f4ca7ec9bf71b19e608ae5b579678c8d02f/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0cbaad15cb0c90aa221d43c00e77bb33c93e8d36e0bf74760cd00e732d10a6a0", size = 1919307, upload-time = "2025-11-04T13:40:29.806Z" }, + { url = 
"https://files.pythonhosted.org/packages/9a/e3/6324802931ae1d123528988e0e86587c2072ac2e5394b4bc2bc34b61ff6e/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:03ca43e12fab6023fc79d28ca6b39b05f794ad08ec2feccc59a339b02f2b3d33", size = 2063258, upload-time = "2025-11-04T13:40:33.544Z" }, + { url = "https://files.pythonhosted.org/packages/c9/d4/2230d7151d4957dd79c3044ea26346c148c98fbf0ee6ebd41056f2d62ab5/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:dc799088c08fa04e43144b164feb0c13f9a0bc40503f8df3e9fde58a3c0c101e", size = 2214917, upload-time = "2025-11-04T13:40:35.479Z" }, + { url = "https://files.pythonhosted.org/packages/e6/9f/eaac5df17a3672fef0081b6c1bb0b82b33ee89aa5cec0d7b05f52fd4a1fa/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:97aeba56665b4c3235a0e52b2c2f5ae9cd071b8a8310ad27bddb3f7fb30e9aa2", size = 2332186, upload-time = "2025-11-04T13:40:37.436Z" }, + { url = "https://files.pythonhosted.org/packages/cf/4e/35a80cae583a37cf15604b44240e45c05e04e86f9cfd766623149297e971/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:406bf18d345822d6c21366031003612b9c77b3e29ffdb0f612367352aab7d586", size = 2073164, upload-time = "2025-11-04T13:40:40.289Z" }, + { url = "https://files.pythonhosted.org/packages/bf/e3/f6e262673c6140dd3305d144d032f7bd5f7497d3871c1428521f19f9efa2/pydantic_core-2.41.5-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:b93590ae81f7010dbe380cdeab6f515902ebcbefe0b9327cc4804d74e93ae69d", size = 2179146, upload-time = "2025-11-04T13:40:42.809Z" }, + { url = "https://files.pythonhosted.org/packages/75/c7/20bd7fc05f0c6ea2056a4565c6f36f8968c0924f19b7d97bbfea55780e73/pydantic_core-2.41.5-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:01a3d0ab748ee531f4ea6c3e48ad9dac84ddba4b0d82291f87248f2f9de8d740", size = 2137788, upload-time = 
"2025-11-04T13:40:44.752Z" }, + { url = "https://files.pythonhosted.org/packages/3a/8d/34318ef985c45196e004bc46c6eab2eda437e744c124ef0dbe1ff2c9d06b/pydantic_core-2.41.5-cp313-cp313-musllinux_1_1_armv7l.whl", hash = "sha256:6561e94ba9dacc9c61bce40e2d6bdc3bfaa0259d3ff36ace3b1e6901936d2e3e", size = 2340133, upload-time = "2025-11-04T13:40:46.66Z" }, + { url = "https://files.pythonhosted.org/packages/9c/59/013626bf8c78a5a5d9350d12e7697d3d4de951a75565496abd40ccd46bee/pydantic_core-2.41.5-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:915c3d10f81bec3a74fbd4faebe8391013ba61e5a1a8d48c4455b923bdda7858", size = 2324852, upload-time = "2025-11-04T13:40:48.575Z" }, + { url = "https://files.pythonhosted.org/packages/1a/d9/c248c103856f807ef70c18a4f986693a46a8ffe1602e5d361485da502d20/pydantic_core-2.41.5-cp313-cp313-win32.whl", hash = "sha256:650ae77860b45cfa6e2cdafc42618ceafab3a2d9a3811fcfbd3bbf8ac3c40d36", size = 1994679, upload-time = "2025-11-04T13:40:50.619Z" }, + { url = "https://files.pythonhosted.org/packages/9e/8b/341991b158ddab181cff136acd2552c9f35bd30380422a639c0671e99a91/pydantic_core-2.41.5-cp313-cp313-win_amd64.whl", hash = "sha256:79ec52ec461e99e13791ec6508c722742ad745571f234ea6255bed38c6480f11", size = 2019766, upload-time = "2025-11-04T13:40:52.631Z" }, + { url = "https://files.pythonhosted.org/packages/73/7d/f2f9db34af103bea3e09735bb40b021788a5e834c81eedb541991badf8f5/pydantic_core-2.41.5-cp313-cp313-win_arm64.whl", hash = "sha256:3f84d5c1b4ab906093bdc1ff10484838aca54ef08de4afa9de0f5f14d69639cd", size = 1981005, upload-time = "2025-11-04T13:40:54.734Z" }, + { url = "https://files.pythonhosted.org/packages/ea/28/46b7c5c9635ae96ea0fbb779e271a38129df2550f763937659ee6c5dbc65/pydantic_core-2.41.5-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:3f37a19d7ebcdd20b96485056ba9e8b304e27d9904d233d7b1015db320e51f0a", size = 2119622, upload-time = "2025-11-04T13:40:56.68Z" }, + { url = 
"https://files.pythonhosted.org/packages/74/1a/145646e5687e8d9a1e8d09acb278c8535ebe9e972e1f162ed338a622f193/pydantic_core-2.41.5-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:1d1d9764366c73f996edd17abb6d9d7649a7eb690006ab6adbda117717099b14", size = 1891725, upload-time = "2025-11-04T13:40:58.807Z" }, + { url = "https://files.pythonhosted.org/packages/23/04/e89c29e267b8060b40dca97bfc64a19b2a3cf99018167ea1677d96368273/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:25e1c2af0fce638d5f1988b686f3b3ea8cd7de5f244ca147c777769e798a9cd1", size = 1915040, upload-time = "2025-11-04T13:41:00.853Z" }, + { url = "https://files.pythonhosted.org/packages/84/a3/15a82ac7bd97992a82257f777b3583d3e84bdb06ba6858f745daa2ec8a85/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:506d766a8727beef16b7adaeb8ee6217c64fc813646b424d0804d67c16eddb66", size = 2063691, upload-time = "2025-11-04T13:41:03.504Z" }, + { url = "https://files.pythonhosted.org/packages/74/9b/0046701313c6ef08c0c1cf0e028c67c770a4e1275ca73131563c5f2a310a/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4819fa52133c9aa3c387b3328f25c1facc356491e6135b459f1de698ff64d869", size = 2213897, upload-time = "2025-11-04T13:41:05.804Z" }, + { url = "https://files.pythonhosted.org/packages/8a/cd/6bac76ecd1b27e75a95ca3a9a559c643b3afcd2dd62086d4b7a32a18b169/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2b761d210c9ea91feda40d25b4efe82a1707da2ef62901466a42492c028553a2", size = 2333302, upload-time = "2025-11-04T13:41:07.809Z" }, + { url = "https://files.pythonhosted.org/packages/4c/d2/ef2074dc020dd6e109611a8be4449b98cd25e1b9b8a303c2f0fca2f2bcf7/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:22f0fb8c1c583a3b6f24df2470833b40207e907b90c928cc8d3594b76f874375", size = 2064877, upload-time = 
"2025-11-04T13:41:09.827Z" }, + { url = "https://files.pythonhosted.org/packages/18/66/e9db17a9a763d72f03de903883c057b2592c09509ccfe468187f2a2eef29/pydantic_core-2.41.5-cp314-cp314-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2782c870e99878c634505236d81e5443092fba820f0373997ff75f90f68cd553", size = 2180680, upload-time = "2025-11-04T13:41:12.379Z" }, + { url = "https://files.pythonhosted.org/packages/d3/9e/3ce66cebb929f3ced22be85d4c2399b8e85b622db77dad36b73c5387f8f8/pydantic_core-2.41.5-cp314-cp314-musllinux_1_1_aarch64.whl", hash = "sha256:0177272f88ab8312479336e1d777f6b124537d47f2123f89cb37e0accea97f90", size = 2138960, upload-time = "2025-11-04T13:41:14.627Z" }, + { url = "https://files.pythonhosted.org/packages/a6/62/205a998f4327d2079326b01abee48e502ea739d174f0a89295c481a2272e/pydantic_core-2.41.5-cp314-cp314-musllinux_1_1_armv7l.whl", hash = "sha256:63510af5e38f8955b8ee5687740d6ebf7c2a0886d15a6d65c32814613681bc07", size = 2339102, upload-time = "2025-11-04T13:41:16.868Z" }, + { url = "https://files.pythonhosted.org/packages/3c/0d/f05e79471e889d74d3d88f5bd20d0ed189ad94c2423d81ff8d0000aab4ff/pydantic_core-2.41.5-cp314-cp314-musllinux_1_1_x86_64.whl", hash = "sha256:e56ba91f47764cc14f1daacd723e3e82d1a89d783f0f5afe9c364b8bb491ccdb", size = 2326039, upload-time = "2025-11-04T13:41:18.934Z" }, + { url = "https://files.pythonhosted.org/packages/ec/e1/e08a6208bb100da7e0c4b288eed624a703f4d129bde2da475721a80cab32/pydantic_core-2.41.5-cp314-cp314-win32.whl", hash = "sha256:aec5cf2fd867b4ff45b9959f8b20ea3993fc93e63c7363fe6851424c8a7e7c23", size = 1995126, upload-time = "2025-11-04T13:41:21.418Z" }, + { url = "https://files.pythonhosted.org/packages/48/5d/56ba7b24e9557f99c9237e29f5c09913c81eeb2f3217e40e922353668092/pydantic_core-2.41.5-cp314-cp314-win_amd64.whl", hash = "sha256:8e7c86f27c585ef37c35e56a96363ab8de4e549a95512445b85c96d3e2f7c1bf", size = 2015489, upload-time = "2025-11-04T13:41:24.076Z" }, + { url = 
"https://files.pythonhosted.org/packages/4e/bb/f7a190991ec9e3e0ba22e4993d8755bbc4a32925c0b5b42775c03e8148f9/pydantic_core-2.41.5-cp314-cp314-win_arm64.whl", hash = "sha256:e672ba74fbc2dc8eea59fb6d4aed6845e6905fc2a8afe93175d94a83ba2a01a0", size = 1977288, upload-time = "2025-11-04T13:41:26.33Z" }, + { url = "https://files.pythonhosted.org/packages/92/ed/77542d0c51538e32e15afe7899d79efce4b81eee631d99850edc2f5e9349/pydantic_core-2.41.5-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:8566def80554c3faa0e65ac30ab0932b9e3a5cd7f8323764303d468e5c37595a", size = 2120255, upload-time = "2025-11-04T13:41:28.569Z" }, + { url = "https://files.pythonhosted.org/packages/bb/3d/6913dde84d5be21e284439676168b28d8bbba5600d838b9dca99de0fad71/pydantic_core-2.41.5-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:b80aa5095cd3109962a298ce14110ae16b8c1aece8b72f9dafe81cf597ad80b3", size = 1863760, upload-time = "2025-11-04T13:41:31.055Z" }, + { url = "https://files.pythonhosted.org/packages/5a/f0/e5e6b99d4191da102f2b0eb9687aaa7f5bea5d9964071a84effc3e40f997/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3006c3dd9ba34b0c094c544c6006cc79e87d8612999f1a5d43b769b89181f23c", size = 1878092, upload-time = "2025-11-04T13:41:33.21Z" }, + { url = "https://files.pythonhosted.org/packages/71/48/36fb760642d568925953bcc8116455513d6e34c4beaa37544118c36aba6d/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:72f6c8b11857a856bcfa48c86f5368439f74453563f951e473514579d44aa612", size = 2053385, upload-time = "2025-11-04T13:41:35.508Z" }, + { url = "https://files.pythonhosted.org/packages/20/25/92dc684dd8eb75a234bc1c764b4210cf2646479d54b47bf46061657292a8/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5cb1b2f9742240e4bb26b652a5aeb840aa4b417c7748b6f8387927bc6e45e40d", size = 2218832, upload-time = "2025-11-04T13:41:37.732Z" }, + { url = 
"https://files.pythonhosted.org/packages/e2/09/f53e0b05023d3e30357d82eb35835d0f6340ca344720a4599cd663dca599/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:bd3d54f38609ff308209bd43acea66061494157703364ae40c951f83ba99a1a9", size = 2327585, upload-time = "2025-11-04T13:41:40Z" }, + { url = "https://files.pythonhosted.org/packages/aa/4e/2ae1aa85d6af35a39b236b1b1641de73f5a6ac4d5a7509f77b814885760c/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2ff4321e56e879ee8d2a879501c8e469414d948f4aba74a2d4593184eb326660", size = 2041078, upload-time = "2025-11-04T13:41:42.323Z" }, + { url = "https://files.pythonhosted.org/packages/cd/13/2e215f17f0ef326fc72afe94776edb77525142c693767fc347ed6288728d/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d0d2568a8c11bf8225044aa94409e21da0cb09dcdafe9ecd10250b2baad531a9", size = 2173914, upload-time = "2025-11-04T13:41:45.221Z" }, + { url = "https://files.pythonhosted.org/packages/02/7a/f999a6dcbcd0e5660bc348a3991c8915ce6599f4f2c6ac22f01d7a10816c/pydantic_core-2.41.5-cp314-cp314t-musllinux_1_1_aarch64.whl", hash = "sha256:a39455728aabd58ceabb03c90e12f71fd30fa69615760a075b9fec596456ccc3", size = 2129560, upload-time = "2025-11-04T13:41:47.474Z" }, + { url = "https://files.pythonhosted.org/packages/3a/b1/6c990ac65e3b4c079a4fb9f5b05f5b013afa0f4ed6780a3dd236d2cbdc64/pydantic_core-2.41.5-cp314-cp314t-musllinux_1_1_armv7l.whl", hash = "sha256:239edca560d05757817c13dc17c50766136d21f7cd0fac50295499ae24f90fdf", size = 2329244, upload-time = "2025-11-04T13:41:49.992Z" }, + { url = "https://files.pythonhosted.org/packages/d9/02/3c562f3a51afd4d88fff8dffb1771b30cfdfd79befd9883ee094f5b6c0d8/pydantic_core-2.41.5-cp314-cp314t-musllinux_1_1_x86_64.whl", hash = "sha256:2a5e06546e19f24c6a96a129142a75cee553cc018ffee48a460059b1185f4470", size = 2331955, upload-time = "2025-11-04T13:41:54.079Z" }, + { url = 
"https://files.pythonhosted.org/packages/5c/96/5fb7d8c3c17bc8c62fdb031c47d77a1af698f1d7a406b0f79aaa1338f9ad/pydantic_core-2.41.5-cp314-cp314t-win32.whl", hash = "sha256:b4ececa40ac28afa90871c2cc2b9ffd2ff0bf749380fbdf57d165fd23da353aa", size = 1988906, upload-time = "2025-11-04T13:41:56.606Z" }, + { url = "https://files.pythonhosted.org/packages/22/ed/182129d83032702912c2e2d8bbe33c036f342cc735737064668585dac28f/pydantic_core-2.41.5-cp314-cp314t-win_amd64.whl", hash = "sha256:80aa89cad80b32a912a65332f64a4450ed00966111b6615ca6816153d3585a8c", size = 1981607, upload-time = "2025-11-04T13:41:58.889Z" }, + { url = "https://files.pythonhosted.org/packages/9f/ed/068e41660b832bb0b1aa5b58011dea2a3fe0ba7861ff38c4d4904c1c1a99/pydantic_core-2.41.5-cp314-cp314t-win_arm64.whl", hash = "sha256:35b44f37a3199f771c3eaa53051bc8a70cd7b54f333531c59e29fd4db5d15008", size = 1974769, upload-time = "2025-11-04T13:42:01.186Z" }, + { url = "https://files.pythonhosted.org/packages/09/32/59b0c7e63e277fa7911c2fc70ccfb45ce4b98991e7ef37110663437005af/pydantic_core-2.41.5-graalpy312-graalpy250_312_native-macosx_10_12_x86_64.whl", hash = "sha256:7da7087d756b19037bc2c06edc6c170eeef3c3bafcb8f532ff17d64dc427adfd", size = 2110495, upload-time = "2025-11-04T13:42:49.689Z" }, + { url = "https://files.pythonhosted.org/packages/aa/81/05e400037eaf55ad400bcd318c05bb345b57e708887f07ddb2d20e3f0e98/pydantic_core-2.41.5-graalpy312-graalpy250_312_native-macosx_11_0_arm64.whl", hash = "sha256:aabf5777b5c8ca26f7824cb4a120a740c9588ed58df9b2d196ce92fba42ff8dc", size = 1915388, upload-time = "2025-11-04T13:42:52.215Z" }, + { url = "https://files.pythonhosted.org/packages/6e/0d/e3549b2399f71d56476b77dbf3cf8937cec5cd70536bdc0e374a421d0599/pydantic_core-2.41.5-graalpy312-graalpy250_312_native-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c007fe8a43d43b3969e8469004e9845944f1a80e6acd47c150856bb87f230c56", size = 1942879, upload-time = "2025-11-04T13:42:56.483Z" }, + { url = 
"https://files.pythonhosted.org/packages/f7/07/34573da085946b6a313d7c42f82f16e8920bfd730665de2d11c0c37a74b5/pydantic_core-2.41.5-graalpy312-graalpy250_312_native-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:76d0819de158cd855d1cbb8fcafdf6f5cf1eb8e470abe056d5d161106e38062b", size = 2139017, upload-time = "2025-11-04T13:42:59.471Z" }, +] + +[[package]] +name = "pydantic-settings" +version = "2.13.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pydantic" }, + { name = "python-dotenv" }, + { name = "typing-inspection" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/52/6d/fffca34caecc4a3f97bda81b2098da5e8ab7efc9a66e819074a11955d87e/pydantic_settings-2.13.1.tar.gz", hash = "sha256:b4c11847b15237fb0171e1462bf540e294affb9b86db4d9aa5c01730bdbe4025", size = 223826, upload-time = "2026-02-19T13:45:08.055Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/00/4b/ccc026168948fec4f7555b9164c724cf4125eac006e176541483d2c959be/pydantic_settings-2.13.1-py3-none-any.whl", hash = "sha256:d56fd801823dbeae7f0975e1f8c8e25c258eb75d278ea7abb5d9cebb01b56237", size = 58929, upload-time = "2026-02-19T13:45:06.034Z" }, +] + [[package]] name = "pygments" version = "2.19.2" @@ -1794,6 +2214,20 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/c7/21/705964c7812476f378728bdf590ca4b771ec72385c533964653c68e86bdc/pygments-2.19.2-py3-none-any.whl", hash = "sha256:86540386c03d588bb81d44bc3928634ff26449851e99741617ecb9037ee5ec0b", size = 1225217, upload-time = "2025-06-21T13:39:07.939Z" }, ] +[[package]] +name = "pyjwt" +version = "2.12.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/c2/27/a3b6e5bf6ff856d2509292e95c8f57f0df7017cf5394921fc4e4ef40308a/pyjwt-2.12.1.tar.gz", hash = "sha256:c74a7a2adf861c04d002db713dd85f84beb242228e671280bf709d765b03672b", size = 102564, upload-time = "2026-03-13T19:27:37.25Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/e5/7a/8dd906bd22e79e47397a61742927f6747fe93242ef86645ee9092e610244/pyjwt-2.12.1-py3-none-any.whl", hash = "sha256:28ca37c070cad8ba8cd9790cd940535d40274d22f80ab87f3ac6a713e6e8454c", size = 29726, upload-time = "2026-03-13T19:27:35.677Z" }, +] + +[package.optional-dependencies] +crypto = [ + { name = "cryptography" }, +] + [[package]] name = "pymdown-extensions" version = "10.21" @@ -1882,6 +2316,40 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/c2/3c/2005227cb951df502412de2fa781f800663cccbef8d90ec6f1b371ac2c0d/python_discovery-1.2.0-py3-none-any.whl", hash = "sha256:1e108f1bbe2ed0ef089823d28805d5ad32be8e734b86a5f212bf89b71c266e4a", size = 31524, upload-time = "2026-03-19T01:43:07.045Z" }, ] +[[package]] +name = "python-dotenv" +version = "1.2.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/82/ed/0301aeeac3e5353ef3d94b6ec08bbcabd04a72018415dcb29e588514bba8/python_dotenv-1.2.2.tar.gz", hash = "sha256:2c371a91fbd7ba082c2c1dc1f8bf89ca22564a087c2c287cd9b662adde799cf3", size = 50135, upload-time = "2026-03-01T16:00:26.196Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0b/d7/1959b9648791274998a9c3526f6d0ec8fd2233e4d4acce81bbae76b44b2a/python_dotenv-1.2.2-py3-none-any.whl", hash = "sha256:1d8214789a24de455a8b8bd8ae6fe3c6b69a5e3d64aa8a8e5d68e694bbcb285a", size = 22101, upload-time = "2026-03-01T16:00:25.09Z" }, +] + +[[package]] +name = "python-multipart" +version = "0.0.22" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/94/01/979e98d542a70714b0cb2b6728ed0b7c46792b695e3eaec3e20711271ca3/python_multipart-0.0.22.tar.gz", hash = "sha256:7340bef99a7e0032613f56dc36027b959fd3b30a787ed62d310e951f7c3a3a58", size = 37612, upload-time = "2026-01-25T10:15:56.219Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/1b/d0/397f9626e711ff749a95d96b7af99b9c566a9bb5129b8e4c10fc4d100304/python_multipart-0.0.22-py3-none-any.whl", hash = "sha256:2b2cd894c83d21bf49d702499531c7bafd057d730c201782048f7945d82de155", size = 24579, upload-time = "2026-01-25T10:15:54.811Z" }, +] + +[[package]] +name = "pywin32" +version = "311" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e7/ab/01ea1943d4eba0f850c3c61e78e8dd59757ff815ff3ccd0a84de5f541f42/pywin32-311-cp312-cp312-win32.whl", hash = "sha256:750ec6e621af2b948540032557b10a2d43b0cee2ae9758c54154d711cc852d31", size = 8706543, upload-time = "2025-07-14T20:13:20.765Z" }, + { url = "https://files.pythonhosted.org/packages/d1/a8/a0e8d07d4d051ec7502cd58b291ec98dcc0c3fff027caad0470b72cfcc2f/pywin32-311-cp312-cp312-win_amd64.whl", hash = "sha256:b8c095edad5c211ff31c05223658e71bf7116daa0ecf3ad85f3201ea3190d067", size = 9495040, upload-time = "2025-07-14T20:13:22.543Z" }, + { url = "https://files.pythonhosted.org/packages/ba/3a/2ae996277b4b50f17d61f0603efd8253cb2d79cc7ae159468007b586396d/pywin32-311-cp312-cp312-win_arm64.whl", hash = "sha256:e286f46a9a39c4a18b319c28f59b61de793654af2f395c102b4f819e584b5852", size = 8710102, upload-time = "2025-07-14T20:13:24.682Z" }, + { url = "https://files.pythonhosted.org/packages/a5/be/3fd5de0979fcb3994bfee0d65ed8ca9506a8a1260651b86174f6a86f52b3/pywin32-311-cp313-cp313-win32.whl", hash = "sha256:f95ba5a847cba10dd8c4d8fefa9f2a6cf283b8b88ed6178fa8a6c1ab16054d0d", size = 8705700, upload-time = "2025-07-14T20:13:26.471Z" }, + { url = "https://files.pythonhosted.org/packages/e3/28/e0a1909523c6890208295a29e05c2adb2126364e289826c0a8bc7297bd5c/pywin32-311-cp313-cp313-win_amd64.whl", hash = "sha256:718a38f7e5b058e76aee1c56ddd06908116d35147e133427e59a3983f703a20d", size = 9494700, upload-time = "2025-07-14T20:13:28.243Z" }, + { url = 
"https://files.pythonhosted.org/packages/04/bf/90339ac0f55726dce7d794e6d79a18a91265bdf3aa70b6b9ca52f35e022a/pywin32-311-cp313-cp313-win_arm64.whl", hash = "sha256:7b4075d959648406202d92a2310cb990fea19b535c7f4a78d3f5e10b926eeb8a", size = 8709318, upload-time = "2025-07-14T20:13:30.348Z" }, + { url = "https://files.pythonhosted.org/packages/c9/31/097f2e132c4f16d99a22bfb777e0fd88bd8e1c634304e102f313af69ace5/pywin32-311-cp314-cp314-win32.whl", hash = "sha256:b7a2c10b93f8986666d0c803ee19b5990885872a7de910fc460f9b0c2fbf92ee", size = 8840714, upload-time = "2025-07-14T20:13:32.449Z" }, + { url = "https://files.pythonhosted.org/packages/90/4b/07c77d8ba0e01349358082713400435347df8426208171ce297da32c313d/pywin32-311-cp314-cp314-win_amd64.whl", hash = "sha256:3aca44c046bd2ed8c90de9cb8427f581c479e594e99b5c0bb19b29c10fd6cb87", size = 9656800, upload-time = "2025-07-14T20:13:34.312Z" }, + { url = "https://files.pythonhosted.org/packages/c0/d2/21af5c535501a7233e734b8af901574572da66fcc254cb35d0609c9080dd/pywin32-311-cp314-cp314-win_arm64.whl", hash = "sha256:a508e2d9025764a8270f93111a970e1d0fbfc33f4153b388bb649b7eec4f9b42", size = 8932540, upload-time = "2025-07-14T20:13:36.379Z" }, +] + [[package]] name = "pyyaml" version = "6.0.3" @@ -2255,11 +2723,11 @@ wheels = [ [[package]] name = "setuptools" -version = "82.0.1" +version = "80.10.2" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/4f/db/cfac1baf10650ab4d1c111714410d2fbb77ac5a616db26775db562c8fab2/setuptools-82.0.1.tar.gz", hash = "sha256:7d872682c5d01cfde07da7bccc7b65469d3dca203318515ada1de5eda35efbf9", size = 1152316, upload-time = "2026-03-09T12:47:17.221Z" } +sdist = { url = "https://files.pythonhosted.org/packages/76/95/faf61eb8363f26aa7e1d762267a8d602a1b26d4f3a1e758e92cb3cb8b054/setuptools-80.10.2.tar.gz", hash = "sha256:8b0e9d10c784bf7d262c4e5ec5d4ec94127ce206e8738f29a437945fbc219b70", size = 1200343, upload-time = "2026-01-25T22:38:17.252Z" } wheels = [ - { 
url = "https://files.pythonhosted.org/packages/9d/76/f789f7a86709c6b087c5a2f52f911838cad707cc613162401badc665acfe/setuptools-82.0.1-py3-none-any.whl", hash = "sha256:a59e362652f08dcd477c78bb6e7bd9d80a7995bc73ce773050228a348ce2e5bb", size = 1006223, upload-time = "2026-03-09T12:47:15.026Z" }, + { url = "https://files.pythonhosted.org/packages/94/b8/f1f62a5e3c0ad2ff1d189590bfa4c46b4f3b6e49cef6f26c6ee4e575394d/setuptools-80.10.2-py3-none-any.whl", hash = "sha256:95b30ddfb717250edb492926c92b5221f7ef3fbcc2b07579bcd4a27da21d0173", size = 1064234, upload-time = "2026-01-25T22:38:15.216Z" }, ] [[package]] @@ -2271,6 +2739,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b7/ce/149a00dd41f10bc29e5921b496af8b574d8413afcd5e30dfa0ed46c2cc5e/six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274", size = 11050, upload-time = "2024-12-04T17:35:26.475Z" }, ] +[[package]] +name = "sse-starlette" +version = "3.3.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "anyio" }, + { name = "starlette" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/14/2f/9223c24f568bb7a0c03d751e609844dce0968f13b39a3f73fbb3a96cd27a/sse_starlette-3.3.3.tar.gz", hash = "sha256:72a95d7575fd5129bd0ae15275ac6432bb35ac542fdebb82889c24bb9f3f4049", size = 32420, upload-time = "2026-03-17T20:05:55.529Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/78/e2/b8cff57a67dddf9a464d7e943218e031617fb3ddc133aeeb0602ff5f6c85/sse_starlette-3.3.3-py3-none-any.whl", hash = "sha256:c5abb5082a1cc1c6294d89c5290c46b5f67808cfdb612b7ec27e8ba061c22e8d", size = 14329, upload-time = "2026-03-17T20:05:54.35Z" }, +] + [[package]] name = "stack-data" version = "0.6.3" @@ -2285,6 +2766,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/f1/7b/ce1eafaf1a76852e2ec9b22edecf1daa58175c090266e9f6c64afcd81d91/stack_data-0.6.3-py3-none-any.whl", hash = 
"sha256:d5558e0c25a4cb0853cddad3d77da9891a08cb85dd9f9f91b9f8cd66e511e695", size = 24521, upload-time = "2023-09-30T13:58:03.53Z" }, ] +[[package]] +name = "starlette" +version = "1.0.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "anyio" }, + { name = "typing-extensions", marker = "python_full_version < '3.13'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/81/69/17425771797c36cded50b7fe44e850315d039f28b15901ab44839e70b593/starlette-1.0.0.tar.gz", hash = "sha256:6a4beaf1f81bb472fd19ea9b918b50dc3a77a6f2e190a12954b25e6ed5eea149", size = 2655289, upload-time = "2026-03-22T18:29:46.779Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0b/c9/584bc9651441b4ba60cc4d557d8a547b5aff901af35bda3a4ee30c819b82/starlette-1.0.0-py3-none-any.whl", hash = "sha256:d3ec55e0bb321692d275455ddfd3df75fff145d009685eb40dc91fc66b03d38b", size = 72651, upload-time = "2026-03-22T18:29:45.111Z" }, +] + [[package]] name = "stella" version = "0.1" @@ -2328,6 +2822,26 @@ dev = [ { name = "ty" }, { name = "typeguard" }, ] +mosaic = [ + { name = "altair" }, + { name = "click" }, + { name = "mcp" }, + { name = "mosaic" }, + { name = "omegaconf" }, + { name = "pandas" }, + { name = "tabulate" }, +] +profiling = [ + { name = "altair" }, + { name = "click" }, + { name = "mcp" }, + { name = "mosaic" }, + { name = "omegaconf" }, + { name = "pandas" }, + { name = "tabulate" }, + { name = "tensorboard" }, + { name = "torch-tb-profiler" }, +] [package.metadata] requires-dist = [ @@ -2361,6 +2875,26 @@ dev = [ { name = "ty", specifier = ">=0.0.6" }, { name = "typeguard", specifier = ">=4.4.1" }, ] +mosaic = [ + { name = "altair", specifier = ">=5.5.0" }, + { name = "click", specifier = ">=8.3.0" }, + { name = "mcp", specifier = ">=1.0.0" }, + { name = "mosaic", git = "https://github.com/facebookresearch/mosaic.git?rev=27a16228ae22222cf2b834d31afbf64f2dff66e0" }, + { name = "omegaconf", specifier = ">=2.3.0" }, + { name = "pandas", specifier 
= ">=2.3.3" }, + { name = "tabulate", specifier = ">=0.9.0" }, +] +profiling = [ + { name = "altair", specifier = ">=5.5.0" }, + { name = "click", specifier = ">=8.3.0" }, + { name = "mcp", specifier = ">=1.0.0" }, + { name = "mosaic", git = "https://github.com/facebookresearch/mosaic.git?rev=27a16228ae22222cf2b834d31afbf64f2dff66e0" }, + { name = "omegaconf", specifier = ">=2.3.0" }, + { name = "pandas", specifier = ">=2.3.3" }, + { name = "tabulate", specifier = ">=0.9.0" }, + { name = "tensorboard", specifier = ">=2.20.0" }, + { name = "torch-tb-profiler", specifier = ">=0.4.3" }, +] [[package]] name = "sympy" @@ -2374,6 +2908,45 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/a2/09/77d55d46fd61b4a135c444fc97158ef34a095e5681d0a6c10b75bf356191/sympy-1.14.0-py3-none-any.whl", hash = "sha256:e091cc3e99d2141a0ba2847328f5479b05d94a6635cb96148ccb3f34671bd8f5", size = 6299353, upload-time = "2025-04-27T18:04:59.103Z" }, ] +[[package]] +name = "tabulate" +version = "0.10.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/46/58/8c37dea7bbf769b20d58e7ace7e5edfe65b849442b00ffcdd56be88697c6/tabulate-0.10.0.tar.gz", hash = "sha256:e2cfde8f79420f6deeffdeda9aaec3b6bc5abce947655d17ac662b126e48a60d", size = 91754, upload-time = "2026-03-04T18:55:34.402Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/99/55/db07de81b5c630da5cbf5c7df646580ca26dfaefa593667fc6f2fe016d2e/tabulate-0.10.0-py3-none-any.whl", hash = "sha256:f0b0622e567335c8fabaaa659f1b33bcb6ddfe2e496071b743aa113f8774f2d3", size = 39814, upload-time = "2026-03-04T18:55:31.284Z" }, +] + +[[package]] +name = "tensorboard" +version = "2.20.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "absl-py" }, + { name = "grpcio" }, + { name = "markdown" }, + { name = "numpy" }, + { name = "packaging" }, + { name = "pillow" }, + { name = "protobuf" }, + { name = "setuptools" }, + { name = 
"tensorboard-data-server" }, + { name = "werkzeug" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/9c/d9/a5db55f88f258ac669a92858b70a714bbbd5acd993820b41ec4a96a4d77f/tensorboard-2.20.0-py3-none-any.whl", hash = "sha256:9dc9f978cb84c0723acf9a345d96c184f0293d18f166bb8d59ee098e6cfaaba6", size = 5525680, upload-time = "2025-07-17T19:20:49.638Z" }, +] + +[[package]] +name = "tensorboard-data-server" +version = "0.7.2" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7a/13/e503968fefabd4c6b2650af21e110aa8466fe21432cd7c43a84577a89438/tensorboard_data_server-0.7.2-py3-none-any.whl", hash = "sha256:7e0610d205889588983836ec05dc098e80f97b7e7bbff7e994ebb78f578d0ddb", size = 2356, upload-time = "2023-10-23T21:23:32.16Z" }, + { url = "https://files.pythonhosted.org/packages/b7/85/dabeaf902892922777492e1d253bb7e1264cadce3cea932f7ff599e53fea/tensorboard_data_server-0.7.2-py3-none-macosx_10_9_x86_64.whl", hash = "sha256:9fe5d24221b29625dbc7328b0436ca7fc1c23de4acf4d272f1180856e32f9f60", size = 4823598, upload-time = "2023-10-23T21:23:33.714Z" }, + { url = "https://files.pythonhosted.org/packages/73/c6/825dab04195756cf8ff2e12698f22513b3db2f64925bdd41671bfb33aaa5/tensorboard_data_server-0.7.2-py3-none-manylinux_2_31_x86_64.whl", hash = "sha256:ef687163c24185ae9754ed5650eb5bc4d84ff257aabdc33f0cc6f74d8ba54530", size = 6590363, upload-time = "2023-10-23T21:23:35.583Z" }, +] + [[package]] name = "termcolor" version = "3.3.0" @@ -2477,6 +3050,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/66/4d/35352043ee0eaffdeff154fad67cd4a31dbed7ff8e3be1cc4549717d6d51/torch-2.10.0-cp314-cp314t-win_amd64.whl", hash = "sha256:71283a373f0ee2c89e0f0d5f446039bdabe8dbc3c9ccf35f0f784908b0acd185", size = 113995816, upload-time = "2026-01-21T16:22:05.312Z" }, ] +[[package]] +name = "torch-tb-profiler" +version = "0.4.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pandas" 
}, + { name = "tensorboard" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/96/3f/1bdbeae21db91179c295bcecb7194d57cd0e184125c69e5a6e315ea4770f/torch_tb_profiler-0.4.3.tar.gz", hash = "sha256:8b8d29b2de960b3c4423087b23cec29beaf9ac3a8c7b046c18fd25b218f726b1", size = 1057029, upload-time = "2023-10-06T15:28:57.961Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/46/d0/891ec43349f287ea5c313ebc1320e4eda38ccd4c7a0951657213467eaab5/torch_tb_profiler-0.4.3-py3-none-any.whl", hash = "sha256:207a49b05572dd983e4ab29eb5e0fcadd60374a8f93c78ec638217e8d18788dc", size = 1053410, upload-time = "2023-10-06T15:28:56.53Z" }, +] + [[package]] name = "tornado" version = "6.5.5" @@ -2593,6 +3179,27 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/18/67/36e9267722cc04a6b9f15c7f3441c2363321a3ea07da7ae0c0707beb2a9c/typing_extensions-4.15.0-py3-none-any.whl", hash = "sha256:f0fa19c6845758ab08074a0cfa8b7aecb71c999ca73d62883bc25cc018c4e548", size = 44614, upload-time = "2025-08-25T13:49:24.86Z" }, ] +[[package]] +name = "typing-inspection" +version = "0.4.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/55/e3/70399cb7dd41c10ac53367ae42139cf4b1ca5f36bb3dc6c9d33acdb43655/typing_inspection-0.4.2.tar.gz", hash = "sha256:ba561c48a67c5958007083d386c3295464928b01faa735ab8547c5692e87f464", size = 75949, upload-time = "2025-10-01T02:14:41.687Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/dc/9b/47798a6c91d8bdb567fe2698fe81e0c6b7cb7ef4d13da4114b41d239f65d/typing_inspection-0.4.2-py3-none-any.whl", hash = "sha256:4ed1cacbdc298c220f1bd249ed5287caa16f34d44ef4e9c3d0cbad5b521545e7", size = 14611, upload-time = "2025-10-01T02:14:40.154Z" }, +] + +[[package]] +name = "tzdata" +version = "2025.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/5e/a7/c202b344c5ca7daf398f3b8a477eeb205cf3b6f32e7ec3a6bac0629ca975/tzdata-2025.3.tar.gz", hash = "sha256:de39c2ca5dc7b0344f2eba86f49d614019d29f060fc4ebc8a417896a620b56a7", size = 196772, upload-time = "2025-12-13T17:45:35.667Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c7/b0/003792df09decd6849a5e39c28b513c06e84436a54440380862b5aeff25d/tzdata-2025.3-py2.py3-none-any.whl", hash = "sha256:06a47e5700f3081aab02b2e513160914ff0694bce9947d6b76ebd6bf57cfc5d1", size = 348521, upload-time = "2025-12-13T17:45:33.889Z" }, +] + [[package]] name = "urllib3" version = "2.6.3" @@ -2602,6 +3209,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/39/08/aaaad47bc4e9dc8c725e68f9d04865dbcb2052843ff09c97b08904852d84/urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4", size = 131584, upload-time = "2026-01-07T16:24:42.685Z" }, ] +[[package]] +name = "uvicorn" +version = "0.42.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "click", marker = "sys_platform != 'emscripten'" }, + { name = "h11", marker = "sys_platform != 'emscripten'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/e3/ad/4a96c425be6fb67e0621e62d86c402b4a17ab2be7f7c055d9bd2f638b9e2/uvicorn-0.42.0.tar.gz", hash = "sha256:9b1f190ce15a2dd22e7758651d9b6d12df09a13d51ba5bf4fc33c383a48e1775", size = 85393, upload-time = "2026-03-16T06:19:50.077Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0a/89/f8827ccff89c1586027a105e5630ff6139a64da2515e24dafe860bd9ae4d/uvicorn-0.42.0-py3-none-any.whl", hash = "sha256:96c30f5c7abe6f74ae8900a70e92b85ad6613b745d4879eb9b16ccad15645359", size = 68830, upload-time = "2026-03-16T06:19:48.325Z" }, +] + [[package]] name = "virtualenv" version = "21.2.0" @@ -2650,6 +3270,18 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/68/5a/199c59e0a824a3db2b89c5d2dade7ab5f9624dbf6448dc291b46d5ec94d3/wcwidth-0.6.0-py3-none-any.whl", hash = "sha256:1a3a1e510b553315f8e146c54764f4fb6264ffad731b3d78088cdb1478ffbdad", size = 94189, upload-time = "2026-02-06T19:19:39.646Z" }, ] +[[package]] +name = "werkzeug" +version = "3.1.6" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "markupsafe" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/61/f1/ee81806690a87dab5f5653c1f146c92bc066d7f4cebc603ef88eb9e13957/werkzeug-3.1.6.tar.gz", hash = "sha256:210c6bede5a420a913956b4791a7f4d6843a43b6fcee4dfa08a65e93007d0d25", size = 864736, upload-time = "2026-02-19T15:17:18.884Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/4d/ec/d58832f89ede95652fd01f4f24236af7d32b70cab2196dfcc2d2fd13c5c2/werkzeug-3.1.6-py3-none-any.whl", hash = "sha256:7ddf3357bb9564e407607f988f683d72038551200c704012bb9a4c523d42f131", size = 225166, upload-time = "2026-02-19T15:17:17.475Z" }, +] + [[package]] name = "widgetsnbextension" version = "4.0.15" From 8eb6c092e3d36ec0f3fc56ea6e9a6b4524736288 Mon Sep 17 00:00:00 2001 From: gklajer Date: Thu, 26 Mar 2026 08:52:33 +0000 Subject: [PATCH 2/4] feat(profiling): codify memory theory comparisons --- scripts/lora_memory_analysis.py | 137 +++--- src/stellatscale/__init__.py | 28 +- src/stellatscale/memory_experiment.py | 575 ++++++++++++++++++++++++++ tests/test_memory_experiment.py | 190 +++++++++ 4 files changed, 876 insertions(+), 54 deletions(-) create mode 100644 src/stellatscale/memory_experiment.py create mode 100644 tests/test_memory_experiment.py diff --git a/scripts/lora_memory_analysis.py b/scripts/lora_memory_analysis.py index 1da5240..e4a5e9e 100644 --- a/scripts/lora_memory_analysis.py +++ b/scripts/lora_memory_analysis.py @@ -3,6 +3,7 @@ from __future__ import annotations import argparse +import importlib import json import sys from contextlib import contextmanager, redirect_stdout 
@@ -20,6 +21,17 @@ tensorboard_trace_handler, ) +from stellatscale.memory_experiment import ( + ComparisonTolerances, + FrozenLoRALinear, + LinearModelVariant, + MemoryExperimentConfig, + MemorySummary, + build_theoretical_summary, + bytes_to_gib, + compare_theory_to_measurement, +) + if TYPE_CHECKING: from collections.abc import Callable, Iterator @@ -42,12 +54,11 @@ class MemoryAbstractProtocol(Protocol): OUTPUT_DIR = SNAPSHOT_DIR / "mosaic" ANNOTATIONS = ("## forward ##", "## backward ##", "## optimizer ##") SNAPSHOT_NAMES = ("dense", "frozen_lora") +COMPARISON_REPORT_PATH = OUTPUT_DIR / "theory_comparison.json" -STEPS = 5 -BATCH_SIZE = 16 -IN_FEATURES = 4096 -OUT_FEATURES = 4096 -RANK = 16 +EXPERIMENT_CONFIG = MemoryExperimentConfig( + batch_size=16, in_features=4096, out_features=4096, lora_rank=16, steps=5, learning_rate=0.05 +) DEVICE = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") @@ -63,39 +74,31 @@ def parse_args() -> argparse.Namespace: return parser.parse_args() -class LoRALinear(nn.Module): - """Linear layer with a frozen base projection and trainable LoRA adapters.""" - - def __init__(self, base: nn.Linear, rank: int) -> None: - super().__init__() - self.base = base - self.base.requires_grad_(requires_grad=False) - self.lora_a = nn.Linear(base.in_features, rank, bias=False) - self.lora_b = nn.Linear(rank, base.out_features, bias=False) - - def forward(self, inputs: torch.Tensor) -> torch.Tensor: - """Apply the frozen base projection and the LoRA update.""" - return self.base(inputs) + self.lora_b(self.lora_a(inputs)) - - def build_inputs() -> tuple[torch.Tensor, torch.Tensor]: """Create deterministic inputs and labels for the profiling run.""" generator = torch.Generator(device="cpu").manual_seed(7) - inputs = torch.randn(BATCH_SIZE, IN_FEATURES, generator=generator) - labels = torch.randn(BATCH_SIZE, OUT_FEATURES, generator=generator) + inputs = torch.randn( + EXPERIMENT_CONFIG.batch_size, EXPERIMENT_CONFIG.in_features, 
generator=generator + ) + labels = torch.randn( + EXPERIMENT_CONFIG.batch_size, EXPERIMENT_CONFIG.out_features, generator=generator + ) return inputs.to(DEVICE), labels.to(DEVICE) def make_dense_model() -> nn.Module: """Construct the dense linear baseline model.""" torch.manual_seed(7) - return nn.Linear(IN_FEATURES, OUT_FEATURES, bias=False) + return nn.Linear(EXPERIMENT_CONFIG.in_features, EXPERIMENT_CONFIG.out_features, bias=False) def make_lora_model() -> nn.Module: """Construct the frozen-base LoRA variant used for comparison.""" torch.manual_seed(7) - return LoRALinear(nn.Linear(IN_FEATURES, OUT_FEATURES, bias=False), rank=RANK) + return FrozenLoRALinear( + nn.Linear(EXPERIMENT_CONFIG.in_features, EXPERIMENT_CONFIG.out_features, bias=False), + rank=EXPERIMENT_CONFIG.lora_rank, + ) def profiler_activities() -> list[ProfilerActivity]: @@ -105,11 +108,6 @@ def profiler_activities() -> list[ProfilerActivity]: return [ProfilerActivity.CPU] -def bytes_to_gib(num_bytes: float) -> float: - """Convert bytes to gibibytes.""" - return num_bytes / 1024**3 - - @contextmanager def capture_snapshot(snapshot_path: Path) -> Iterator[None]: """Record a CUDA allocator snapshot around a profiling region.""" @@ -130,26 +128,26 @@ def run_profile(name: str, model: nn.Module, inputs: torch.Tensor, labels: torch model = model.to(DEVICE) loss_fn = nn.MSELoss() optimizer = torch.optim.AdamW( - [parameter for parameter in model.parameters() if parameter.requires_grad], lr=0.05 + [parameter for parameter in model.parameters() if parameter.requires_grad], + lr=EXPERIMENT_CONFIG.learning_rate, ) snapshot_path = SNAPSHOT_DIR / f"{name}_snapshot.pickle" trace_dir = SNAPSHOT_DIR / f"{name}_traces" + profile_schedule = schedule(wait=0, warmup=0, active=EXPERIMENT_CONFIG.steps, repeat=1) with ( capture_snapshot(snapshot_path), profile( activities=profiler_activities(), - schedule=schedule(wait=0, warmup=0, active=STEPS, repeat=1), # codespell:ignore warmup + schedule=profile_schedule, 
record_shapes=True, profile_memory=True, with_stack=True, on_trace_ready=tensorboard_trace_handler(str(trace_dir)), ) as profiler, ): - for _ in range(STEPS): - profiler.step() - + for _ in range(EXPERIMENT_CONFIG.steps): with record_function("## forward ##"): pred = model(inputs) @@ -160,6 +158,8 @@ def run_profile(name: str, model: nn.Module, inputs: torch.Tensor, labels: torch optimizer.step() optimizer.zero_grad(set_to_none=True) + profiler.step() + sys.stdout.write(f"{name}\n") sys.stdout.write(f" snapshot: {snapshot_path}\n") sys.stdout.write(f" trace_dir: {trace_dir}\n") @@ -168,12 +168,10 @@ def run_profile(name: str, model: nn.Module, inputs: torch.Tensor, labels: torch def load_mosaic() -> tuple[Any, Any, Any, Any]: """Import the Mosaic entry points required for report generation.""" try: - from mosaic.cmd.entry_point import ( # noqa: PLC0415 - get_memory_profile, - get_memory_usage_by_annotation_stage, - get_memory_usage_peak, + entry_point = importlib.import_module("mosaic.cmd.entry_point") + memory_abstract_module = importlib.import_module( + "mosaic.libmosaic.analyzer.memory_abstract" ) - from mosaic.libmosaic.analyzer.memory_abstract import MemoryAbstract # noqa: PLC0415 except ImportError as exc: message = ( "Mosaic is not importable in the current environment. 
Run `uv sync --group mosaic` " @@ -182,10 +180,10 @@ def load_mosaic() -> tuple[Any, Any, Any, Any]: raise ImportError(message) from exc return ( - get_memory_profile, - get_memory_usage_by_annotation_stage, - get_memory_usage_peak, - MemoryAbstract, + entry_point.get_memory_profile, + entry_point.get_memory_usage_by_annotation_stage, + entry_point.get_memory_usage_peak, + memory_abstract_module.MemoryAbstract, ) @@ -213,7 +211,7 @@ def build_peak_summary(name: str, memory_abstract: MemoryAbstractProtocol) -> di } -def analyze_snapshot(name: str) -> dict[str, Any]: +def analyze_snapshot(name: str) -> MemorySummary: """Generate Mosaic reports and a summary JSON file for one snapshot.""" snapshot_path = SNAPSHOT_DIR / f"{name}_snapshot.pickle" if not snapshot_path.exists(): @@ -292,23 +290,23 @@ def analyze_snapshot(name: str) -> dict[str, Any]: sys.stdout.write(f" peak_report: {peak_report_path}\n") sys.stdout.write(f" summary: {summary_path}\n") - return peak_summary + return MemorySummary.from_mapping(peak_summary) -def write_comparison(summaries: list[dict[str, Any]]) -> Path: +def write_comparison(summaries: list[MemorySummary]) -> Path: """Write a dense-vs-LoRA comparison JSON file.""" - summary_by_name = {summary["name"]: summary for summary in summaries} + summary_by_name = {summary.name: summary for summary in summaries} dense = summary_by_name["dense"] frozen_lora = summary_by_name["frozen_lora"] comparison = { - "dense": dense, - "frozen_lora": frozen_lora, + "dense": dense.to_dict(), + "frozen_lora": frozen_lora.to_dict(), "delta": { - "dynamic_peak_bytes": frozen_lora["dynamic_peak_bytes"] - dense["dynamic_peak_bytes"], - "dynamic_peak_gib": frozen_lora["dynamic_peak_gib"] - dense["dynamic_peak_gib"], - "overall_peak_bytes": frozen_lora["overall_peak_bytes"] - dense["overall_peak_bytes"], - "overall_peak_gib": frozen_lora["overall_peak_gib"] - dense["overall_peak_gib"], + "dynamic_peak_bytes": frozen_lora.dynamic_peak_bytes - dense.dynamic_peak_bytes, + 
"dynamic_peak_gib": frozen_lora.dynamic_peak_gib - dense.dynamic_peak_gib, + "overall_peak_bytes": frozen_lora.overall_peak_bytes - dense.overall_peak_bytes, + "overall_peak_gib": frozen_lora.overall_peak_gib - dense.overall_peak_gib, }, } @@ -317,11 +315,44 @@ def write_comparison(summaries: list[dict[str, Any]]) -> Path: return comparison_path +def write_theory_comparison(summaries: list[MemorySummary]) -> Path: + """Write a theory-vs-experiment comparison report for each variant.""" + possible_gap_sources = [ + "Theoretical gaps may come from incomplete theory, implementation details, or runtime behavior.", + "Allocator caching, autograd temporaries, and kernel workspaces can all move the measured result away from the simple tensor model.", + ] + keepalive_script = ROOT / "scripts" / "gpu_keepalive_loop.sh" + if keepalive_script.exists(): + possible_gap_sources.append( + "External GPU workload from scripts like scripts/gpu_keepalive_loop.sh can perturb allocator baselines if running concurrently." 
+ ) + + comparisons = {} + for summary in summaries: + variant = LinearModelVariant(summary.name) + theory = build_theoretical_summary(EXPERIMENT_CONFIG, variant) + comparison = compare_theory_to_measurement( + theory, + summary, + tolerances=ComparisonTolerances(), + notes=( + "Keep the theory-vs-experiment gap explicit: disagreement is a signal to investigate, not something to smooth away.", + ), + possible_gap_sources=tuple(possible_gap_sources), + ) + comparisons[summary.name] = comparison.to_dict() + + COMPARISON_REPORT_PATH.write_text(json.dumps(comparisons, indent=2), encoding="utf-8") + return COMPARISON_REPORT_PATH + + def run_mosaic_analysis() -> None: """Analyze all known snapshots and emit a comparison summary.""" summaries = [analyze_snapshot(name) for name in SNAPSHOT_NAMES] comparison_path = write_comparison(summaries) + theory_path = write_theory_comparison(summaries) sys.stdout.write(f"comparison: {comparison_path}\n") + sys.stdout.write(f"theory_comparison: {theory_path}\n") def main() -> None: diff --git a/src/stellatscale/__init__.py b/src/stellatscale/__init__.py index 2810ff5..596ee3c 100644 --- a/src/stellatscale/__init__.py +++ b/src/stellatscale/__init__.py @@ -1,3 +1,29 @@ """stellatscale.""" -__all__: list[str] = [] +from stellatscale.memory_experiment import ( + ComparisonMetric, + ComparisonTolerances, + FrozenLoRALinear, + LinearModelVariant, + MemoryExperimentConfig, + MemorySummary, + TheoreticalMemorySummary, + TheoryExperimentComparison, + build_theoretical_summary, + bytes_to_gib, + compare_theory_to_measurement, +) + +__all__ = [ + "ComparisonMetric", + "ComparisonTolerances", + "FrozenLoRALinear", + "LinearModelVariant", + "MemoryExperimentConfig", + "MemorySummary", + "TheoreticalMemorySummary", + "TheoryExperimentComparison", + "build_theoretical_summary", + "bytes_to_gib", + "compare_theory_to_measurement", +] diff --git a/src/stellatscale/memory_experiment.py b/src/stellatscale/memory_experiment.py new file mode 100644 index 
0000000..ee32915 --- /dev/null +++ b/src/stellatscale/memory_experiment.py @@ -0,0 +1,575 @@ +"""Reusable theory and comparison helpers for the dense-vs-LoRA memory experiment. + +The goal of this module is to keep theoretical accounting, measured summaries, +and the gap between the two as first-class data. Approximate agreement is +valuable, but disagreement is equally valuable because it can reveal problems in +theory, implementation, or the surrounding runtime environment. +""" + +from __future__ import annotations + +import json +from dataclasses import dataclass, field +from enum import StrEnum +from typing import TYPE_CHECKING, cast + +import torch +from torch import nn + +if TYPE_CHECKING: + from collections.abc import Mapping + from pathlib import Path + + +def bytes_to_gib(num_bytes: float) -> float: + """Convert bytes to gibibytes.""" + return num_bytes / 1024**3 + + +class LinearModelVariant(StrEnum): + """Supported linear-layer variants for the memory experiment.""" + + DENSE = "dense" + FROZEN_LORA = "frozen_lora" + + +@dataclass(frozen=True, slots=True) +class MemoryExperimentConfig: + """Configuration shared by theory and runtime measurement. + + Parameters + ---------- + batch_size: + Batch size used by the experiment. + in_features: + Input width of the linear layer. + out_features: + Output width of the linear layer. + lora_rank: + Rank of the LoRA adapters for the frozen-base variant. + steps: + Number of training steps to profile. + learning_rate: + Optimizer learning rate. + parameter_bytes: + Bytes per stored parameter element. + activation_bytes: + Bytes per activation element. + gradient_bytes: + Bytes per gradient element. + optimizer_state_bytes_per_trainable_element: + Total bytes used by optimizer state per trainable element. + include_input_in_static_baseline: + Whether to count the input tensor as part of the static baseline. + include_label_in_static_baseline: + Whether to count the label tensor as part of the static baseline. 
+ """ + + batch_size: int + in_features: int + out_features: int + lora_rank: int + steps: int = 5 + learning_rate: float = 0.05 + parameter_bytes: int = 4 + activation_bytes: int = 4 + gradient_bytes: int = 4 + optimizer_state_bytes_per_trainable_element: int = 8 + include_input_in_static_baseline: bool = True + include_label_in_static_baseline: bool = True + + def __post_init__(self) -> None: + """Validate that the experiment dimensions are well-formed.""" + for field_name in ( + "batch_size", + "in_features", + "out_features", + "lora_rank", + "steps", + "parameter_bytes", + "activation_bytes", + "gradient_bytes", + "optimizer_state_bytes_per_trainable_element", + ): + value = getattr(self, field_name) + if value <= 0: + message = f"{field_name} must be strictly positive, got {value}" + raise ValueError(message) + + if self.learning_rate <= 0: + message = f"learning_rate must be strictly positive, got {self.learning_rate}" + raise ValueError(message) + + @property + def dense_parameter_count(self) -> int: + """Return the dense weight element count.""" + return self.in_features * self.out_features + + @property + def lora_trainable_parameter_count(self) -> int: + """Return the trainable LoRA element count.""" + return self.lora_rank * (self.in_features + self.out_features) + + @property + def input_bytes(self) -> int: + """Return the input tensor size in bytes.""" + return self.batch_size * self.in_features * self.activation_bytes + + @property + def label_bytes(self) -> int: + """Return the label tensor size in bytes.""" + return self.batch_size * self.out_features * self.activation_bytes + + +@dataclass(frozen=True, slots=True) +class TheoreticalMemorySummary: + """Theoretical memory accounting for one model variant.""" + + name: str + variant: LinearModelVariant + resident_parameter_bytes: int + trainable_parameter_bytes: int + gradient_bytes: int + optimizer_state_bytes: int + static_baseline_bytes: int + forward_dynamic_estimate_bytes: int + 
backward_dynamic_estimate_bytes: int + optimizer_dynamic_estimate_bytes: int + dynamic_peak_lower_bound_bytes: int + + def to_dict(self) -> dict[str, float | int | str]: + """Convert the theoretical summary to a JSON-serializable mapping.""" + return { + "name": self.name, + "variant": self.variant.value, + "resident_parameter_bytes": self.resident_parameter_bytes, + "resident_parameter_gib": bytes_to_gib(float(self.resident_parameter_bytes)), + "trainable_parameter_bytes": self.trainable_parameter_bytes, + "trainable_parameter_gib": bytes_to_gib(float(self.trainable_parameter_bytes)), + "gradient_bytes": self.gradient_bytes, + "gradient_gib": bytes_to_gib(float(self.gradient_bytes)), + "optimizer_state_bytes": self.optimizer_state_bytes, + "optimizer_state_gib": bytes_to_gib(float(self.optimizer_state_bytes)), + "static_baseline_bytes": self.static_baseline_bytes, + "static_baseline_gib": bytes_to_gib(float(self.static_baseline_bytes)), + "forward_dynamic_estimate_bytes": self.forward_dynamic_estimate_bytes, + "forward_dynamic_estimate_gib": bytes_to_gib( + float(self.forward_dynamic_estimate_bytes) + ), + "backward_dynamic_estimate_bytes": self.backward_dynamic_estimate_bytes, + "backward_dynamic_estimate_gib": bytes_to_gib( + float(self.backward_dynamic_estimate_bytes) + ), + "optimizer_dynamic_estimate_bytes": self.optimizer_dynamic_estimate_bytes, + "optimizer_dynamic_estimate_gib": bytes_to_gib( + float(self.optimizer_dynamic_estimate_bytes) + ), + "dynamic_peak_lower_bound_bytes": self.dynamic_peak_lower_bound_bytes, + "dynamic_peak_lower_bound_gib": bytes_to_gib( + float(self.dynamic_peak_lower_bound_bytes) + ), + } + + +class FrozenLoRALinear(nn.Module): + """Linear layer with a frozen base projection and trainable LoRA adapters.""" + + def __init__(self, base: nn.Linear, rank: int) -> None: + """Initialize the frozen base projection and low-rank adapters.""" + super().__init__() + self.base = base + self.base.requires_grad_(requires_grad=False) + 
self.lora_a = nn.Linear(base.in_features, rank, bias=False) + self.lora_b = nn.Linear(rank, base.out_features, bias=False) + + def forward(self, inputs: torch.Tensor) -> torch.Tensor: + """Apply the frozen base projection and LoRA update.""" + return self.base(inputs) + self.lora_b(self.lora_a(inputs)) + + +@dataclass(frozen=True, slots=True) +class AnnotationMetadata: + """Metadata attached to one measured annotation event.""" + + stage: str + name: str + device: int + time_us: int + + +@dataclass(frozen=True, slots=True) +class AnnotationMeasurement: + """Measured memory value for one annotation event.""" + + annotation: AnnotationMetadata + memory_bytes: float + memory_gib: float + + +@dataclass(frozen=True, slots=True) +class MemorySummary: + """Measured memory summary loaded from Mosaic output.""" + + name: str + dynamic_peak_bytes: float + dynamic_peak_gib: float + static_memory_bytes: float + static_memory_gib: float + overall_peak_bytes: float + overall_peak_gib: float + annotation_memory: dict[str, AnnotationMeasurement] + files: dict[str, str] = field(default_factory=dict) + + @classmethod + def from_mapping(cls, payload: Mapping[str, object]) -> MemorySummary: + """Build a typed memory summary from a JSON-like mapping.""" + annotation_payload = _require_mapping(payload.get("annotation_memory"), "annotation_memory") + files_payload = payload.get("files") + files: dict[str, str] = {} + if files_payload is not None: + files_mapping = _require_mapping(files_payload, "files") + files = { + key: _require_str(value, f"files[{key}]") for key, value in files_mapping.items() + } + + return cls( + name=_require_str(payload.get("name"), "name"), + dynamic_peak_bytes=_require_float( + payload.get("dynamic_peak_bytes"), "dynamic_peak_bytes" + ), + dynamic_peak_gib=_require_float(payload.get("dynamic_peak_gib"), "dynamic_peak_gib"), + static_memory_bytes=_require_float( + payload.get("static_memory_bytes"), "static_memory_bytes" + ), + 
static_memory_gib=_require_float(payload.get("static_memory_gib"), "static_memory_gib"), + overall_peak_bytes=_require_float( + payload.get("overall_peak_bytes"), "overall_peak_bytes" + ), + overall_peak_gib=_require_float(payload.get("overall_peak_gib"), "overall_peak_gib"), + annotation_memory={ + key: _parse_annotation_measurement(key, value) + for key, value in annotation_payload.items() + }, + files=files, + ) + + @classmethod + def from_json_path(cls, path: Path) -> MemorySummary: + """Load a measured memory summary from a JSON file.""" + payload = json.loads(path.read_text(encoding="utf-8")) + return cls.from_mapping(_require_mapping(payload, str(path))) + + def annotation_bytes(self, annotation_name: str, stage: str, occurrence: int = 0) -> float: + """Return the measured bytes for a given annotation event.""" + suffix = "" if occurrence == 0 else f"({occurrence})" + key = f"{annotation_name}_{stage}{suffix}" + measurement = self.annotation_memory.get(key) + if measurement is None: + message = f"Annotation {key!r} was not found in summary {self.name!r}" + raise KeyError(message) + return measurement.memory_bytes + + def to_dict(self) -> dict[str, object]: + """Convert the measured summary back to a JSON-serializable mapping.""" + return { + "name": self.name, + "dynamic_peak_bytes": self.dynamic_peak_bytes, + "dynamic_peak_gib": self.dynamic_peak_gib, + "static_memory_bytes": self.static_memory_bytes, + "static_memory_gib": self.static_memory_gib, + "overall_peak_bytes": self.overall_peak_bytes, + "overall_peak_gib": self.overall_peak_gib, + "annotation_memory": { + key: { + "annotation": { + "stage": value.annotation.stage, + "name": value.annotation.name, + "device": value.annotation.device, + "time_us": value.annotation.time_us, + }, + "memory_bytes": value.memory_bytes, + "memory_gib": value.memory_gib, + } + for key, value in self.annotation_memory.items() + }, + "files": dict(self.files), + } + + +@dataclass(frozen=True, slots=True) +class 
ComparisonTolerances: + """Relative tolerances used when comparing theory and measurement.""" + + static_baseline_relative: float = 0.05 + forward_dynamic_relative: float = 0.50 + backward_dynamic_relative: float = 0.50 + optimizer_dynamic_relative: float = 0.35 + + +@dataclass(frozen=True, slots=True) +class ComparisonMetric: + """One theory-vs-measurement metric with explicit gap information.""" + + metric_name: str + comparison_kind: str + predicted_bytes: float + measured_bytes: float + absolute_error_bytes: float + relative_error: float + tolerance: float | None + within_tolerance: bool + + def to_dict(self) -> dict[str, float | str | bool | None]: + """Convert the metric to a JSON-serializable mapping.""" + return { + "metric_name": self.metric_name, + "comparison_kind": self.comparison_kind, + "predicted_bytes": self.predicted_bytes, + "predicted_gib": bytes_to_gib(self.predicted_bytes), + "measured_bytes": self.measured_bytes, + "measured_gib": bytes_to_gib(self.measured_bytes), + "absolute_error_bytes": self.absolute_error_bytes, + "absolute_error_gib": bytes_to_gib(self.absolute_error_bytes), + "relative_error": self.relative_error, + "tolerance": self.tolerance, + "within_tolerance": self.within_tolerance, + } + + +@dataclass(frozen=True, slots=True) +class TheoryExperimentComparison: + """Comparison report preserving both agreement and disagreement.""" + + name: str + theory: TheoreticalMemorySummary + measured: MemorySummary + metrics: dict[str, ComparisonMetric] + notes: tuple[str, ...] = () + possible_gap_sources: tuple[str, ...] 
= () + + @property + def failing_metrics(self) -> tuple[str, ...]: + """Return the metric names that are outside tolerance.""" + return tuple(name for name, metric in self.metrics.items() if not metric.within_tolerance) + + def to_dict(self) -> dict[str, object]: + """Convert the comparison report to a JSON-serializable mapping.""" + return { + "name": self.name, + "theory": self.theory.to_dict(), + "measured": self.measured.to_dict(), + "metrics": {key: value.to_dict() for key, value in self.metrics.items()}, + "failing_metrics": list(self.failing_metrics), + "notes": list(self.notes), + "possible_gap_sources": list(self.possible_gap_sources), + } + + +def build_theoretical_summary( + config: MemoryExperimentConfig, variant: LinearModelVariant +) -> TheoreticalMemorySummary: + """Build the theoretical accounting for one model variant.""" + dense_elements = config.dense_parameter_count + dense_parameter_bytes = dense_elements * config.parameter_bytes + static_extras = 0 + if config.include_input_in_static_baseline: + static_extras += config.input_bytes + if config.include_label_in_static_baseline: + static_extras += config.label_bytes + + if variant is LinearModelVariant.DENSE: + trainable_elements = dense_elements + trainable_parameter_bytes = dense_parameter_bytes + resident_parameter_bytes = dense_parameter_bytes + forward_dynamic_estimate_bytes = ( + config.batch_size * (config.in_features + config.out_features) * config.activation_bytes + ) + name = LinearModelVariant.DENSE.value + else: + trainable_elements = config.lora_trainable_parameter_count + trainable_parameter_bytes = trainable_elements * config.parameter_bytes + resident_parameter_bytes = dense_parameter_bytes + trainable_parameter_bytes + forward_dynamic_estimate_bytes = ( + config.batch_size + * (config.in_features + config.out_features + config.lora_rank) + * config.activation_bytes + ) + name = LinearModelVariant.FROZEN_LORA.value + + gradient_bytes = trainable_elements * config.gradient_bytes + 
optimizer_state_bytes = trainable_elements * config.optimizer_state_bytes_per_trainable_element + static_baseline_bytes = resident_parameter_bytes + static_extras + backward_dynamic_estimate_bytes = gradient_bytes + forward_dynamic_estimate_bytes + optimizer_dynamic_estimate_bytes = optimizer_state_bytes + forward_dynamic_estimate_bytes + dynamic_peak_lower_bound_bytes = max( + forward_dynamic_estimate_bytes, + backward_dynamic_estimate_bytes, + optimizer_dynamic_estimate_bytes, + ) + + return TheoreticalMemorySummary( + name=name, + variant=variant, + resident_parameter_bytes=resident_parameter_bytes, + trainable_parameter_bytes=trainable_parameter_bytes, + gradient_bytes=gradient_bytes, + optimizer_state_bytes=optimizer_state_bytes, + static_baseline_bytes=static_baseline_bytes, + forward_dynamic_estimate_bytes=forward_dynamic_estimate_bytes, + backward_dynamic_estimate_bytes=backward_dynamic_estimate_bytes, + optimizer_dynamic_estimate_bytes=optimizer_dynamic_estimate_bytes, + dynamic_peak_lower_bound_bytes=dynamic_peak_lower_bound_bytes, + ) + + +def compare_theory_to_measurement( + theory: TheoreticalMemorySummary, + measured: MemorySummary, + tolerances: ComparisonTolerances | None = None, + notes: tuple[str, ...] = (), + possible_gap_sources: tuple[str, ...] = (), +) -> TheoryExperimentComparison: + """Compare theoretical accounting against one measured summary. + + The report intentionally preserves disagreement so it can be investigated + later rather than normalized away. 
+ """ + actual_tolerances = tolerances or ComparisonTolerances() + metrics = { + "static_baseline": _approximate_metric( + metric_name="static_baseline", + predicted_bytes=float(theory.static_baseline_bytes), + measured_bytes=measured.static_memory_bytes, + tolerance=actual_tolerances.static_baseline_relative, + ), + "forward_end_dynamic": _approximate_metric( + metric_name="forward_end_dynamic", + predicted_bytes=float(theory.forward_dynamic_estimate_bytes), + measured_bytes=measured.annotation_bytes("## forward ##", "END"), + tolerance=actual_tolerances.forward_dynamic_relative, + ), + "backward_end_dynamic": _approximate_metric( + metric_name="backward_end_dynamic", + predicted_bytes=float(theory.backward_dynamic_estimate_bytes), + measured_bytes=measured.annotation_bytes("## backward ##", "END"), + tolerance=actual_tolerances.backward_dynamic_relative, + ), + "optimizer_end_dynamic": _approximate_metric( + metric_name="optimizer_end_dynamic", + predicted_bytes=float(theory.optimizer_dynamic_estimate_bytes), + measured_bytes=measured.annotation_bytes("## optimizer ##", "END"), + tolerance=actual_tolerances.optimizer_dynamic_relative, + ), + "dynamic_peak_lower_bound": _lower_bound_metric( + metric_name="dynamic_peak_lower_bound", + predicted_bytes=float(theory.dynamic_peak_lower_bound_bytes), + measured_bytes=measured.dynamic_peak_bytes, + ), + } + return TheoryExperimentComparison( + name=theory.name, + theory=theory, + measured=measured, + metrics=metrics, + notes=notes, + possible_gap_sources=possible_gap_sources, + ) + + +def _approximate_metric( + metric_name: str, predicted_bytes: float, measured_bytes: float, tolerance: float +) -> ComparisonMetric: + """Build an approximate-equality metric.""" + absolute_error_bytes = measured_bytes - predicted_bytes + relative_error = _relative_error(predicted_bytes, measured_bytes) + return ComparisonMetric( + metric_name=metric_name, + comparison_kind="approximate", + predicted_bytes=predicted_bytes, + 
measured_bytes=measured_bytes, + absolute_error_bytes=absolute_error_bytes, + relative_error=relative_error, + tolerance=tolerance, + within_tolerance=relative_error <= tolerance, + ) + + +def _lower_bound_metric( + metric_name: str, predicted_bytes: float, measured_bytes: float +) -> ComparisonMetric: + """Build a lower-bound metric where measured values are expected to be at least the bound.""" + absolute_error_bytes = measured_bytes - predicted_bytes + relative_error = _relative_error(predicted_bytes, measured_bytes) + return ComparisonMetric( + metric_name=metric_name, + comparison_kind="lower_bound", + predicted_bytes=predicted_bytes, + measured_bytes=measured_bytes, + absolute_error_bytes=absolute_error_bytes, + relative_error=relative_error, + tolerance=None, + within_tolerance=measured_bytes >= predicted_bytes, + ) + + +def _relative_error(predicted_bytes: float, measured_bytes: float) -> float: + """Return the relative error between predicted and measured bytes.""" + denominator = abs(predicted_bytes) + if denominator == 0: + return 0.0 if measured_bytes == 0 else float("inf") + return abs(measured_bytes - predicted_bytes) / denominator + + +def _parse_annotation_measurement(key: str, value: object) -> AnnotationMeasurement: + """Parse one annotation measurement mapping.""" + mapping = _require_mapping(value, key) + annotation_mapping = _require_mapping( + mapping.get("annotation"), f"annotation_memory[{key}].annotation" + ) + annotation = AnnotationMetadata( + stage=_require_str(annotation_mapping.get("stage"), f"annotation_memory[{key}].stage"), + name=_require_str(annotation_mapping.get("name"), f"annotation_memory[{key}].name"), + device=_require_int(annotation_mapping.get("device"), f"annotation_memory[{key}].device"), + time_us=_require_int( + annotation_mapping.get("time_us"), f"annotation_memory[{key}].time_us" + ), + ) + memory_bytes = _require_float( + mapping.get("memory_bytes"), f"annotation_memory[{key}].memory_bytes" + ) + memory_gib = 
_require_float(mapping.get("memory_gib"), f"annotation_memory[{key}].memory_gib") + return AnnotationMeasurement( + annotation=annotation, memory_bytes=memory_bytes, memory_gib=memory_gib + ) + + +def _require_mapping(value: object, field_name: str) -> Mapping[str, object]: + """Validate that a value is a mapping.""" + if not isinstance(value, dict): + message = f"{field_name} must be a mapping" + raise TypeError(message) + return cast("Mapping[str, object]", value) + + +def _require_str(value: object, field_name: str) -> str: + """Validate that a value is a string.""" + if not isinstance(value, str): + message = f"{field_name} must be a string" + raise TypeError(message) + return value + + +def _require_float(value: object, field_name: str) -> float: + """Validate that a value is numeric and convert it to float.""" + if not isinstance(value, int | float): + message = f"{field_name} must be numeric" + raise TypeError(message) + return float(value) + + +def _require_int(value: object, field_name: str) -> int: + """Validate that a value is an integer.""" + if not isinstance(value, int): + message = f"{field_name} must be an integer" + raise TypeError(message) + return value diff --git a/tests/test_memory_experiment.py b/tests/test_memory_experiment.py new file mode 100644 index 0000000..a6b094d --- /dev/null +++ b/tests/test_memory_experiment.py @@ -0,0 +1,190 @@ +"""Tests for reusable memory experiment helpers.""" + +from __future__ import annotations + +import torch +from torch import nn + +from stellatscale.memory_experiment import ( + ComparisonTolerances, + FrozenLoRALinear, + LinearModelVariant, + MemoryExperimentConfig, + MemorySummary, + build_theoretical_summary, + compare_theory_to_measurement, +) + + +def test_dense_theoretical_accounting_matches_exact_counts() -> None: + """Dense accounting should match the closed-form formulas exactly.""" + config = MemoryExperimentConfig( + batch_size=16, in_features=4096, out_features=4096, lora_rank=16 + ) + + summary 
= build_theoretical_summary(config, LinearModelVariant.DENSE) + + assert summary.resident_parameter_bytes == 4096 * 4096 * 4 + assert summary.trainable_parameter_bytes == 4096 * 4096 * 4 + assert summary.gradient_bytes == 4096 * 4096 * 4 + assert summary.optimizer_state_bytes == 4096 * 4096 * 8 + assert summary.static_baseline_bytes == (4096 * 4096 * 4) + (16 * 4096 * 4) + (16 * 4096 * 4) + + +def test_frozen_lora_theoretical_accounting_matches_exact_counts() -> None: + """Frozen-LoRA accounting should only charge gradients and optimizer state to adapters.""" + config = MemoryExperimentConfig( + batch_size=16, in_features=4096, out_features=4096, lora_rank=16 + ) + + summary = build_theoretical_summary(config, LinearModelVariant.FROZEN_LORA) + + adapter_bytes = 16 * (4096 + 4096) * 4 + assert summary.resident_parameter_bytes == (4096 * 4096 * 4) + adapter_bytes + assert summary.trainable_parameter_bytes == adapter_bytes + assert summary.gradient_bytes == adapter_bytes + assert summary.optimizer_state_bytes == 16 * (4096 + 4096) * 8 + + +def test_frozen_lora_linear_keeps_base_frozen_and_optimizer_state_scoped() -> None: + """The frozen base weight should not receive gradients or optimizer state.""" + model = FrozenLoRALinear(nn.Linear(8, 8, bias=False), rank=2) + inputs = torch.randn(4, 8) + labels = torch.randn(4, 8) + adam = torch.optim.AdamW( + [parameter for parameter in model.parameters() if parameter.requires_grad], lr=0.1 + ) + + prediction = model(inputs) + loss = nn.functional.mse_loss(prediction, labels) + loss.backward() + adam.step() + + assert model.base.weight.grad is None + assert model.base.weight not in adam.state + assert model.lora_a.weight in adam.state + assert model.lora_b.weight in adam.state + + +def test_comparison_report_preserves_theoretical_gap_information() -> None: + """Comparison reports should preserve large gaps instead of normalizing them away.""" + config = MemoryExperimentConfig( + batch_size=16, in_features=4096, 
out_features=4096, lora_rank=16 + ) + theory = build_theoretical_summary(config, LinearModelVariant.FROZEN_LORA) + measured = MemorySummary.from_mapping( + { + "name": "frozen_lora", + "dynamic_peak_bytes": 2359296.0, + "dynamic_peak_gib": 0.002197265625, + "static_memory_bytes": 85196800.0, + "static_memory_gib": 0.079345703125, + "overall_peak_bytes": 87556096.0, + "overall_peak_gib": 0.08154296875, + "annotation_memory": { + "## forward ##_END": { + "annotation": { + "stage": "END", + "name": "## forward ##", + "device": 0, + "time_us": 1, + }, + "memory_bytes": 263168.0, + "memory_gib": 0.00024509429931640625, + }, + "## backward ##_END": { + "annotation": { + "stage": "END", + "name": "## backward ##", + "device": 0, + "time_us": 2, + }, + "memory_bytes": 786432.0, + "memory_gib": 0.000732421875, + }, + "## optimizer ##_END": { + "annotation": { + "stage": "END", + "name": "## optimizer ##", + "device": 0, + "time_us": 3, + }, + "memory_bytes": 1310720.0, + "memory_gib": 0.001220703125, + }, + }, + "files": {}, + } + ) + + report = compare_theory_to_measurement( + theory, + measured, + tolerances=ComparisonTolerances(static_baseline_relative=0.10), + notes=("Preserve the error; it may reflect theory, implementation, or runtime behavior.",), + possible_gap_sources=( + "External GPU workload from scripts like scripts/gpu_keepalive_loop.sh can perturb allocator baselines if running concurrently.", + ), + ) + + assert "static_baseline" in report.failing_metrics + assert report.metrics["dynamic_peak_lower_bound"].within_tolerance + assert "gpu_keepalive_loop.sh" in report.possible_gap_sources[0] + + +def test_dense_measurement_agreement_is_reasonable_for_major_metrics() -> None: + """The dense measured baseline should be in rough agreement for the major metrics.""" + config = MemoryExperimentConfig( + batch_size=16, in_features=4096, out_features=4096, lora_rank=16 + ) + theory = build_theoretical_summary(config, LinearModelVariant.DENSE) + measured = 
MemorySummary.from_mapping( + { + "name": "dense", + "dynamic_peak_bytes": 285736960.0, + "dynamic_peak_gib": 0.26611328125, + "static_memory_bytes": 67633152.0, + "static_memory_gib": 0.06298828125, + "overall_peak_bytes": 353370112.0, + "overall_peak_gib": 0.3291015625, + "annotation_memory": { + "## forward ##_END": { + "annotation": { + "stage": "END", + "name": "## forward ##", + "device": 0, + "time_us": 1, + }, + "memory_bytes": 8781824.0, + "memory_gib": 0.0081787109375, + }, + "## backward ##_END": { + "annotation": { + "stage": "END", + "name": "## backward ##", + "device": 0, + "time_us": 2, + }, + "memory_bytes": 84410368.0, + "memory_gib": 0.07861328125, + }, + "## optimizer ##_END": { + "annotation": { + "stage": "END", + "name": "## optimizer ##", + "device": 0, + "time_us": 3, + }, + "memory_bytes": 151519232.0, + "memory_gib": 0.14111328125, + }, + }, + "files": {}, + } + ) + + report = compare_theory_to_measurement(theory, measured) + + assert report.metrics["static_baseline"].within_tolerance + assert report.metrics["optimizer_end_dynamic"].within_tolerance + assert report.metrics["dynamic_peak_lower_bound"].within_tolerance From 41dbe2ec4cfb5ff16fb359597596d53846b7c6b0 Mon Sep 17 00:00:00 2001 From: gklajer Date: Thu, 26 Mar 2026 09:01:11 +0000 Subject: [PATCH 3/4] refactor(profiling): reorganize output directory structure --- scripts/lora_memory_analysis.py | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/scripts/lora_memory_analysis.py b/scripts/lora_memory_analysis.py index e4a5e9e..ce8a78a 100644 --- a/scripts/lora_memory_analysis.py +++ b/scripts/lora_memory_analysis.py @@ -50,11 +50,13 @@ class MemoryAbstractProtocol(Protocol): ROOT = Path(__file__).resolve().parents[1] -SNAPSHOT_DIR = ROOT / "deliverables" / "single_layer_lora_outputs" -OUTPUT_DIR = SNAPSHOT_DIR / "mosaic" +RESULTS_DIR = ROOT / "results" / "memory" / "single_layer_lora" +SNAPSHOT_DIR = RESULTS_DIR / "snapshots" +TRACE_DIR =
RESULTS_DIR / "traces" +OUTPUT_DIR = RESULTS_DIR / "mosaic" ANNOTATIONS = ("## forward ##", "## backward ##", "## optimizer ##") SNAPSHOT_NAMES = ("dense", "frozen_lora") -COMPARISON_REPORT_PATH = OUTPUT_DIR / "theory_comparison.json" +COMPARISON_REPORT_PATH = RESULTS_DIR / "theory_comparison.json" EXPERIMENT_CONFIG = MemoryExperimentConfig( batch_size=16, in_features=4096, out_features=4096, lora_rank=16, steps=5, learning_rate=0.05 @@ -132,8 +134,10 @@ def run_profile(name: str, model: nn.Module, inputs: torch.Tensor, labels: torch lr=EXPERIMENT_CONFIG.learning_rate, ) + SNAPSHOT_DIR.mkdir(parents=True, exist_ok=True) + TRACE_DIR.mkdir(parents=True, exist_ok=True) snapshot_path = SNAPSHOT_DIR / f"{name}_snapshot.pickle" - trace_dir = SNAPSHOT_DIR / f"{name}_traces" + trace_dir = TRACE_DIR / name profile_schedule = schedule(wait=0, warmup=0, active=EXPERIMENT_CONFIG.steps, repeat=1) with ( @@ -310,7 +314,8 @@ def write_comparison(summaries: list[MemorySummary]) -> Path: }, } - comparison_path = OUTPUT_DIR / "comparison.json" + RESULTS_DIR.mkdir(parents=True, exist_ok=True) + comparison_path = RESULTS_DIR / "comparison.json" comparison_path.write_text(json.dumps(comparison, indent=2), encoding="utf-8") return comparison_path @@ -321,11 +326,6 @@ def write_theory_comparison(summaries: list[MemorySummary]) -> Path: "Theoretical gaps may come from incomplete theory, implementation details, or runtime behavior.", "Allocator caching, autograd temporaries, and kernel workspaces can all move the measured result away from the simple tensor model.", ] - keepalive_script = ROOT / "scripts" / "gpu_keepalive_loop.sh" - if keepalive_script.exists(): - possible_gap_sources.append( - "External GPU workload from scripts like scripts/gpu_keepalive_loop.sh can perturb allocator baselines if running concurrently." 
- ) comparisons = {} for summary in summaries: @@ -342,6 +342,7 @@ def write_theory_comparison(summaries: list[MemorySummary]) -> Path: ) comparisons[summary.name] = comparison.to_dict() + RESULTS_DIR.mkdir(parents=True, exist_ok=True) COMPARISON_REPORT_PATH.write_text(json.dumps(comparisons, indent=2), encoding="utf-8") return COMPARISON_REPORT_PATH @@ -359,6 +360,7 @@ def main() -> None: """Run profiling and, when available, Mosaic post-processing.""" args = parse_args() + RESULTS_DIR.mkdir(parents=True, exist_ok=True) SNAPSHOT_DIR.mkdir(parents=True, exist_ok=True) if args.mosaic_only: From 97463f138beb4812a6dae4faf4faaa5e54b3d61f Mon Sep 17 00:00:00 2001 From: gklajer Date: Fri, 27 Mar 2026 00:45:43 +0000 Subject: [PATCH 4/4] feat(profiling): add Mosaic memory analysis and report figure --- deliverables/report.tex | 33 +- scripts/lora_memory_analysis.py | 562 +++++++++++++++++++++-- scripts/plot_memory_mosaic_widget.py | 630 ++++++++++++++++++++++++++ src/stellatscale/__init__.py | 16 +- src/stellatscale/memory_experiment.py | 422 ++++++++++++++--- src/stellatscale/models.py | 62 +++ tests/test_memory_experiment.py | 228 ++++++++-- 7 files changed, 1799 insertions(+), 154 deletions(-) create mode 100644 scripts/plot_memory_mosaic_widget.py diff --git a/deliverables/report.tex b/deliverables/report.tex index f10dd57..4963cc6 100644 --- a/deliverables/report.tex +++ b/deliverables/report.tex @@ -42,7 +42,7 @@ \subsection{Direct Comparison} \begin{figure}[h] \centering - \includegraphics[width=0.5\textwidth]{figures/direct.png} + \includegraphics[draft,width=0.5\textwidth]{figures/direct.png} \caption{Direct comparison: mean fitting loss with min--mean shaded region (1000 trials per rank).} \label{fig:direct} \end{figure} @@ -53,7 +53,7 @@ \subsection{Baseline Comparison} \begin{figure}[h] \centering - \includegraphics[width=0.5\textwidth]{figures/baseline.png} + \includegraphics[draft,width=0.5\textwidth]{figures/baseline.png} \caption{Baseline comparison: both 
methods approximate a full Transformer. Shaded region shows min--mean spread.} \label{fig:baseline} \end{figure} @@ -64,7 +64,7 @@ \subsection{Ablation: Stiefel vs.\ Euclidean Geometry} \begin{figure}[h] \centering - \includegraphics[width=0.5\textwidth]{figures/ablation.png} + \includegraphics[draft,width=0.5\textwidth]{figures/ablation.png} \caption{Ablation: Euclidean 3-factor vs.\ StelLA (same $USV^\top$, different optimizer).} \label{fig:ablation} \end{figure} @@ -75,7 +75,7 @@ \subsection{Computational Efficiency} \begin{figure}[h] \centering - \includegraphics[width=0.7\textwidth]{figures/benchmark_gpu_memory_phases.png} + \includegraphics[draft,width=0.7\textwidth]{figures/benchmark_gpu_memory_phases.png} \caption{GPU memory usage per training phase (averaged over steps) for a full Transformer, LoRA, and StelLA at ranks $r \in \{8, 32, 64, 128\}$.} \label{fig:mem_phases} \end{figure} @@ -84,13 +84,13 @@ \subsection{Computational Efficiency} \centering \begin{minipage}{0.48\textwidth} \centering - \includegraphics[width=\textwidth]{figures/benchmark_peak_memory.png} + \includegraphics[draft,width=\textwidth]{figures/benchmark_peak_memory.png} \caption{Peak GPU memory (MB).} \label{fig:peak_mem} \end{minipage}\hfill \begin{minipage}{0.48\textwidth} \centering - \includegraphics[width=\textwidth]{figures/benchmark_throughput.png} + \includegraphics[draft,width=\textwidth]{figures/benchmark_throughput.png} \caption{Training throughput (samples/s).} \label{fig:throughput} \end{minipage} @@ -104,13 +104,32 @@ \subsection{Computational Efficiency} \begin{figure}[h] \centering - \includegraphics[width=0.7\textwidth]{figures/benchmark_time_breakdown.png} + \includegraphics[draft,width=0.7\textwidth]{figures/benchmark_time_breakdown.png} \caption{Per-step time breakdown (forward / backward / optimizer) in milliseconds.} \label{fig:time_breakdown} \end{figure} \paragraph{Time breakdown.} Figure~\ref{fig:time_breakdown} decomposes the per-step wall time into forward, 
backward, and optimizer phases. Forward and backward times are nearly identical between LoRA and StelLA at each rank, confirming that the Stiefel constraint does not alter the computational graph of these phases. The overhead is entirely concentrated in the optimizer step: at $r{=}128$, StelLA's optimizer phase takes ${\approx}140$\,ms vs.\ ${\approx}10$\,ms for LoRA, accounting for the throughput gap observed above. This is expected, as the polar retraction requires an SVD of an $r \times r$ matrix at each adapted layer per step. At low ranks this cost is negligible, but it scales cubically with $r$, making it the dominant bottleneck at $r{=}128$. This throughput penalty is the main practical cost of the Stiefel constraint and should be weighed against the expressivity and memory gains reported above. +\subsection{Single-Layer Memory Accounting} + +\begin{figure}[h] + \centering + \includegraphics[width=0.95\textwidth]{figures/single_layer_memory_mosaic_widget_din_4096_dout_4096_r_128_b_128.png} + \caption{Single-layer memory comparison for a $4096 \times 4096$ linear layer and rank-$128$ adapters at batch size $128$. The top row compares the measured Mosaic peak breakdown against the analytical tensor-accounting estimate for the dense baseline, LoRA, and StelLA. The bottom row shows the measured Mosaic categories over one profiled training step, with the peak marked by a dashed vertical line.} + \label{fig:single_layer_memory} +\end{figure} + +Figure~\ref{fig:single_layer_memory} provides a more controlled view of the memory story behind the end-to-end benchmark. We isolate a single trainable linear layer and compare three variants: full dense training, LoRA, and StelLA. 
The analytical bars are obtained from explicit tensor accounting: static memory is the resident parameter storage, activation memory is estimated from the forward intermediates, backward memory is approximated by gradient-bearing tensors, and optimizer memory is the Adam state required by trainable parameters. For the dense baseline this yields a total estimate of $264.0$\,MiB, while LoRA and StelLA are both around $88$\,MiB because only the low-rank adapter parameters are trainable. + +Analytically, the dense baseline pays full parameter, gradient, and Adam-state cost, +$$ +m_{\mathrm{dense}} \sim 4d_{\mathrm{out}}d_{\mathrm{in}} + 4d_{\mathrm{out}}d_{\mathrm{in}} + 8d_{\mathrm{out}}d_{\mathrm{in}}, +$$ +while LoRA reduces the trainable part to $r(d_{\mathrm{in}} + d_{\mathrm{out}})$ and StelLA to $r(d_{\mathrm{in}} + d_{\mathrm{out}}) + r^2$, with activation terms of order $4b(d_{\mathrm{in}} + r + d_{\mathrm{out}})$. This gives $264.0$\,MiB for dense training and about $88$\,MiB for both LoRA and StelLA. + +The measured bars are taken from Mosaic after 5 warmup and 5 active steps. LoRA reaches $106.2$\,MiB, while StelLA rises to $124.8$\,MiB because its optimizer introduces extra transient workspace. The analytical view is therefore a compact tensor-accounting model, while Mosaic captures the true runtime peak, including temporary buffers and \texttt{Unknown} allocator activity. + \section{Limitations and Discussion} Several caveats apply to our findings. Our models are single-layer Transformers with $d{=}8$, far from the multi-layer, high-dimensional architectures (LLaMA-7B, ViT-Large) on which StelLA reports its strongest gains; the observed trends may not transfer directly to larger scales. The expressivity framework measures functional capacity rather than downstream task accuracy, and uses only 5 optimization steps per trial---too few for the geometric constraint to fully manifest its advantage, as evidenced by the narrow ablation gap. 
Finally, LoRA's advantage at $r{=}1$ in the baseline setting suggests the Stiefel constraint may be overly restrictive when the rank budget is very small. diff --git a/scripts/lora_memory_analysis.py b/scripts/lora_memory_analysis.py index ce8a78a..7c0ae6e 100644 --- a/scripts/lora_memory_analysis.py +++ b/scripts/lora_memory_analysis.py @@ -1,15 +1,18 @@ -"""Profile a dense-vs-LoRA linear layer and generate optional Mosaic reports.""" +"""Profile linear, linear-LoRA, and linear-StelLA variants and generate optional Mosaic reports.""" from __future__ import annotations import argparse +import gc import importlib import json +import re import sys +import warnings from contextlib import contextmanager, redirect_stdout from io import StringIO from pathlib import Path -from typing import TYPE_CHECKING, Any, Protocol +from typing import TYPE_CHECKING, Any, Protocol, cast import torch from torch import nn @@ -22,8 +25,8 @@ ) from stellatscale.memory_experiment import ( + ComparisonNarrative, ComparisonTolerances, - FrozenLoRALinear, LinearModelVariant, MemoryExperimentConfig, MemorySummary, @@ -31,6 +34,7 @@ bytes_to_gib, compare_theory_to_measurement, ) +from stellatscale.models import LoRALinear, StelLAAdamW, StelLALinear if TYPE_CHECKING: from collections.abc import Callable, Iterator @@ -49,25 +53,201 @@ class MemoryAbstractProtocol(Protocol): memory_snapshot: MemorySnapshotProtocol +def current_allocator_state() -> dict[str, float] | None: + """Return the current CUDA allocator state, or None on non-CUDA runs.""" + if DEVICE.type != "cuda": + return None + + allocated_bytes = float(torch.cuda.memory_allocated()) + reserved_bytes = float(torch.cuda.memory_reserved()) + reserved_cached_bytes = max(0.0, reserved_bytes - allocated_bytes) + return { + "allocated_bytes": allocated_bytes, + "allocated_gib": bytes_to_gib(allocated_bytes), + "reserved_bytes": reserved_bytes, + "reserved_gib": bytes_to_gib(reserved_bytes), + "reserved_cached_bytes": reserved_cached_bytes, + 
"reserved_cached_gib": bytes_to_gib(reserved_cached_bytes), + } + + +def current_live_tensor_accounting( + model: nn.Module, optimizer: torch.optim.Optimizer, inputs: torch.Tensor, labels: torch.Tensor +) -> dict[str, Any] | None: + """Return a categorized snapshot of live CUDA tensor storage.""" + if DEVICE.type != "cuda": + return None + + categories = _empty_live_tensor_categories() + top_other_tensors: list[dict[str, Any]] = [] + known_ptrs = _known_tensor_pointers(model, optimizer, inputs, labels) + + for data_ptr, tensor in _collect_live_cuda_tensors().items(): + storage_bytes = float(tensor.untyped_storage().nbytes()) + category = _live_tensor_category(data_ptr, known_ptrs) + categories[category] += storage_bytes + if category == "other": + top_other_tensors.append(_describe_live_tensor(tensor, storage_bytes)) + + top_other_tensors.sort(key=lambda item: float(item["bytes"]), reverse=True) + total_live_tensor_bytes = float(sum(categories.values())) + return { + "total_live_tensor_bytes": total_live_tensor_bytes, + "total_live_tensor_gib": bytes_to_gib(total_live_tensor_bytes), + "categories": { + key: {"bytes": value, "gib": bytes_to_gib(value)} for key, value in categories.items() + }, + "top_other_tensors": top_other_tensors[:8], + } + + +def _empty_live_tensor_categories() -> dict[str, float]: + """Return zero-initialized live tensor categories.""" + return { + "parameters": 0.0, + "gradients": 0.0, + "optimizer_state": 0.0, + "inputs": 0.0, + "labels": 0.0, + "other": 0.0, + } + + +def _known_tensor_pointers( + model: nn.Module, optimizer: torch.optim.Optimizer, inputs: torch.Tensor, labels: torch.Tensor +) -> dict[str, set[int] | int]: + """Collect data pointers for tensors with known semantic roles.""" + return { + "parameters": { + parameter.untyped_storage().data_ptr() + for parameter in model.parameters() + if parameter.is_cuda + }, + "gradients": { + parameter.grad.untyped_storage().data_ptr() + for parameter in model.parameters() + if 
parameter.grad is not None and parameter.grad.is_cuda + }, + "optimizer_state": { + value.untyped_storage().data_ptr() + for state in optimizer.state.values() + for value in state.values() + if isinstance(value, torch.Tensor) and value.is_cuda + }, + "inputs": inputs.untyped_storage().data_ptr(), + "labels": labels.untyped_storage().data_ptr(), + } + + +def _collect_live_cuda_tensors() -> dict[int, torch.Tensor]: + """Collect unique live CUDA tensors indexed by storage pointer.""" + seen_tensors: dict[int, torch.Tensor] = {} + with warnings.catch_warnings(): + warnings.simplefilter("ignore", FutureWarning) + for obj in gc.get_objects(): + try: + if isinstance(obj, torch.Tensor) and obj.is_cuda: + data_ptr = obj.untyped_storage().data_ptr() + if data_ptr not in seen_tensors: + seen_tensors[data_ptr] = obj + except (AttributeError, ReferenceError, RuntimeError): + continue + return seen_tensors + + +def _live_tensor_category(data_ptr: int, known_ptrs: dict[str, set[int] | int]) -> str: + """Map a tensor storage pointer to its accounting category.""" + parameter_ptrs = cast("set[int]", known_ptrs["parameters"]) + gradient_ptrs = cast("set[int]", known_ptrs["gradients"]) + optimizer_state_ptrs = cast("set[int]", known_ptrs["optimizer_state"]) + input_ptr = cast("int", known_ptrs["inputs"]) + label_ptr = cast("int", known_ptrs["labels"]) + + if data_ptr in parameter_ptrs: + return "parameters" + if data_ptr in gradient_ptrs: + return "gradients" + if data_ptr in optimizer_state_ptrs: + return "optimizer_state" + if data_ptr == input_ptr: + return "inputs" + if data_ptr == label_ptr: + return "labels" + return "other" + + +def _describe_live_tensor(tensor: torch.Tensor, storage_bytes: float) -> dict[str, Any]: + """Build a compact description of one unmatched live CUDA tensor.""" + return { + "bytes": storage_bytes, + "gib": bytes_to_gib(storage_bytes), + "shape": list(tensor.shape), + "dtype": str(tensor.dtype), + "requires_grad": bool(tensor.requires_grad), + } + + 
ROOT = Path(__file__).resolve().parents[1] -RESULTS_DIR = ROOT / "results" / "memory" / "single_layer_lora" +RESULTS_ROOT_DIR = ROOT / "results" / "memory" / "single_layer_lora" +RESULTS_DIR = RESULTS_ROOT_DIR SNAPSHOT_DIR = RESULTS_DIR / "snapshots" TRACE_DIR = RESULTS_DIR / "traces" OUTPUT_DIR = RESULTS_DIR / "mosaic" ANNOTATIONS = ("## forward ##", "## backward ##", "## optimizer ##") -SNAPSHOT_NAMES = ("dense", "frozen_lora") +SNAPSHOT_NAMES = ("linear", "linear_lora", "linear_stella") COMPARISON_REPORT_PATH = RESULTS_DIR / "theory_comparison.json" -EXPERIMENT_CONFIG = MemoryExperimentConfig( - batch_size=16, in_features=4096, out_features=4096, lora_rank=16, steps=5, learning_rate=0.05 +DEFAULT_EXPERIMENT_CONFIG = MemoryExperimentConfig( + batch_size=16, + in_features=4096, + out_features=4096, + lora_rank=16, + warmup_steps=5, + steps=5, + learning_rate=0.05, ) +EXPERIMENT_CONFIG = DEFAULT_EXPERIMENT_CONFIG DEVICE = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") +def build_results_dir(config: MemoryExperimentConfig, output_tag: str | None) -> Path: + """Return the results directory for one experiment configuration.""" + if output_tag is None and config == DEFAULT_EXPERIMENT_CONFIG: + return RESULTS_ROOT_DIR + + tag = output_tag or ( + f"din_{config.in_features}_dout_{config.out_features}" + f"_r_{config.lora_rank}_b_{config.batch_size}" + ) + return RESULTS_ROOT_DIR / "runs" / tag + + +def configure_runtime_paths(results_dir: Path) -> None: + """Update the module-level paths used by the profiling workflow.""" + global RESULTS_DIR, SNAPSHOT_DIR, TRACE_DIR, OUTPUT_DIR, COMPARISON_REPORT_PATH # noqa: PLW0603 + RESULTS_DIR = results_dir + SNAPSHOT_DIR = RESULTS_DIR / "snapshots" + TRACE_DIR = RESULTS_DIR / "traces" + OUTPUT_DIR = RESULTS_DIR / "mosaic" + COMPARISON_REPORT_PATH = RESULTS_DIR / "theory_comparison.json" + + def parse_args() -> argparse.Namespace: """Parse command-line arguments for profiling and post-processing.""" parser = 
argparse.ArgumentParser() + parser.add_argument("--batch-size", type=int, default=EXPERIMENT_CONFIG.batch_size) + parser.add_argument("--in-features", type=int, default=EXPERIMENT_CONFIG.in_features) + parser.add_argument("--out-features", type=int, default=EXPERIMENT_CONFIG.out_features) + parser.add_argument("--lora-rank", type=int, default=EXPERIMENT_CONFIG.lora_rank) + parser.add_argument("--warmup-steps", type=int, default=EXPERIMENT_CONFIG.warmup_steps) + parser.add_argument("--steps", type=int, default=EXPERIMENT_CONFIG.steps) + parser.add_argument("--learning-rate", type=float, default=EXPERIMENT_CONFIG.learning_rate) + parser.add_argument( + "--output-tag", + default=None, + help="Optional output subdirectory name under results/memory/single_layer_lora/runs/.", + ) parser.add_argument( "--mosaic-only", action="store_true", @@ -76,6 +256,19 @@ def parse_args() -> argparse.Namespace: return parser.parse_args() +def build_experiment_config(args: argparse.Namespace) -> MemoryExperimentConfig: + """Build the experiment configuration from parsed CLI arguments.""" + return MemoryExperimentConfig( + batch_size=args.batch_size, + in_features=args.in_features, + out_features=args.out_features, + lora_rank=args.lora_rank, + warmup_steps=args.warmup_steps, + steps=args.steps, + learning_rate=args.learning_rate, + ) + + def build_inputs() -> tuple[torch.Tensor, torch.Tensor]: """Create deterministic inputs and labels for the profiling run.""" generator = torch.Generator(device="cpu").manual_seed(7) @@ -89,20 +282,46 @@ def build_inputs() -> tuple[torch.Tensor, torch.Tensor]: def make_dense_model() -> nn.Module: - """Construct the dense linear baseline model.""" + """Construct the plain linear baseline model.""" torch.manual_seed(7) return nn.Linear(EXPERIMENT_CONFIG.in_features, EXPERIMENT_CONFIG.out_features, bias=False) def make_lora_model() -> nn.Module: - """Construct the frozen-base LoRA variant used for comparison.""" + """Construct the linear-LoRA variant used 
for comparison.""" torch.manual_seed(7) - return FrozenLoRALinear( + return LoRALinear( nn.Linear(EXPERIMENT_CONFIG.in_features, EXPERIMENT_CONFIG.out_features, bias=False), rank=EXPERIMENT_CONFIG.lora_rank, ) +def make_stella_model() -> nn.Module: + """Construct the linear-StelLA variant used for comparison.""" + torch.manual_seed(7) + return StelLALinear( + nn.Linear(EXPERIMENT_CONFIG.in_features, EXPERIMENT_CONFIG.out_features, bias=False), + rank=EXPERIMENT_CONFIG.lora_rank, + ) + + +def make_optimizer(name: str, model: nn.Module) -> torch.optim.Optimizer: + """Construct the optimizer used for one profiled model variant.""" + if name != LinearModelVariant.LINEAR_STELLA.value: + return torch.optim.AdamW( + [parameter for parameter in model.parameters() if parameter.requires_grad], + lr=EXPERIMENT_CONFIG.learning_rate, + ) + + if not isinstance(model, StelLALinear): + message = f"Expected StelLALinear for {name}, got {type(model)!r}" + raise TypeError(message) + return StelLAAdamW( + (parameter for parameter in model.parameters() if parameter.requires_grad), + lr=EXPERIMENT_CONFIG.learning_rate, + ) + + def profiler_activities() -> list[ProfilerActivity]: """Return the profiler activity set supported by the current device.""" if DEVICE.type == "cuda": @@ -127,19 +346,28 @@ def capture_snapshot(snapshot_path: Path) -> Iterator[None]: def run_profile(name: str, model: nn.Module, inputs: torch.Tensor, labels: torch.Tensor) -> None: """Run the profiler and snapshot capture for one model variant.""" + if DEVICE.type == "cuda": + gc.collect() + torch.cuda.synchronize() + torch.cuda.empty_cache() + torch.cuda.reset_peak_memory_stats() + model = model.to(DEVICE) loss_fn = nn.MSELoss() - optimizer = torch.optim.AdamW( - [parameter for parameter in model.parameters() if parameter.requires_grad], - lr=EXPERIMENT_CONFIG.learning_rate, - ) + optimizer = make_optimizer(name, model) SNAPSHOT_DIR.mkdir(parents=True, exist_ok=True) TRACE_DIR.mkdir(parents=True, exist_ok=True) 
snapshot_path = SNAPSHOT_DIR / f"{name}_snapshot.pickle" trace_dir = TRACE_DIR / name - profile_schedule = schedule(wait=0, warmup=0, active=EXPERIMENT_CONFIG.steps, repeat=1) + runtime_state_path = RESULTS_DIR / f"{name}_allocator_state.json" + live_tensor_path = RESULTS_DIR / f"{name}_live_tensors.json" + profile_schedule = schedule( + wait=0, warmup=EXPERIMENT_CONFIG.warmup_steps, active=EXPERIMENT_CONFIG.steps, repeat=1 + ) + steady_state_allocator = None + steady_state_live_tensors = None with ( capture_snapshot(snapshot_path), profile( @@ -151,7 +379,14 @@ def run_profile(name: str, model: nn.Module, inputs: torch.Tensor, labels: torch on_trace_ready=tensorboard_trace_handler(str(trace_dir)), ) as profiler, ): - for _ in range(EXPERIMENT_CONFIG.steps): + for step_index in range(EXPERIMENT_CONFIG.total_profile_steps): + if DEVICE.type == "cuda" and step_index == EXPERIMENT_CONFIG.warmup_steps: + torch.cuda.synchronize() + steady_state_allocator = current_allocator_state() + steady_state_live_tensors = current_live_tensor_accounting( + model, optimizer, inputs, labels + ) + with record_function("## forward ##"): pred = model(inputs) @@ -164,9 +399,22 @@ def run_profile(name: str, model: nn.Module, inputs: torch.Tensor, labels: torch profiler.step() + if steady_state_allocator is not None: + runtime_state_path.write_text( + json.dumps(steady_state_allocator, indent=2), encoding="utf-8" + ) + if steady_state_live_tensors is not None: + live_tensor_path.write_text( + json.dumps(steady_state_live_tensors, indent=2), encoding="utf-8" + ) + sys.stdout.write(f"{name}\n") sys.stdout.write(f" snapshot: {snapshot_path}\n") sys.stdout.write(f" trace_dir: {trace_dir}\n") + if steady_state_allocator is not None: + sys.stdout.write(f" allocator_state: {runtime_state_path}\n") + if steady_state_live_tensors is not None: + sys.stdout.write(f" live_tensors: {live_tensor_path}\n") def load_mosaic() -> tuple[Any, Any, Any, Any]: @@ -215,6 +463,72 @@ def build_peak_summary(name: 
str, memory_abstract: MemoryAbstractProtocol) -> di } +def _parse_mosaic_memory_size(memory_size: str) -> float: + """Convert a Mosaic-formatted memory size string into bytes.""" + units = { + "B": 1.0, + "KB": 1024.0, + "MB": 1024.0**2, + "GB": 1024.0**3, + "KIB": 1024.0, + "MIB": 1024.0**2, + "GIB": 1024.0**3, + } + match = re.fullmatch(r"([0-9]+(?:\.[0-9]+)?)([A-Za-z]+)", memory_size.strip()) + if match is None: + message = f"Unsupported Mosaic memory size: {memory_size!r}" + raise ValueError(message) + value = float(match.group(1)) + unit = match.group(2).upper() + multiplier = units.get(unit) + if multiplier is None: + message = f"Unsupported Mosaic memory unit: {unit!r}" + raise ValueError(message) + return value * multiplier + + +def _parse_mosaic_category_profile(output: str) -> tuple[float | None, dict[str, float]]: + """Parse total allocated and category bytes from Mosaic's category-profile output.""" + total_allocated_bytes = None + category_bytes: dict[str, float] = {} + total_match = re.search(r"Total Allocated:\s+([0-9.]+[A-Za-z]+)", output) + if total_match is not None: + total_allocated_bytes = _parse_mosaic_memory_size(total_match.group(1)) + + for match in re.finditer(r"AllocationType\.([A-Z_]+):\s+([0-9.]+[A-Za-z]+)", output): + category_bytes[match.group(1)] = _parse_mosaic_memory_size(match.group(2)) + + return total_allocated_bytes, category_bytes + + +def _build_mosaic_peak_breakdown(summary: dict[str, Any], categories_output: str) -> dict[str, Any]: + """Build a runtime-oriented peak breakdown from Mosaic category output.""" + total_allocated_bytes, category_bytes = _parse_mosaic_category_profile(categories_output) + static_memory_bytes = float(summary["static_memory_bytes"]) + total_peak_bytes = float(summary["overall_peak_bytes"]) + total_dynamic_bytes = ( + float(summary["dynamic_peak_bytes"]) + if total_allocated_bytes is None + else total_allocated_bytes + ) + categories = { + "Static": static_memory_bytes, + "Activation": 
category_bytes.get("ACTIVATION", 0.0), + "Backward": category_bytes.get("BACKWARD", 0.0), + "Optimizer": category_bytes.get("OPTIMIZER", 0.0), + "Unknown": category_bytes.get("UNKNOWN", 0.0), + } + return { + "total_peak_bytes": total_peak_bytes, + "total_peak_gib": bytes_to_gib(total_peak_bytes), + "total_dynamic_bytes": total_dynamic_bytes, + "total_dynamic_gib": bytes_to_gib(total_dynamic_bytes), + "categories": { + key: {"bytes": value, "gib": bytes_to_gib(value)} for key, value in categories.items() + }, + } + + def analyze_snapshot(name: str) -> MemorySummary: """Generate Mosaic reports and a summary JSON file for one snapshot.""" snapshot_path = SNAPSHOT_DIR / f"{name}_snapshot.pickle" @@ -235,20 +549,25 @@ def analyze_snapshot(name: str) -> MemorySummary: annotations_path = OUTPUT_DIR / f"{name}_annotations.html" peak_report_path = OUTPUT_DIR / f"{name}_peak.txt" summary_path = OUTPUT_DIR / f"{name}_summary.json" + allocator_state_path = RESULTS_DIR / f"{name}_allocator_state.json" - get_memory_profile( - snapshot=str(snapshot_path), - out_path=str(categories_path), - profile="categories", - sampling_rate=1, - preserve_allocation_order=True, + _categories_profile_result, categories_output = capture_stdout( + lambda: get_memory_profile( + snapshot=str(snapshot_path), + out_path=str(categories_path), + profile="categories", + sampling_rate=1, + preserve_allocation_order=True, + ) ) - get_memory_profile( - snapshot=str(snapshot_path), - out_path=str(annotations_path), - profile="annotations", - sampling_rate=1, - preserve_allocation_order=True, + _annotations_profile_result, annotations_profile_output = capture_stdout( + lambda: get_memory_profile( + snapshot=str(snapshot_path), + out_path=str(annotations_path), + profile="annotations", + sampling_rate=1, + preserve_allocation_order=True, + ) ) annotation_usage, annotation_output = capture_stdout( @@ -277,15 +596,38 @@ def analyze_snapshot(name: str) -> MemorySummary: for stage, (metadata, memory_bytes) in 
annotation_usage.items() } peak_summary = build_peak_summary(name, peak_memory_abstract) + peak_summary["mosaic_peak_breakdown"] = _build_mosaic_peak_breakdown( + peak_summary, categories_output + ) peak_summary["annotation_memory"] = annotation_summary + if allocator_state_path.exists(): + peak_summary["allocator_state"] = json.loads( + allocator_state_path.read_text(encoding="utf-8") + ) + live_tensor_path = RESULTS_DIR / f"{name}_live_tensors.json" + if live_tensor_path.exists(): + peak_summary["live_tensor_accounting"] = json.loads( + live_tensor_path.read_text(encoding="utf-8") + ) peak_summary["files"] = { "snapshot": str(snapshot_path), "categories_html": str(categories_path), "annotations_html": str(annotations_path), "peak_report": str(peak_report_path), + "allocator_state": str(allocator_state_path), + "live_tensors": str(live_tensor_path), } - peak_report_path.write_text(peak_output + "\n" + annotation_output, encoding="utf-8") + peak_report_path.write_text( + categories_output + + "\n" + + annotations_profile_output + + "\n" + + peak_output + + "\n" + + annotation_output, + encoding="utf-8", + ) summary_path.write_text(json.dumps(peak_summary, indent=2), encoding="utf-8") sys.stdout.write(f"{name}\n") @@ -298,19 +640,40 @@ def analyze_snapshot(name: str) -> MemorySummary: def write_comparison(summaries: list[MemorySummary]) -> Path: - """Write a dense-vs-LoRA comparison JSON file.""" + """Write a comparison JSON file covering all profiled variants.""" summary_by_name = {summary.name: summary for summary in summaries} - dense = summary_by_name["dense"] - frozen_lora = summary_by_name["frozen_lora"] + linear = summary_by_name[LinearModelVariant.LINEAR.value] + + pairwise_deltas = {} + for variant_name in ( + LinearModelVariant.LINEAR_LORA.value, + LinearModelVariant.LINEAR_STELLA.value, + ): + variant_summary = summary_by_name[variant_name] + pairwise_deltas[f"{variant_name}_minus_linear"] = { + "dynamic_peak_bytes": variant_summary.dynamic_peak_bytes - 
linear.dynamic_peak_bytes, + "dynamic_peak_gib": variant_summary.dynamic_peak_gib - linear.dynamic_peak_gib, + "overall_peak_bytes": variant_summary.overall_peak_bytes - linear.overall_peak_bytes, + "overall_peak_gib": variant_summary.overall_peak_gib - linear.overall_peak_gib, + } + + linear_lora = summary_by_name[LinearModelVariant.LINEAR_LORA.value] + linear_stella = summary_by_name[LinearModelVariant.LINEAR_STELLA.value] comparison = { - "dense": dense.to_dict(), - "frozen_lora": frozen_lora.to_dict(), + LinearModelVariant.LINEAR.value: linear.to_dict(), + LinearModelVariant.LINEAR_LORA.value: linear_lora.to_dict(), + LinearModelVariant.LINEAR_STELLA.value: linear_stella.to_dict(), "delta": { - "dynamic_peak_bytes": frozen_lora.dynamic_peak_bytes - dense.dynamic_peak_bytes, - "dynamic_peak_gib": frozen_lora.dynamic_peak_gib - dense.dynamic_peak_gib, - "overall_peak_bytes": frozen_lora.overall_peak_bytes - dense.overall_peak_bytes, - "overall_peak_gib": frozen_lora.overall_peak_gib - dense.overall_peak_gib, + **pairwise_deltas, + "linear_stella_minus_linear_lora": { + "dynamic_peak_bytes": linear_stella.dynamic_peak_bytes + - linear_lora.dynamic_peak_bytes, + "dynamic_peak_gib": linear_stella.dynamic_peak_gib - linear_lora.dynamic_peak_gib, + "overall_peak_bytes": linear_stella.overall_peak_bytes + - linear_lora.overall_peak_bytes, + "overall_peak_gib": linear_stella.overall_peak_gib - linear_lora.overall_peak_gib, + }, }, } @@ -331,14 +694,24 @@ def write_theory_comparison(summaries: list[MemorySummary]) -> Path: for summary in summaries: variant = LinearModelVariant(summary.name) theory = build_theoretical_summary(EXPERIMENT_CONFIG, variant) + variant_notes = [ + "Warmup iterations are excluded from the main theory comparison so the reported metrics reflect steady-state training rather than cold-start effects.", + "The comparison now uses steady-state floor, backward delta, and peak-over-floor metrics instead of raw stage-end bytes because absolute stage-end 
values mix persistent allocator state with per-step transients.", + "The memory_accounting block separates theoretical steady-state model memory, measured live tensor memory, measured reserved or cached allocator memory, and unexplained active allocations that are not visible as tracked live tensors.", + "Keep the theory-vs-experiment gap explicit: disagreement is a signal to investigate, not something to smooth away.", + ] + if variant is LinearModelVariant.LINEAR_STELLA: + variant_notes.append( + "The current StelLA run uses the existing StelLAAdamW hook-based optimizer, so persistent optimizer state follows the full trainable StelLA factorization rather than a reduced custom state model." + ) comparison = compare_theory_to_measurement( theory, summary, tolerances=ComparisonTolerances(), - notes=( - "Keep the theory-vs-experiment gap explicit: disagreement is a signal to investigate, not something to smooth away.", + active_occurrence=EXPERIMENT_CONFIG.warmup_steps, + narrative=ComparisonNarrative( + notes=tuple(variant_notes), possible_gap_sources=tuple(possible_gap_sources) ), - possible_gap_sources=tuple(possible_gap_sources), ) comparisons[summary.name] = comparison.to_dict() @@ -347,18 +720,120 @@ def write_theory_comparison(summaries: list[MemorySummary]) -> Path: return COMPARISON_REPORT_PATH +def _bytes_to_mib(num_bytes: float) -> float: + """Convert bytes to mebibytes.""" + return num_bytes / 1024**2 + + +def _measured_breakdown(summary: MemorySummary) -> dict[str, float]: + """Build a widget-friendly measured breakdown from one experiment summary.""" + occurrence = EXPERIMENT_CONFIG.warmup_steps + live_tensor_accounting = summary.live_tensor_accounting + allocator_state = summary.allocator_state + if live_tensor_accounting is None or allocator_state is None: + message = ( + f"Measured breakdown requires allocator and live-tensor accounting for {summary.name}" + ) + raise ValueError(message) + + parameters_bytes = 
float(live_tensor_accounting.categories["parameters"].bytes) + optimizer_bytes = float(live_tensor_accounting.categories["optimizer_state"].bytes) + activations_bytes = max( + 0.0, + summary.annotation_delta_bytes( + "## forward ##", "START", "## forward ##", "END", occurrence=occurrence + ), + ) + gradients_bytes = max( + 0.0, + summary.annotation_delta_bytes( + "## forward ##", "END", "## backward ##", "END", occurrence=occurrence + ), + ) + overhead_bytes = max( + 0.0, + float(allocator_state.allocated_bytes) + - float(live_tensor_accounting.total_live_tensor_bytes), + ) + + total_bytes = ( + parameters_bytes + optimizer_bytes + activations_bytes + gradients_bytes + overhead_bytes + ) + return { + "Overhead": _bytes_to_mib(overhead_bytes), + "Parameters": _bytes_to_mib(parameters_bytes), + "Activations": _bytes_to_mib(activations_bytes), + "Gradients": _bytes_to_mib(gradients_bytes), + "Optimizer": _bytes_to_mib(optimizer_bytes), + "total_mib": _bytes_to_mib(total_bytes), + } + + +def write_widget_breakdown(summaries: list[MemorySummary]) -> Path: + """Write a compact measured breakdown for the plotting widget.""" + payload = { + "config": { + "batch_size": EXPERIMENT_CONFIG.batch_size, + "in_features": EXPERIMENT_CONFIG.in_features, + "out_features": EXPERIMENT_CONFIG.out_features, + "lora_rank": EXPERIMENT_CONFIG.lora_rank, + "warmup_steps": EXPERIMENT_CONFIG.warmup_steps, + "steps": EXPERIMENT_CONFIG.steps, + }, + "variants": {summary.name: _measured_breakdown(summary) for summary in summaries}, + } + output_path = RESULTS_DIR / "widget_breakdown.json" + output_path.write_text(json.dumps(payload, indent=2), encoding="utf-8") + return output_path + + +def write_mosaic_peak_breakdown(summaries: list[MemorySummary]) -> Path: + """Write a Mosaic-aligned peak-runtime breakdown for plotting.""" + summary_payloads = { + summary.name: json.loads( + (OUTPUT_DIR / f"{summary.name}_summary.json").read_text(encoding="utf-8") + ) + for summary in summaries + } + payload = 
{ + "config": { + "batch_size": EXPERIMENT_CONFIG.batch_size, + "in_features": EXPERIMENT_CONFIG.in_features, + "out_features": EXPERIMENT_CONFIG.out_features, + "lora_rank": EXPERIMENT_CONFIG.lora_rank, + "warmup_steps": EXPERIMENT_CONFIG.warmup_steps, + "steps": EXPERIMENT_CONFIG.steps, + }, + "variants": { + name: summary_payloads[name]["mosaic_peak_breakdown"] for name in SNAPSHOT_NAMES + }, + } + output_path = RESULTS_DIR / "mosaic_peak_breakdown.json" + output_path.write_text(json.dumps(payload, indent=2), encoding="utf-8") + return output_path + + def run_mosaic_analysis() -> None: """Analyze all known snapshots and emit a comparison summary.""" summaries = [analyze_snapshot(name) for name in SNAPSHOT_NAMES] comparison_path = write_comparison(summaries) theory_path = write_theory_comparison(summaries) + widget_breakdown_path = write_widget_breakdown(summaries) + mosaic_peak_breakdown_path = write_mosaic_peak_breakdown(summaries) sys.stdout.write(f"comparison: {comparison_path}\n") sys.stdout.write(f"theory_comparison: {theory_path}\n") + sys.stdout.write(f"widget_breakdown: {widget_breakdown_path}\n") + sys.stdout.write(f"mosaic_peak_breakdown: {mosaic_peak_breakdown_path}\n") def main() -> None: """Run profiling and, when available, Mosaic post-processing.""" args = parse_args() + config = build_experiment_config(args) + + global EXPERIMENT_CONFIG # noqa: PLW0603 + EXPERIMENT_CONFIG = config + configure_runtime_paths(build_results_dir(config, args.output_tag)) RESULTS_DIR.mkdir(parents=True, exist_ok=True) SNAPSHOT_DIR.mkdir(parents=True, exist_ok=True) @@ -369,8 +844,9 @@ def main() -> None: inputs, labels = build_inputs() - run_profile("dense", make_dense_model(), inputs, labels) - run_profile("frozen_lora", make_lora_model(), inputs, labels) + run_profile(LinearModelVariant.LINEAR.value, make_dense_model(), inputs, labels) + run_profile(LinearModelVariant.LINEAR_LORA.value, make_lora_model(), inputs, labels) + 
run_profile(LinearModelVariant.LINEAR_STELLA.value, make_stella_model(), inputs, labels) if DEVICE.type == "cuda": run_mosaic_analysis() diff --git a/scripts/plot_memory_mosaic_widget.py b/scripts/plot_memory_mosaic_widget.py new file mode 100644 index 0000000..7d8fa3b --- /dev/null +++ b/scripts/plot_memory_mosaic_widget.py @@ -0,0 +1,630 @@ +"""Interactive widget for Mosaic-aligned peak memory breakdowns.""" + +# %% +import json +import re +import subprocess +import sys +from pathlib import Path +from typing import Any + +import ipywidgets as widgets +import matplotlib.patches as mpatches +import matplotlib.pyplot as plt +import numpy as np +from IPython.display import clear_output, display +from matplotlib.axes import Axes + +from stellatscale.memory_experiment import ( + LinearModelVariant, + MemoryExperimentConfig, + build_theoretical_summary, +) + +ROOT_DIR = Path(__file__).parents[1] +RESULTS_ROOT = ROOT_DIR / "results" / "memory" / "single_layer_lora" +FIGURES_DIR = ROOT_DIR / "figures" +FIGURES_DIR.mkdir(parents=True, exist_ok=True) +ANALYSIS_SCRIPT = ROOT_DIR / "scripts" / "lora_memory_analysis.py" + +CATEGORIES = ["Static", "Activation", "Backward", "Optimizer", "Unknown"] +TIMELINE_CATEGORIES = ["Static", "Activation", "Backward", "Optimizer", "Unknown"] +MODEL_NAMES = ["Linear", "LoRA", "StelLA"] +VARIANT_NAMES = ["linear", "linear_lora", "linear_stella"] +VARIANT_LABELS = {"Linear": "linear", "LoRA": "linear_lora", "StelLA": "linear_stella"} +COLORS = { + "Static": "#355C7D", + "Activation": "#90A955", + "Backward": "#F8B195", + "Optimizer": "#C3423F", + "Unknown": "#D97D54", +} + + +def _config_tag(d_in: int, d_out: int, r: int, b: int) -> str: + """Return the run-directory tag for one slider configuration.""" + return f"din_{d_in}_dout_{d_out}_r_{r}_b_{b}" + + +def _bytes_to_mib(num_bytes: float) -> float: + """Convert bytes to mebibytes.""" + return num_bytes / 1024**2 + + +def load_mosaic_peak_breakdown( + d_in: int, d_out: int, r: int, b: int +) 
-> tuple[dict[str, float], dict[str, float], dict[str, float], float, float, float] | None: + """Load the exported Mosaic peak breakdown for one configuration, if present.""" + candidate_paths = [ + RESULTS_ROOT / "mosaic_peak_breakdown.json", + RESULTS_ROOT / "runs" / _config_tag(d_in, d_out, r, b) / "mosaic_peak_breakdown.json", + ] + for path in candidate_paths: + if not path.exists(): + continue + payload = json.loads(path.read_text(encoding="utf-8")) + config = payload.get("config", {}) + if config.get("in_features") != d_in: + continue + if config.get("out_features") != d_out: + continue + if config.get("lora_rank") != r: + continue + if config.get("batch_size") != b: + continue + + variants = payload["variants"] + values: list[dict[str, float]] = [] + total_peak_values: list[float] = [] + for variant_name in VARIANT_NAMES: + breakdown = variants[variant_name] + categories = { + category: _bytes_to_mib(float(breakdown["categories"][category]["bytes"])) + for category in CATEGORIES + } + values.append(categories) + total_peak_values.append(_bytes_to_mib(float(breakdown["total_peak_bytes"]))) + return ( + values[0], + values[1], + values[2], + total_peak_values[0], + total_peak_values[1], + total_peak_values[2], + ) + + return None + + +def has_mosaic_peak_breakdown(d_in: int, d_out: int, r: int, b: int) -> bool: + """Return whether a Mosaic peak export exists for the selected configuration.""" + return load_mosaic_peak_breakdown(d_in, d_out, r, b) is not None + + +def _mosaic_output_dir(d_in: int, d_out: int, r: int, b: int) -> Path | None: + """Return the Mosaic output directory for the selected configuration, if present.""" + candidate_dirs = [ + RESULTS_ROOT / "mosaic", + RESULTS_ROOT / "runs" / _config_tag(d_in, d_out, r, b) / "mosaic", + ] + for candidate_dir in candidate_dirs: + summary_path = candidate_dir.parent / "mosaic_peak_breakdown.json" + if not candidate_dir.exists() or not summary_path.exists(): + continue + payload = 
json.loads(summary_path.read_text(encoding="utf-8")) + config = payload.get("config", {}) + if config.get("in_features") != d_in: + continue + if config.get("out_features") != d_out: + continue + if config.get("lora_rank") != r: + continue + if config.get("batch_size") != b: + continue + return candidate_dir + return None + + +def mosaic_html_path(variant_name: str) -> Path | None: + """Return the generated Mosaic categories HTML for the current widget configuration.""" + output_dir = _mosaic_output_dir(w_din.value, w_dout.value, w_r.value, w_b.value) + if output_dir is None: + return None + candidate_path = output_dir / f"{variant_name}_categories.html" + if candidate_path.exists(): + return candidate_path + return None + + +def mosaic_summary_path(variant_name: str) -> Path | None: + """Return the generated Mosaic summary JSON for the current widget configuration.""" + output_dir = _mosaic_output_dir(w_din.value, w_dout.value, w_r.value, w_b.value) + if output_dir is None: + return None + candidate_path = output_dir / f"{variant_name}_summary.json" + if candidate_path.exists(): + return candidate_path + return None + + +def load_mosaic_categories_timeline( + variant_name: str, +) -> tuple[np.ndarray, dict[str, np.ndarray]] | None: + """Load the Mosaic categories-over-time series from the generated categories HTML artifact.""" + html_path = mosaic_html_path(variant_name) + if html_path is None: + return None + + measured_breakdown = load_mosaic_peak_breakdown(w_din.value, w_dout.value, w_r.value, w_b.value) + if measured_breakdown is None: + return None + variant_index = VARIANT_NAMES.index(variant_name) + measured_variants = list(measured_breakdown[:3]) + static_mib = measured_variants[variant_index]["Static"] + + html_content = html_path.read_text(encoding="utf-8") + spec_match = re.search(r"var spec = (\{.*?\});\s*var embedOpt", html_content, flags=re.DOTALL) + if spec_match is None: + return None + + spec = json.loads(spec_match.group(1)) + datasets = 
spec.get("datasets", {}) + if not datasets: + return None + + records = next(iter(datasets.values())) + event_indices = sorted({int(record["event_idx"]) for record in records}) + series_by_category: dict[str, dict[int, float]] = { + category: dict.fromkeys(event_indices, 0.0) for category in TIMELINE_CATEGORIES + } + for event_idx in event_indices: + series_by_category["Static"][event_idx] = static_mib + for record in records: + raw_category = str(record["cat"]) + _, normalized_category = raw_category.split("_", 1) + category_name = normalized_category.title() + if category_name not in series_by_category: + continue + series_by_category[category_name][int(record["event_idx"])] += float(record["sum"]) * 1024.0 + + x_values = np.array(event_indices, dtype=float) + y_values = { + category: np.array( + [series_by_category[category][event_idx] for event_idx in event_indices], dtype=float + ) + for category in TIMELINE_CATEGORIES + } + return x_values, y_values + + +def load_mosaic_step_time_ms(variant_name: str) -> float | None: + """Return the average measured step duration in milliseconds for one variant.""" + summary_path = mosaic_summary_path(variant_name) + if summary_path is None: + return None + + payload = json.loads(summary_path.read_text(encoding="utf-8")) + annotation_memory = payload.get("annotation_memory", {}) + durations_ms: list[float] = [] + for occurrence in range(1, 128): + suffix = "" if occurrence == 1 else f"({occurrence - 1})" + start_key = f"## forward ##_START{suffix}" + end_key = f"## optimizer ##_END{suffix}" + start_payload = annotation_memory.get(start_key) + end_payload = annotation_memory.get(end_key) + if start_payload is None or end_payload is None: + if occurrence > 1: + break + continue + start_time = float(start_payload["annotation"]["time_us"]) + end_time = float(end_payload["annotation"]["time_us"]) + durations_ms.append((end_time - start_time) / 1000.0) + + if not durations_ms: + return None + return float(sum(durations_ms) / 
len(durations_ms)) + + +def _analytical_variant_breakdown( + config: MemoryExperimentConfig, variant: LinearModelVariant +) -> tuple[dict[str, float], float]: + """Build a runtime-aligned analytical breakdown for one variant.""" + theory = build_theoretical_summary(config, variant) + categories = { + "Static": _bytes_to_mib(float(theory.static_baseline_bytes)), + "Activation": _bytes_to_mib(float(theory.forward_dynamic_estimate_bytes)), + "Backward": _bytes_to_mib(float(theory.gradient_bytes)), + "Optimizer": _bytes_to_mib(float(theory.optimizer_state_bytes)), + "Unknown": 0.0, + } + return categories, sum(categories.values()) + + +def load_analytical_breakdown( + d_in: int, d_out: int, r: int, b: int +) -> tuple[dict[str, float], dict[str, float], dict[str, float], float, float, float]: + """Build the analytical prediction using the runtime-aligned bucket semantics.""" + config = MemoryExperimentConfig(batch_size=b, in_features=d_in, out_features=d_out, lora_rank=r) + linear_categories, linear_total_peak = _analytical_variant_breakdown( + config, LinearModelVariant.LINEAR + ) + lora_categories, lora_total_peak = _analytical_variant_breakdown( + config, LinearModelVariant.LINEAR_LORA + ) + stella_categories, stella_total_peak = _analytical_variant_breakdown( + config, LinearModelVariant.LINEAR_STELLA + ) + return ( + linear_categories, + lora_categories, + stella_categories, + linear_total_peak, + lora_total_peak, + stella_total_peak, + ) + + +style = {"description_width": "initial"} +layout = widgets.Layout(width="400px") +w_din = widgets.IntSlider( + value=4096, + min=128, + max=16384, + step=128, + description="Input Dim (d_in):", + style=style, + layout=layout, +) +w_dout = widgets.IntSlider( + value=4096, + min=128, + max=16384, + step=128, + description="Output Dim (d_out):", + style=style, + layout=layout, +) +w_r = widgets.IntSlider( + value=128, min=1, max=256, step=1, description="LoRA Rank (r):", style=style, layout=layout +) +w_b = widgets.IntSlider( + 
value=128, min=1, max=512, step=1, description="Batch Size (b):", style=style, layout=layout +) +btn_run_experiment = widgets.Button(description="▶ Run Experiment", button_style="warning") +btn_save_png = widgets.Button(description="💾 Save PNG", button_style="success") +out_plot = widgets.Output() +status_message = widgets.HTML(value="") + + +def _config_text() -> str: + """Return the current configuration label used in the composite figure footer.""" + return f"d_in: {w_din.value} | d_out: {w_dout.value} | Rank (r): {w_r.value} | Batch (b): {w_b.value}" + + +def _load_composite_plot_data() -> tuple[ + list[dict[str, float]], + list[float], + list[dict[str, float]], + list[float], + list[tuple[np.ndarray, dict[str, np.ndarray]]], +]: + """Load the measured and analytical breakdowns plus all measured timelines.""" + measured_breakdown = load_mosaic_peak_breakdown(w_din.value, w_dout.value, w_r.value, w_b.value) + if measured_breakdown is None: + message = "No Mosaic peak breakdown exists for the selected configuration." + raise ValueError(message) + + analytical_breakdown = load_analytical_breakdown( + w_din.value, w_dout.value, w_r.value, w_b.value + ) + measured_values = list(measured_breakdown[:3]) + measured_totals = list(measured_breakdown[3:]) + analytical_values = list(analytical_breakdown[:3]) + analytical_totals = list(analytical_breakdown[3:]) + + timeline_payloads: list[tuple[np.ndarray, dict[str, np.ndarray]]] = [] + for variant_name in VARIANT_NAMES: + timeline = load_mosaic_categories_timeline(variant_name) + if timeline is None: + message = f"No generated Mosaic categories timeline exists for {variant_name}. Run the experiment first." 
+ raise ValueError(message) + timeline_payloads.append(timeline) + + return measured_values, measured_totals, analytical_values, analytical_totals, timeline_payloads + + +def _scale_breakdown_values( + measured_values: list[dict[str, float]], + measured_totals: list[float], + analytical_values: list[dict[str, float]], + analytical_totals: list[float], +) -> tuple[list[dict[str, float]], list[float], list[dict[str, float]], list[float]]: + """Scale only the top breakdown stacks to percentages of the measured dense baseline.""" + baseline_total = measured_totals[0] + measured_values = [ + {category: value * (100.0 / baseline_total) for category, value in breakdown.items()} + for breakdown in measured_values + ] + analytical_values = [ + {category: value * (100.0 / baseline_total) for category, value in breakdown.items()} + for breakdown in analytical_values + ] + return measured_values, measured_totals, analytical_values, analytical_totals + + +def _draw_breakdown_axis( + ax_breakdown: Axes, + measured_values: list[dict[str, float]], + measured_totals: list[float], + analytical_values: list[dict[str, float]], + analytical_totals: list[float], +) -> None: + """Draw the combined measured-vs-analytical stacked breakdown panel.""" + model_centers = np.array(range(len(MODEL_NAMES)), dtype=float) * 2.4 + bar_width = 0.84 + source_offsets = {"Measured": -0.42, "Analytical": 0.42} + breakdown_sets = { + "Measured": (measured_values, measured_totals), + "Analytical": (analytical_values, analytical_totals), + } + + all_bar_totals = [ + *(sum(breakdown.values()) for breakdown in measured_values), + *(sum(breakdown.values()) for breakdown in analytical_values), + ] + max_bar_total = max(all_bar_totals) + label_offset = max(0.02 * max_bar_total, 1.2) + for source_name, (breakdowns, totals) in breakdown_sets.items(): + x_positions = model_centers + source_offsets[source_name] + bottoms = np.zeros(len(MODEL_NAMES)) + for category in CATEGORIES: + heights = 
np.array([breakdown[category] for breakdown in breakdowns]) + ax_breakdown.bar( + x_positions, + heights, + bottom=bottoms, + width=bar_width, + color=COLORS[category], + edgecolor="white", + linewidth=1.2, + ) + bottoms += heights + for index, total in enumerate(totals): + ax_breakdown.text( + x_positions[index], + bottoms[index] + label_offset, + f"{total:.1f}", + ha="center", + va="bottom", + fontsize=12, + fontweight="bold", + color="#1F2933", + ) + + for center in model_centers: + ax_breakdown.text( + center + source_offsets["Measured"], + -max_bar_total * 0.06, + "Measured", + ha="center", + va="top", + fontsize=11, + color="#51606F", + ) + ax_breakdown.text( + center + source_offsets["Analytical"], + -max_bar_total * 0.06, + "Analytical", + ha="center", + va="top", + fontsize=11, + color="#51606F", + ) + + ax_breakdown.set_xlim(model_centers[0] - 1.3, model_centers[-1] + 1.3) + ax_breakdown.set_ylim(-max_bar_total * 0.12, max_bar_total * 1.15) + ax_breakdown.set_xticks([]) + ax_breakdown.set_ylabel("Memory (%)", fontsize=16) + ax_breakdown.tick_params(axis="y", labelsize=14) + ax_breakdown.spines["top"].set_visible(False) + ax_breakdown.spines["right"].set_visible(False) + ax_breakdown.spines["left"].set_visible(False) + ax_breakdown.spines["bottom"].set_visible(False) + ax_breakdown.yaxis.grid(visible=True, linestyle="--", alpha=0.5) + ax_breakdown.set_axisbelow(True) + ax_breakdown.set_facecolor("#FAFAF7") + + handles = [ + mpatches.Patch(facecolor=COLORS[category], edgecolor="white") for category in CATEGORIES + ] + ax_breakdown.legend( + handles, + CATEGORIES, + loc="upper center", + bbox_to_anchor=(0.5, 1.16), + ncol=5, + frameon=False, + fontsize=15, + handletextpad=0.7, + columnspacing=1.1, + ) + ax_breakdown.text( + 0.985, + 0.985, + _config_text(), + transform=ax_breakdown.transAxes, + ha="right", + va="top", + fontsize=11, + family="monospace", + color="#51606F", + bbox={"facecolor": "#FFFFFFCC", "edgecolor": "#D7DBE0", "boxstyle": 
"round,pad=0.35"}, + ) + + +def _draw_timeline_axis( + axis: Axes, + variant_name: str, + timeline: tuple[np.ndarray, dict[str, np.ndarray]], + *, + show_ylabel: bool, +) -> None: + """Draw one measured Mosaic timeline subplot.""" + x_values, y_values = timeline + stacked_series = [y_values[category] for category in TIMELINE_CATEGORIES] + + axis.stackplot( + x_values, + stacked_series, + colors=[COLORS[category] for category in TIMELINE_CATEGORIES], + alpha=0.85, + ) + total_series = np.sum(np.vstack(stacked_series), axis=0) + peak_index = int(np.argmax(total_series)) + axis.axvline(x_values[peak_index], color="#1F2933", linestyle="--", linewidth=1.6, alpha=0.9) + axis.set_xlabel("") + if show_ylabel: + axis.set_ylabel("Memory (MiB)", fontsize=16) + else: + axis.tick_params(axis="y", left=False, labelleft=False) + axis.spines["left"].set_visible(False) + axis.spines["top"].set_visible(False) + axis.spines["right"].set_visible(False) + axis.yaxis.grid(visible=True, linestyle="--", alpha=0.4) + axis.set_axisbelow(True) + axis.set_facecolor("#FAFAF7") + + step_time_ms = load_mosaic_step_time_ms(variant_name) + if step_time_ms is not None: + axis.text( + 0.98, + 0.96, + f"avg step {step_time_ms:.2f} ms", + transform=axis.transAxes, + ha="right", + va="top", + fontsize=11, + color="#51606F", + bbox={"facecolor": "#FFFFFFCC", "edgecolor": "#D7DBE0", "boxstyle": "round,pad=0.25"}, + ) + + +def draw_plot(*, is_saving: bool = False, filepath: Path | None = None) -> None: + """Render one composite figure with breakdowns and all Mosaic timelines.""" + measured_values, measured_totals, analytical_values, analytical_totals, timeline_payloads = ( + _load_composite_plot_data() + ) + measured_values, measured_totals, analytical_values, analytical_totals = ( + _scale_breakdown_values( + measured_values, measured_totals, analytical_values, analytical_totals + ) + ) + + fig = plt.figure(figsize=(16, 14), dpi=100 if not is_saving else 300) + fig.patch.set_facecolor("#FFFFFF") + grid = 
fig.add_gridspec(3, 3, height_ratios=[2.05, 2.6, 0.14], hspace=0.0, wspace=0.0) + ax_breakdown = fig.add_subplot(grid[0, :]) + first_timeline_axis = fig.add_subplot(grid[1, 0]) + timeline_axes = [ + first_timeline_axis, + fig.add_subplot(grid[1, 1], sharey=first_timeline_axis), + fig.add_subplot(grid[1, 2], sharey=first_timeline_axis), + ] + _draw_breakdown_axis( + ax_breakdown, measured_values, measured_totals, analytical_values, analytical_totals + ) + for index, (axis, variant_name, timeline) in enumerate( + zip(timeline_axes, VARIANT_NAMES, timeline_payloads, strict=True) + ): + _draw_timeline_axis(axis, variant_name, timeline, show_ylabel=index == 0) + + for axis, model_name in zip(timeline_axes, MODEL_NAMES, strict=True): + bounds = axis.get_position() + fig.text( + (bounds.x0 + bounds.x1) / 2, + bounds.y0 - 0.018, + model_name, + ha="center", + va="top", + fontsize=14, + fontweight="bold", + color="#1F2933", + ) + if is_saving and filepath is not None: + fig.savefig(filepath, bbox_inches="tight", facecolor=fig.get_facecolor()) + plt.close(fig) + else: + plt.show() + + +def update(*args: Any) -> None: + """Refresh the figure after any widget change.""" + if not has_mosaic_peak_breakdown(w_din.value, w_dout.value, w_r.value, w_b.value): + status_message.value = "No Mosaic peak export exists for this configuration yet. Run the experiment first." + else: + status_message.value = "Showing combined measured Mosaic, analytical breakdown, and all three Mosaic timelines." + + with out_plot: + clear_output(wait=True) + if has_mosaic_peak_breakdown(w_din.value, w_dout.value, w_r.value, w_b.value): + draw_plot() + + +def run_experiment(*args: Any) -> None: + """Run the profiling workflow and regenerate the Mosaic peak export.""" + btn_run_experiment.disabled = True + btn_save_png.disabled = True + status_message.value = ( + "Running experiment for the selected configuration..." 
+ ) + command = [ + sys.executable, + str(ANALYSIS_SCRIPT), + "--in-features", + str(w_din.value), + "--out-features", + str(w_dout.value), + "--lora-rank", + str(w_r.value), + "--batch-size", + str(w_b.value), + ] + try: + subprocess.run(command, cwd=ROOT_DIR, capture_output=True, text=True, check=True) # noqa: S603 + except subprocess.CalledProcessError as exc: + error_output = exc.stderr.strip() or exc.stdout.strip() or "Unknown error" + status_message.value = f"Experiment failed.
{error_output}
" + else: + status_message.value = "Experiment finished. Mosaic peak breakdown loaded for this configuration." + update() + finally: + btn_run_experiment.disabled = False + btn_save_png.disabled = False + + +def save_png(*args: Any) -> None: + """Save the current composite Mosaic figure as a PNG image.""" + filepath = FIGURES_DIR / ( + f"single_layer_memory_mosaic_widget_din_{w_din.value}_dout_{w_dout.value}" + f"_r_{w_r.value}_b_{w_b.value}.png" + ) + draw_plot(is_saving=True, filepath=filepath) + status_message.value = f"Saved PNG to {filepath}" + + +for widget in [w_din, w_dout, w_r, w_b]: + widget.observe(update, names="value") +btn_run_experiment.on_click(run_experiment) +btn_save_png.on_click(save_png) + +sliders_col1 = widgets.VBox([w_din, w_dout]) +sliders_col2 = widgets.VBox([w_r, w_b]) +controls = widgets.HBox([sliders_col1, sliders_col2]) +buttons = widgets.HBox([btn_run_experiment, btn_save_png]) +ui_layout = widgets.VBox( + [controls, buttons, status_message], + layout=widgets.Layout(align_items="center", margin="20px 0 0 0"), +) + +display(out_plot, ui_layout) +update() diff --git a/src/stellatscale/__init__.py b/src/stellatscale/__init__.py index 596ee3c..89720ee 100644 --- a/src/stellatscale/__init__.py +++ b/src/stellatscale/__init__.py @@ -1,26 +1,38 @@ """stellatscale.""" from stellatscale.memory_experiment import ( + AllocatorMemoryState, ComparisonMetric, ComparisonTolerances, - FrozenLoRALinear, LinearModelVariant, + LiveTensorAccounting, + LiveTensorDescriptor, + MemoryAccountingBreakdown, MemoryExperimentConfig, MemorySummary, + TensorCategorySummary, TheoreticalMemorySummary, TheoryExperimentComparison, build_theoretical_summary, bytes_to_gib, compare_theory_to_measurement, ) +from stellatscale.models import LoRALinear, StelLAAdamW, StelLALinear __all__ = [ + "AllocatorMemoryState", "ComparisonMetric", "ComparisonTolerances", - "FrozenLoRALinear", "LinearModelVariant", + "LiveTensorAccounting", + "LiveTensorDescriptor", + "LoRALinear", + 
"MemoryAccountingBreakdown", "MemoryExperimentConfig", "MemorySummary", + "StelLAAdamW", + "StelLALinear", + "TensorCategorySummary", "TheoreticalMemorySummary", "TheoryExperimentComparison", "build_theoretical_summary", diff --git a/src/stellatscale/memory_experiment.py b/src/stellatscale/memory_experiment.py index ee32915..6e0bbad 100644 --- a/src/stellatscale/memory_experiment.py +++ b/src/stellatscale/memory_experiment.py @@ -1,4 +1,4 @@ -"""Reusable theory and comparison helpers for the dense-vs-LoRA memory experiment. +"""Reusable theory and comparison helpers for the linear memory experiment. The goal of this module is to keep theoretical accounting, measured summaries, and the gap between the two as first-class data. Approximate agreement is @@ -13,9 +13,6 @@ from enum import StrEnum from typing import TYPE_CHECKING, cast -import torch -from torch import nn - if TYPE_CHECKING: from collections.abc import Mapping from pathlib import Path @@ -29,8 +26,9 @@ def bytes_to_gib(num_bytes: float) -> float: class LinearModelVariant(StrEnum): """Supported linear-layer variants for the memory experiment.""" - DENSE = "dense" - FROZEN_LORA = "frozen_lora" + LINEAR = "linear" + LINEAR_LORA = "linear_lora" + LINEAR_STELLA = "linear_stella" @dataclass(frozen=True, slots=True) @@ -46,9 +44,11 @@ class MemoryExperimentConfig: out_features: Output width of the linear layer. lora_rank: - Rank of the LoRA adapters for the frozen-base variant. + Rank of the low-rank adaptation used by the LoRA and StelLA variants. + warmup_steps: + Number of warmup steps to run before the active profiling window begins. steps: - Number of training steps to profile. + Number of active training steps to profile after warmup. learning_rate: Optimizer learning rate. 
parameter_bytes: @@ -69,6 +69,7 @@ class MemoryExperimentConfig: in_features: int out_features: int lora_rank: int + warmup_steps: int = 5 steps: int = 5 learning_rate: float = 0.05 parameter_bytes: int = 4 @@ -96,6 +97,10 @@ def __post_init__(self) -> None: message = f"{field_name} must be strictly positive, got {value}" raise ValueError(message) + if self.warmup_steps < 0: + message = f"warmup_steps must be non-negative, got {self.warmup_steps}" + raise ValueError(message) + if self.learning_rate <= 0: message = f"learning_rate must be strictly positive, got {self.learning_rate}" raise ValueError(message) @@ -110,6 +115,16 @@ def lora_trainable_parameter_count(self) -> int: """Return the trainable LoRA element count.""" return self.lora_rank * (self.in_features + self.out_features) + @property + def stella_trainable_parameter_count(self) -> int: + """Return the trainable StelLA element count for U, S, and V^T.""" + return self.lora_trainable_parameter_count + (self.lora_rank**2) + + @property + def stella_optimizer_state_parameter_count(self) -> int: + """Return the trainable StelLA element count tracked by Adam-style state.""" + return self.stella_trainable_parameter_count + @property def input_bytes(self) -> int: """Return the input tensor size in bytes.""" @@ -120,6 +135,11 @@ def label_bytes(self) -> int: """Return the label tensor size in bytes.""" return self.batch_size * self.out_features * self.activation_bytes + @property + def total_profile_steps(self) -> int: + """Return the total number of profiled iterations including warmup.""" + return self.warmup_steps + self.steps + @dataclass(frozen=True, slots=True) class TheoreticalMemorySummary: @@ -171,22 +191,6 @@ def to_dict(self) -> dict[str, float | int | str]: } -class FrozenLoRALinear(nn.Module): - """Linear layer with a frozen base projection and trainable LoRA adapters.""" - - def __init__(self, base: nn.Linear, rank: int) -> None: - """Initialize the frozen base projection and low-rank adapters.""" 
- super().__init__() - self.base = base - self.base.requires_grad_(requires_grad=False) - self.lora_a = nn.Linear(base.in_features, rank, bias=False) - self.lora_b = nn.Linear(rank, base.out_features, bias=False) - - def forward(self, inputs: torch.Tensor) -> torch.Tensor: - """Apply the frozen base projection and LoRA update.""" - return self.base(inputs) + self.lora_b(self.lora_a(inputs)) - - @dataclass(frozen=True, slots=True) class AnnotationMetadata: """Metadata attached to one measured annotation event.""" @@ -206,6 +210,81 @@ class AnnotationMeasurement: memory_gib: float +@dataclass(frozen=True, slots=True) +class AllocatorMemoryState: + """Measured CUDA allocator state at a steady-state point in time.""" + + allocated_bytes: float + allocated_gib: float + reserved_bytes: float + reserved_gib: float + reserved_cached_bytes: float + reserved_cached_gib: float + + def to_dict(self) -> dict[str, float]: + """Convert the allocator state to a JSON-serializable mapping.""" + return { + "allocated_bytes": self.allocated_bytes, + "allocated_gib": self.allocated_gib, + "reserved_bytes": self.reserved_bytes, + "reserved_gib": self.reserved_gib, + "reserved_cached_bytes": self.reserved_cached_bytes, + "reserved_cached_gib": self.reserved_cached_gib, + } + + +@dataclass(frozen=True, slots=True) +class TensorCategorySummary: + """Summary of one live CUDA tensor category.""" + + bytes: float + gib: float + + def to_dict(self) -> dict[str, float]: + """Convert the tensor category summary to a JSON-serializable mapping.""" + return {"bytes": self.bytes, "gib": self.gib} + + +@dataclass(frozen=True, slots=True) +class LiveTensorDescriptor: + """Compact description of a live CUDA tensor not matched to a known category.""" + + bytes: float + gib: float + shape: tuple[int, ...] 
+ dtype: str + requires_grad: bool + + def to_dict(self) -> dict[str, object]: + """Convert the live tensor descriptor to a JSON-serializable mapping.""" + return { + "bytes": self.bytes, + "gib": self.gib, + "shape": list(self.shape), + "dtype": self.dtype, + "requires_grad": self.requires_grad, + } + + +@dataclass(frozen=True, slots=True) +class LiveTensorAccounting: + """Break down live CUDA tensor storage at a steady-state checkpoint.""" + + total_live_tensor_bytes: float + total_live_tensor_gib: float + categories: dict[str, TensorCategorySummary] + top_other_tensors: tuple[LiveTensorDescriptor, ...] = () + + def to_dict(self) -> dict[str, object]: + """Convert the live tensor accounting to a JSON-serializable mapping.""" + return { + "total_live_tensor_bytes": self.total_live_tensor_bytes, + "total_live_tensor_gib": self.total_live_tensor_gib, + "categories": {key: value.to_dict() for key, value in self.categories.items()}, + "top_other_tensors": [tensor.to_dict() for tensor in self.top_other_tensors], + } + + @dataclass(frozen=True, slots=True) class MemorySummary: """Measured memory summary loaded from Mosaic output.""" @@ -218,6 +297,8 @@ class MemorySummary: overall_peak_bytes: float overall_peak_gib: float annotation_memory: dict[str, AnnotationMeasurement] + allocator_state: AllocatorMemoryState | None = None + live_tensor_accounting: LiveTensorAccounting | None = None files: dict[str, str] = field(default_factory=dict) @classmethod @@ -231,6 +312,14 @@ def from_mapping(cls, payload: Mapping[str, object]) -> MemorySummary: files = { key: _require_str(value, f"files[{key}]") for key, value in files_mapping.items() } + allocator_state_payload = payload.get("allocator_state") + allocator_state = None + if allocator_state_payload is not None: + allocator_state = _parse_allocator_memory_state(allocator_state_payload) + live_tensor_accounting_payload = payload.get("live_tensor_accounting") + live_tensor_accounting = None + if live_tensor_accounting_payload is 
not None: + live_tensor_accounting = _parse_live_tensor_accounting(live_tensor_accounting_payload) return cls( name=_require_str(payload.get("name"), "name"), @@ -250,6 +339,8 @@ def from_mapping(cls, payload: Mapping[str, object]) -> MemorySummary: key: _parse_annotation_measurement(key, value) for key, value in annotation_payload.items() }, + allocator_state=allocator_state, + live_tensor_accounting=live_tensor_accounting, files=files, ) @@ -269,6 +360,19 @@ def annotation_bytes(self, annotation_name: str, stage: str, occurrence: int = 0 raise KeyError(message) return measurement.memory_bytes + def annotation_delta_bytes( + self, + start_annotation_name: str, + start_stage: str, + end_annotation_name: str, + end_stage: str, + occurrence: int = 0, + ) -> float: + """Return the measured byte delta between two annotation events.""" + start_bytes = self.annotation_bytes(start_annotation_name, start_stage, occurrence) + end_bytes = self.annotation_bytes(end_annotation_name, end_stage, occurrence) + return end_bytes - start_bytes + def to_dict(self) -> dict[str, object]: """Convert the measured summary back to a JSON-serializable mapping.""" return { @@ -292,6 +396,14 @@ def to_dict(self) -> dict[str, object]: } for key, value in self.annotation_memory.items() }, + "allocator_state": ( + None if self.allocator_state is None else self.allocator_state.to_dict() + ), + "live_tensor_accounting": ( + None + if self.live_tensor_accounting is None + else self.live_tensor_accounting.to_dict() + ), "files": dict(self.files), } @@ -301,9 +413,17 @@ class ComparisonTolerances: """Relative tolerances used when comparing theory and measurement.""" static_baseline_relative: float = 0.05 - forward_dynamic_relative: float = 0.50 - backward_dynamic_relative: float = 0.50 - optimizer_dynamic_relative: float = 0.35 + steady_state_floor_relative: float = 0.35 + backward_delta_relative: float = 0.50 + peak_over_floor_relative: float = 0.35 + + +@dataclass(frozen=True, slots=True) +class 
ComparisonNarrative: + """Optional notes that explain theory-vs-measurement gaps.""" + + notes: tuple[str, ...] = () + possible_gap_sources: tuple[str, ...] = () @dataclass(frozen=True, slots=True) @@ -336,6 +456,44 @@ def to_dict(self) -> dict[str, float | str | bool | None]: } +@dataclass(frozen=True, slots=True) +class MemoryAccountingBreakdown: + """Break down theory, active tensors, reserved cache, and residual gap.""" + + theoretical_model_memory_bytes: float + measured_active_tensor_memory_bytes: float + measured_reserved_cached_memory_bytes: float + unexplained_gap_bytes: float + measured_total_reserved_bytes: float + measured_mosaic_static_memory_bytes: float + tensor_vs_theory_gap_bytes: float + + def to_dict(self) -> dict[str, float]: + """Convert the memory accounting breakdown to a JSON-serializable mapping.""" + return { + "theoretical_model_memory_bytes": self.theoretical_model_memory_bytes, + "theoretical_model_memory_gib": bytes_to_gib(self.theoretical_model_memory_bytes), + "measured_active_tensor_memory_bytes": self.measured_active_tensor_memory_bytes, + "measured_active_tensor_memory_gib": bytes_to_gib( + self.measured_active_tensor_memory_bytes + ), + "measured_reserved_cached_memory_bytes": self.measured_reserved_cached_memory_bytes, + "measured_reserved_cached_memory_gib": bytes_to_gib( + self.measured_reserved_cached_memory_bytes + ), + "unexplained_gap_bytes": self.unexplained_gap_bytes, + "unexplained_gap_gib": bytes_to_gib(self.unexplained_gap_bytes), + "measured_total_reserved_bytes": self.measured_total_reserved_bytes, + "measured_total_reserved_gib": bytes_to_gib(self.measured_total_reserved_bytes), + "measured_mosaic_static_memory_bytes": self.measured_mosaic_static_memory_bytes, + "measured_mosaic_static_memory_gib": bytes_to_gib( + self.measured_mosaic_static_memory_bytes + ), + "tensor_vs_theory_gap_bytes": self.tensor_vs_theory_gap_bytes, + "tensor_vs_theory_gap_gib": bytes_to_gib(self.tensor_vs_theory_gap_bytes), + } + + 
@dataclass(frozen=True, slots=True) class TheoryExperimentComparison: """Comparison report preserving both agreement and disagreement.""" @@ -344,6 +502,7 @@ class TheoryExperimentComparison: theory: TheoreticalMemorySummary measured: MemorySummary metrics: dict[str, ComparisonMetric] + memory_accounting: MemoryAccountingBreakdown notes: tuple[str, ...] = () possible_gap_sources: tuple[str, ...] = () @@ -359,6 +518,7 @@ def to_dict(self) -> dict[str, object]: "theory": self.theory.to_dict(), "measured": self.measured.to_dict(), "metrics": {key: value.to_dict() for key, value in self.metrics.items()}, + "memory_accounting": self.memory_accounting.to_dict(), "failing_metrics": list(self.failing_metrics), "notes": list(self.notes), "possible_gap_sources": list(self.possible_gap_sources), @@ -377,15 +537,16 @@ def build_theoretical_summary( if config.include_label_in_static_baseline: static_extras += config.label_bytes - if variant is LinearModelVariant.DENSE: + if variant is LinearModelVariant.LINEAR: trainable_elements = dense_elements trainable_parameter_bytes = dense_parameter_bytes resident_parameter_bytes = dense_parameter_bytes forward_dynamic_estimate_bytes = ( config.batch_size * (config.in_features + config.out_features) * config.activation_bytes ) - name = LinearModelVariant.DENSE.value - else: + name = LinearModelVariant.LINEAR.value + optimizer_state_elements = trainable_elements + elif variant is LinearModelVariant.LINEAR_LORA: trainable_elements = config.lora_trainable_parameter_count trainable_parameter_bytes = trainable_elements * config.parameter_bytes resident_parameter_bytes = dense_parameter_bytes + trainable_parameter_bytes @@ -394,10 +555,24 @@ def build_theoretical_summary( * (config.in_features + config.out_features + config.lora_rank) * config.activation_bytes ) - name = LinearModelVariant.FROZEN_LORA.value + name = LinearModelVariant.LINEAR_LORA.value + optimizer_state_elements = trainable_elements + else: + trainable_elements = 
config.stella_trainable_parameter_count + trainable_parameter_bytes = trainable_elements * config.parameter_bytes + resident_parameter_bytes = dense_parameter_bytes + trainable_parameter_bytes + forward_dynamic_estimate_bytes = ( + config.batch_size + * (config.in_features + config.out_features + config.lora_rank) + * config.activation_bytes + ) + name = LinearModelVariant.LINEAR_STELLA.value + optimizer_state_elements = config.stella_optimizer_state_parameter_count gradient_bytes = trainable_elements * config.gradient_bytes - optimizer_state_bytes = trainable_elements * config.optimizer_state_bytes_per_trainable_element + optimizer_state_bytes = ( + optimizer_state_elements * config.optimizer_state_bytes_per_trainable_element + ) static_baseline_bytes = resident_parameter_bytes + static_extras backward_dynamic_estimate_bytes = gradient_bytes + forward_dynamic_estimate_bytes optimizer_dynamic_estimate_bytes = optimizer_state_bytes + forward_dynamic_estimate_bytes @@ -426,15 +601,53 @@ def compare_theory_to_measurement( theory: TheoreticalMemorySummary, measured: MemorySummary, tolerances: ComparisonTolerances | None = None, - notes: tuple[str, ...] = (), - possible_gap_sources: tuple[str, ...] = (), + active_occurrence: int = 0, + narrative: ComparisonNarrative | None = None, ) -> TheoryExperimentComparison: """Compare theoretical accounting against one measured summary. - The report intentionally preserves disagreement so it can be investigated - later rather than normalized away. + Warmup iterations are intentionally excluded from the primary comparison. + The main metrics use post-warmup steady-state memory so allocator warmup and + one-time optimizer initialization are not conflated with per-step theory. 
""" actual_tolerances = tolerances or ComparisonTolerances() + actual_narrative = narrative or ComparisonNarrative() + allocator_state = measured.allocator_state + theoretical_model_memory_bytes = float( + theory.static_baseline_bytes + theory.optimizer_state_bytes + ) + live_tensor_accounting = measured.live_tensor_accounting + measured_active_tensor_memory_bytes = ( + measured.static_memory_bytes + if live_tensor_accounting is None + else live_tensor_accounting.total_live_tensor_bytes + ) + measured_reserved_cached_memory_bytes = ( + 0.0 if allocator_state is None else allocator_state.reserved_cached_bytes + ) + active_allocated_bytes = ( + measured_active_tensor_memory_bytes + if allocator_state is None + else allocator_state.allocated_bytes + ) + measured_total_reserved_bytes = active_allocated_bytes + measured_reserved_cached_memory_bytes + steady_state_floor_bytes = measured.annotation_bytes( + "## forward ##", "START", occurrence=active_occurrence + ) + backward_delta_bytes = measured.annotation_delta_bytes( + "## forward ##", "END", "## backward ##", "END", occurrence=active_occurrence + ) + peak_over_floor_bytes = measured.dynamic_peak_bytes - steady_state_floor_bytes + memory_accounting = MemoryAccountingBreakdown( + theoretical_model_memory_bytes=theoretical_model_memory_bytes, + measured_active_tensor_memory_bytes=measured_active_tensor_memory_bytes, + measured_reserved_cached_memory_bytes=measured_reserved_cached_memory_bytes, + unexplained_gap_bytes=active_allocated_bytes - measured_active_tensor_memory_bytes, + measured_total_reserved_bytes=measured_total_reserved_bytes, + measured_mosaic_static_memory_bytes=measured.static_memory_bytes, + tensor_vs_theory_gap_bytes=measured_active_tensor_memory_bytes + - theoretical_model_memory_bytes, + ) metrics = { "static_baseline": _approximate_metric( metric_name="static_baseline", @@ -442,23 +655,23 @@ def compare_theory_to_measurement( measured_bytes=measured.static_memory_bytes, 
tolerance=actual_tolerances.static_baseline_relative, ), - "forward_end_dynamic": _approximate_metric( - metric_name="forward_end_dynamic", - predicted_bytes=float(theory.forward_dynamic_estimate_bytes), - measured_bytes=measured.annotation_bytes("## forward ##", "END"), - tolerance=actual_tolerances.forward_dynamic_relative, + "steady_state_floor": _approximate_metric( + metric_name="steady_state_floor", + predicted_bytes=float(theory.optimizer_dynamic_estimate_bytes), + measured_bytes=steady_state_floor_bytes, + tolerance=actual_tolerances.steady_state_floor_relative, ), - "backward_end_dynamic": _approximate_metric( - metric_name="backward_end_dynamic", - predicted_bytes=float(theory.backward_dynamic_estimate_bytes), - measured_bytes=measured.annotation_bytes("## backward ##", "END"), - tolerance=actual_tolerances.backward_dynamic_relative, + "backward_delta": _approximate_metric( + metric_name="backward_delta", + predicted_bytes=float(theory.gradient_bytes), + measured_bytes=backward_delta_bytes, + tolerance=actual_tolerances.backward_delta_relative, ), - "optimizer_end_dynamic": _approximate_metric( - metric_name="optimizer_end_dynamic", - predicted_bytes=float(theory.optimizer_dynamic_estimate_bytes), - measured_bytes=measured.annotation_bytes("## optimizer ##", "END"), - tolerance=actual_tolerances.optimizer_dynamic_relative, + "peak_over_floor": _approximate_metric( + metric_name="peak_over_floor", + predicted_bytes=float(theory.optimizer_state_bytes), + measured_bytes=peak_over_floor_bytes, + tolerance=actual_tolerances.peak_over_floor_relative, ), "dynamic_peak_lower_bound": _lower_bound_metric( metric_name="dynamic_peak_lower_bound", @@ -471,8 +684,9 @@ def compare_theory_to_measurement( theory=theory, measured=measured, metrics=metrics, - notes=notes, - possible_gap_sources=possible_gap_sources, + memory_accounting=memory_accounting, + notes=actual_narrative.notes, + possible_gap_sources=actual_narrative.possible_gap_sources, ) @@ -543,6 +757,100 @@ def 
_parse_annotation_measurement(key: str, value: object) -> AnnotationMeasurem ) +def _parse_allocator_memory_state(value: object) -> AllocatorMemoryState: + """Parse a measured allocator-state mapping.""" + mapping = _require_mapping(value, "allocator_state") + allocated_bytes = _require_float( + mapping.get("allocated_bytes"), "allocator_state.allocated_bytes" + ) + reserved_bytes = _require_float(mapping.get("reserved_bytes"), "allocator_state.reserved_bytes") + reserved_cached_bytes = _require_float( + mapping.get("reserved_cached_bytes"), "allocator_state.reserved_cached_bytes" + ) + return AllocatorMemoryState( + allocated_bytes=allocated_bytes, + allocated_gib=_require_float(mapping.get("allocated_gib"), "allocator_state.allocated_gib"), + reserved_bytes=reserved_bytes, + reserved_gib=_require_float(mapping.get("reserved_gib"), "allocator_state.reserved_gib"), + reserved_cached_bytes=reserved_cached_bytes, + reserved_cached_gib=_require_float( + mapping.get("reserved_cached_gib"), "allocator_state.reserved_cached_gib" + ), + ) + + +def _parse_live_tensor_accounting(value: object) -> LiveTensorAccounting: + """Parse a live-tensor accounting mapping.""" + mapping = _require_mapping(value, "live_tensor_accounting") + categories_mapping = _require_mapping( + mapping.get("categories"), "live_tensor_accounting.categories" + ) + top_other_payload = mapping.get("top_other_tensors") + top_other_tensors: list[LiveTensorDescriptor] = [] + if top_other_payload is not None: + if not isinstance(top_other_payload, list): + message = "live_tensor_accounting.top_other_tensors must be a list" + raise TypeError(message) + top_other_tensors = [ + _parse_live_tensor_descriptor(item, index) + for index, item in enumerate(top_other_payload) + ] + + return LiveTensorAccounting( + total_live_tensor_bytes=_require_float( + mapping.get("total_live_tensor_bytes"), "live_tensor_accounting.total_live_tensor_bytes" + ), + total_live_tensor_gib=_require_float( + 
mapping.get("total_live_tensor_gib"), "live_tensor_accounting.total_live_tensor_gib" + ), + categories={ + key: _parse_tensor_category_summary(category_value, key) + for key, category_value in categories_mapping.items() + }, + top_other_tensors=tuple(top_other_tensors), + ) + + +def _parse_tensor_category_summary(value: object, key: str) -> TensorCategorySummary: + """Parse one live tensor category summary.""" + mapping = _require_mapping(value, f"live_tensor_accounting.categories[{key}]") + return TensorCategorySummary( + bytes=_require_float( + mapping.get("bytes"), f"live_tensor_accounting.categories[{key}].bytes" + ), + gib=_require_float(mapping.get("gib"), f"live_tensor_accounting.categories[{key}].gib"), + ) + + +def _parse_live_tensor_descriptor(value: object, index: int) -> LiveTensorDescriptor: + """Parse one unmatched live tensor descriptor.""" + mapping = _require_mapping(value, f"live_tensor_accounting.top_other_tensors[{index}]") + shape_payload = mapping.get("shape") + if not isinstance(shape_payload, list) or not all( + isinstance(item, int) for item in shape_payload + ): + message = ( + f"live_tensor_accounting.top_other_tensors[{index}].shape must be a list of integers" + ) + raise TypeError(message) + return LiveTensorDescriptor( + bytes=_require_float( + mapping.get("bytes"), f"live_tensor_accounting.top_other_tensors[{index}].bytes" + ), + gib=_require_float( + mapping.get("gib"), f"live_tensor_accounting.top_other_tensors[{index}].gib" + ), + shape=tuple(cast("list[int]", shape_payload)), + dtype=_require_str( + mapping.get("dtype"), f"live_tensor_accounting.top_other_tensors[{index}].dtype" + ), + requires_grad=_require_bool( + mapping.get("requires_grad"), + f"live_tensor_accounting.top_other_tensors[{index}].requires_grad", + ), + ) + + def _require_mapping(value: object, field_name: str) -> Mapping[str, object]: """Validate that a value is a mapping.""" if not isinstance(value, dict): @@ -573,3 +881,11 @@ def _require_int(value: object, 
field_name: str) -> int: message = f"{field_name} must be an integer" raise TypeError(message) return value + + +def _require_bool(value: object, field_name: str) -> bool: + """Validate that a value is a boolean.""" + if not isinstance(value, bool): + message = f"{field_name} must be a boolean" + raise TypeError(message) + return value diff --git a/src/stellatscale/models.py b/src/stellatscale/models.py index 396baf7..b46d7e3 100644 --- a/src/stellatscale/models.py +++ b/src/stellatscale/models.py @@ -67,6 +67,68 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: return self.model(x) +class _SingleLinearBackbone(nn.Module): + """Minimal wrapper exposing a named linear module for PEFT adaptation.""" + + def __init__(self, linear: nn.Linear) -> None: + super().__init__() + self.linear = linear + + def forward(self, x: torch.Tensor) -> torch.Tensor: + """Apply the wrapped linear layer.""" + return self.linear(x) + + +class LoRALinear(nn.Module): + """Single linear layer adapted with the official PEFT LoRA wrapper.""" + + def __init__(self, base: nn.Linear, rank: int, alpha: int = 1) -> None: + super().__init__() + base.requires_grad_(requires_grad=False) + backbone = _SingleLinearBackbone(base) + lora_config = LoraConfig(r=rank, lora_alpha=alpha, target_modules=["linear"], bias="none") + self.model = get_peft_model(cast("Any", backbone), lora_config) + + @property + def base_layer(self) -> nn.Linear: + """Return the underlying frozen dense projection.""" + base_model = cast("Any", self.model.base_model) + return cast("nn.Linear", base_model.model.linear.base_layer) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + """Run the LoRA-adapted linear layer.""" + return self.model(x) + + +class StelLALinear(nn.Module): + """Single linear layer adapted with the official PEFT StelLA wrapper.""" + + def __init__(self, base: nn.Linear, rank: int, alpha: int = 1) -> None: + super().__init__() + base.requires_grad_(requires_grad=False) + backbone = 
_SingleLinearBackbone(base) + stella_config = StellaConfig( + r=rank, + lora_alpha=alpha, + target_modules=["linear"], + bias="none", + stella_grad_scaling=float(base.out_features), + stella_retraction="exp_map", + ) + self.model = get_peft_model(cast("Any", backbone), stella_config) + StelLAAdamW.set_current_stella_model(cast("_StellaHookModel", self.model)) + + @property + def base_layer(self) -> nn.Linear: + """Return the underlying frozen dense projection.""" + base_model = cast("Any", self.model.base_model) + return cast("nn.Linear", base_model.model.linear.base_layer) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + """Run the StelLA-adapted linear layer.""" + return self.model(x) + + # ── StelLA Transformer (via official stella + PEFT) ────────────────────────── diff --git a/tests/test_memory_experiment.py b/tests/test_memory_experiment.py index a6b094d..b3af424 100644 --- a/tests/test_memory_experiment.py +++ b/tests/test_memory_experiment.py @@ -6,14 +6,15 @@ from torch import nn from stellatscale.memory_experiment import ( + ComparisonNarrative, ComparisonTolerances, - FrozenLoRALinear, LinearModelVariant, MemoryExperimentConfig, MemorySummary, build_theoretical_summary, compare_theory_to_measurement, ) +from stellatscale.models import LoRALinear, StelLAAdamW, StelLALinear def test_dense_theoretical_accounting_matches_exact_counts() -> None: @@ -22,7 +23,7 @@ def test_dense_theoretical_accounting_matches_exact_counts() -> None: batch_size=16, in_features=4096, out_features=4096, lora_rank=16 ) - summary = build_theoretical_summary(config, LinearModelVariant.DENSE) + summary = build_theoretical_summary(config, LinearModelVariant.LINEAR) assert summary.resident_parameter_bytes == 4096 * 4096 * 4 assert summary.trainable_parameter_bytes == 4096 * 4096 * 4 @@ -31,13 +32,13 @@ def test_dense_theoretical_accounting_matches_exact_counts() -> None: assert summary.static_baseline_bytes == (4096 * 4096 * 4) + (16 * 4096 * 4) + (16 * 4096 * 4) -def 
def test_stella_theoretical_accounting_matches_exact_counts() -> None:
    """StelLA theory should bill gradients and full Adam state to every trainable factor."""
    # Element counts the theory is expected to reproduce for a 4096x4096 layer at rank 16.
    frozen_base_elements = 4096 * 4096
    trainable_elements = (16 * (4096 + 4096)) + (16**2)
    # 4 bytes per element for weights and gradients, 8 for the optimizer state.
    stella_trainable_bytes = trainable_elements * 4
    expected_optimizer_state_bytes = trainable_elements * 8

    summary = build_theoretical_summary(
        MemoryExperimentConfig(batch_size=16, in_features=4096, out_features=4096, lora_rank=16),
        LinearModelVariant.LINEAR_STELLA,
    )

    assert summary.resident_parameter_bytes == (frozen_base_elements * 4) + stella_trainable_bytes
    assert summary.trainable_parameter_bytes == stella_trainable_bytes
    assert summary.gradient_bytes == stella_trainable_bytes
    assert summary.optimizer_state_bytes == expected_optimizer_state_bytes
def test_stella_linear_keeps_base_frozen_and_optimizer_state_scoped() -> None:
    """The frozen base weight must stay out of training.

    After one optimisation step the base weight has neither a gradient nor
    optimizer state, while the StelLA PEFT factors are tracked by StelLAAdamW.
    """
    adapted = StelLALinear(nn.Linear(8, 8, bias=False), rank=2)
    features = torch.randn(4, 8)
    targets = torch.randn(4, 8)
    trainable = (parameter for parameter in adapted.parameters() if parameter.requires_grad)
    optimizer = StelLAAdamW(trainable, lr=0.1)
    by_name = dict(adapted.named_parameters())

    # One full training step: forward, loss, backward, optimizer update.
    nn.functional.mse_loss(adapted(features), targets).backward()
    optimizer.step()

    assert adapted.base_layer.weight.grad is None
    assert adapted.base_layer.weight not in optimizer.state

    stella_u = by_name["model.base_model.model.linear.stella_U.default.weight"]
    assert stella_u.grad is not None
    stella_vt = by_name["model.base_model.model.linear.stella_Vt.default.weight"]
    assert stella_vt.grad is not None

    assert stella_u in optimizer.state
    assert by_name["model.base_model.model.linear.stella_S.default.weight"] in optimizer.state
    assert stella_vt in optimizer.state
"END", @@ -89,8 +177,8 @@ def test_comparison_report_preserves_theoretical_gap_information() -> None: "device": 0, "time_us": 1, }, - "memory_bytes": 263168.0, - "memory_gib": 0.00024509429931640625, + "memory_bytes": 1311744.0, + "memory_gib": 0.0012216567993164062, }, "## backward ##_END": { "annotation": { @@ -99,18 +187,8 @@ def test_comparison_report_preserves_theoretical_gap_information() -> None: "device": 0, "time_us": 2, }, - "memory_bytes": 786432.0, - "memory_gib": 0.000732421875, - }, - "## optimizer ##_END": { - "annotation": { - "stage": "END", - "name": "## optimizer ##", - "device": 0, - "time_us": 3, - }, - "memory_bytes": 1310720.0, - "memory_gib": 0.001220703125, + "memory_bytes": 1835008.0, + "memory_gib": 0.001708984375, }, }, "files": {}, @@ -121,14 +199,35 @@ def test_comparison_report_preserves_theoretical_gap_information() -> None: theory, measured, tolerances=ComparisonTolerances(static_baseline_relative=0.10), - notes=("Preserve the error; it may reflect theory, implementation, or runtime behavior.",), - possible_gap_sources=( - "External GPU workload from scripts like scripts/gpu_keepalive_loop.sh can perturb allocator baselines if running concurrently.", + narrative=ComparisonNarrative( + notes=( + "Preserve the error; it may reflect theory, implementation, or runtime behavior.", + ), + possible_gap_sources=( + "External GPU workload from scripts like scripts/gpu_keepalive_loop.sh can perturb allocator baselines if running concurrently.", + ), ), ) - assert "static_baseline" in report.failing_metrics + assert report.failing_metrics == ("static_baseline",) + assert report.metrics["steady_state_floor"].within_tolerance + assert report.metrics["backward_delta"].within_tolerance + assert report.metrics["peak_over_floor"].within_tolerance assert report.metrics["dynamic_peak_lower_bound"].within_tolerance + assert ( + report.memory_accounting.theoretical_model_memory_bytes + == expected_linear_lora_steady_state_bytes + ) + assert ( + 
report.memory_accounting.measured_active_tensor_memory_bytes + == expected_linear_lora_steady_state_bytes + ) + assert ( + report.memory_accounting.measured_reserved_cached_memory_bytes + == expected_reserved_cache_bytes + ) + assert report.memory_accounting.unexplained_gap_bytes == 0.0 + assert report.memory_accounting.tensor_vs_theory_gap_bytes == 0.0 assert "gpu_keepalive_loop.sh" in report.possible_gap_sources[0] @@ -137,17 +236,50 @@ def test_dense_measurement_agreement_is_reasonable_for_major_metrics() -> None: config = MemoryExperimentConfig( batch_size=16, in_features=4096, out_features=4096, lora_rank=16 ) - theory = build_theoretical_summary(config, LinearModelVariant.DENSE) + expected_linear_steady_state_bytes = 201850880.0 + expected_non_tensor_gap_bytes = 16777216.0 + theory = build_theoretical_summary(config, LinearModelVariant.LINEAR) measured = MemorySummary.from_mapping( { - "name": "dense", + "name": "linear", "dynamic_peak_bytes": 285736960.0, "dynamic_peak_gib": 0.26611328125, "static_memory_bytes": 67633152.0, "static_memory_gib": 0.06298828125, "overall_peak_bytes": 353370112.0, "overall_peak_gib": 0.3291015625, + "allocator_state": { + "allocated_bytes": 218628096.0, + "allocated_gib": 0.18798828125, + "reserved_bytes": 218628096.0, + "reserved_gib": 0.18798828125, + "reserved_cached_bytes": 0.0, + "reserved_cached_gib": 0.0, + }, + "live_tensor_accounting": { + "total_live_tensor_bytes": expected_linear_steady_state_bytes, + "total_live_tensor_gib": 0.18798828125, + "categories": { + "parameters": {"bytes": 67108864.0, "gib": 0.0625}, + "gradients": {"bytes": 0.0, "gib": 0.0}, + "optimizer_state": {"bytes": 134217728.0, "gib": 0.125}, + "inputs": {"bytes": 262144.0, "gib": 0.000244140625}, + "labels": {"bytes": 262144.0, "gib": 0.000244140625}, + "other": {"bytes": 0.0, "gib": 0.0}, + }, + "top_other_tensors": [], + }, "annotation_memory": { + "## forward ##_START": { + "annotation": { + "stage": "START", + "name": "## forward ##", + 
"device": 0, + "time_us": 0, + }, + "memory_bytes": 151519232.0, + "memory_gib": 0.14111328125, + }, "## forward ##_END": { "annotation": { "stage": "END", @@ -155,8 +287,8 @@ def test_dense_measurement_agreement_is_reasonable_for_major_metrics() -> None: "device": 0, "time_us": 1, }, - "memory_bytes": 8781824.0, - "memory_gib": 0.0081787109375, + "memory_bytes": 151519232.0, + "memory_gib": 0.14111328125, }, "## backward ##_END": { "annotation": { @@ -165,18 +297,8 @@ def test_dense_measurement_agreement_is_reasonable_for_major_metrics() -> None: "device": 0, "time_us": 2, }, - "memory_bytes": 84410368.0, - "memory_gib": 0.07861328125, - }, - "## optimizer ##_END": { - "annotation": { - "stage": "END", - "name": "## optimizer ##", - "device": 0, - "time_us": 3, - }, - "memory_bytes": 151519232.0, - "memory_gib": 0.14111328125, + "memory_bytes": 218628096.0, + "memory_gib": 0.20361328125, }, }, "files": {}, @@ -186,5 +308,13 @@ def test_dense_measurement_agreement_is_reasonable_for_major_metrics() -> None: report = compare_theory_to_measurement(theory, measured) assert report.metrics["static_baseline"].within_tolerance - assert report.metrics["optimizer_end_dynamic"].within_tolerance + assert report.metrics["steady_state_floor"].within_tolerance + assert report.metrics["backward_delta"].within_tolerance + assert report.metrics["peak_over_floor"].within_tolerance assert report.metrics["dynamic_peak_lower_bound"].within_tolerance + assert ( + report.memory_accounting.theoretical_model_memory_bytes + == expected_linear_steady_state_bytes + ) + assert report.memory_accounting.unexplained_gap_bytes == expected_non_tensor_gap_bytes + assert report.memory_accounting.tensor_vs_theory_gap_bytes == 0.0