Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 31 additions & 18 deletions areal/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,28 +4,41 @@

from .version import __version__ # noqa

from .infra import (
RolloutController,
StalenessManager,
TrainController,
WorkflowExecutor,
current_platform,
workflow_context,

# Public names that the module-level ``__getattr__`` resolves from the
# ``.infra`` subpackage on first access instead of importing them eagerly.
_INFRA_NAMES = frozenset(
    (
        "RolloutController",
        "StalenessManager",
        "TrainController",
        "WorkflowExecutor",
        "current_platform",
        "workflow_context",
    )
)

# Public names that the module-level ``__getattr__`` resolves from the
# ``.trainer`` subpackage on first access.
_TRAINER_NAMES = frozenset(
    (
        "DPOTrainer",
        "PPOTrainer",
        "RWTrainer",
        "SFTTrainer",
    )
)


def __getattr__(name: str):
    """Lazily resolve names re-exported from ``.infra`` and ``.trainer``.

    Python calls this hook (PEP 562) only when normal module attribute
    lookup fails. The owning subpackage is imported at that point, the
    requested attribute is fetched from it, and the result is stored in
    this module's globals so later lookups of the same name no longer
    reach this function.

    The previous body kept an older trainer branch (hard-coded name tuple,
    eager import of all four trainers) ahead of the ``_TRAINER_NAMES``
    branch, which made the latter unreachable and duplicated the name
    list. Both are folded into one path driven by the two name sets.

    Args:
        name: Attribute requested on this package.

    Returns:
        The attribute of the same name from ``.infra`` or ``.trainer``.

    Raises:
        AttributeError: If ``name`` is in neither ``_INFRA_NAMES`` nor
            ``_TRAINER_NAMES``.
    """
    # The two name sets are disjoint, so the order of these checks does not
    # affect which subpackage serves a given name.
    if name in _INFRA_NAMES:
        from . import infra as _source
    elif name in _TRAINER_NAMES:
        from . import trainer as _source
    else:
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")

    value = getattr(_source, name)
    # Cache only the requested name; other names are resolved on demand.
    globals()[name] = value
    return value


Expand Down
14 changes: 14 additions & 0 deletions areal/experimental/cli/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# SPDX-License-Identifier: Apache-2.0

"""AReaL operator CLI — companion to the v2 microservice control plane.

This package exposes a single ``areal`` console-script that drives the v2
service gateways (inference / agent / training / weight-update) from a
shell, rather than from a Python script that has to instantiate the
matching controller. It is intentionally light at import time so that
adding a verb in a follow-up PR does not pull torch / ray / megatron /
sglang / vllm into the parser-construction path.

The full per-verb design surface is tracked in the upstream design
discussion issue.
"""
1 change: 1 addition & 0 deletions areal/experimental/cli/commands/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# SPDX-License-Identifier: Apache-2.0
49 changes: 49 additions & 0 deletions areal/experimental/cli/commands/agent/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# SPDX-License-Identifier: Apache-2.0

"""``areal agent`` — agent service operator console (scaffold).

Drives an agent service (gateway + router + N (worker, data-proxy) pairs)
for session-centric operator and debugging work. No verbs are implemented
in this scaffold release; this module only reserves the ``areal agent``
command name and tells the user what is coming.

The agent CLI is session-centric (not model-centric like ``areal inf``).
Sessions can negotiate an RL session key with a configured inference
service when they start, enabling online RL trajectory tracking.
"""

from __future__ import annotations

import argparse

# Description text for the ``agent`` sub-parser. ``add_parser`` passes it as
# ``description=`` together with ``RawDescriptionHelpFormatter``, so the line
# layout of this literal is what the help output is built from. The leading
# backslash suppresses the newline right after the opening quotes.
_DESCRIPTION = """\
Operate an agent service: gateway + router + (worker, data-proxy) pairs.
Session-centric: the primary unit of interaction is an agent session,
not a model.

NO VERBS IMPLEMENTED YET. This namespace currently only reserves the
`areal agent ...` command surface.

Planned verb surface (flag matrices live in the design discussion issue):
run launch router + N pairs + gateway
stop tear them down
status health for one service
ps list locally known services
logs show gateway / router / worker / data-proxy logs

State lives under ~/.areal/agent/.
"""


def add_parser(subparsers: argparse._SubParsersAction) -> None:
    """Register the ``agent`` namespace on *subparsers*.

    The new sub-parser uses ``_DESCRIPTION`` as its raw-formatted
    description and gets ``_handle`` installed as its ``func`` default.

    Args:
        subparsers: Sub-parser action the ``agent`` command is added to.
    """
    subparsers.add_parser(
        "agent",
        help="Operate an agent service (scaffold — no verbs yet).",
        description=_DESCRIPTION,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    ).set_defaults(func=_handle)


def _handle(_: argparse.Namespace) -> int:
    """Scaffold handler for ``areal agent``: ignore the arguments, return 0."""
    status = 0  # no verbs exist yet, so there is nothing to do
    return status
43 changes: 43 additions & 0 deletions areal/experimental/cli/commands/inf/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# SPDX-License-Identifier: Apache-2.0

"""``areal inf`` — inference service operator console (scaffold).

Drives an inference service (gateway + router + optional model backends)
for day-to-day operator and debugging work. No verbs are implemented in
this scaffold release; this module only reserves the ``areal inf``
command name and tells the user what is coming.
"""

from __future__ import annotations

import argparse

# Description text for the ``inf`` sub-parser. ``add_parser`` passes it as
# ``description=`` together with ``RawDescriptionHelpFormatter``, so the line
# layout of this literal is what the help output is built from. The leading
# backslash suppresses the newline right after the opening quotes.
_DESCRIPTION = """\
Operate an inference service: gateway + router + optional model backends.

NO VERBS IMPLEMENTED YET. This namespace currently only reserves the
`areal inf ...` command surface.

Planned verb surface (flag matrices live in the design discussion issue):
run launch gateway + router (optionally with --model inline)
stop tear them down
status health for one service
ps list locally known services
logs show gateway / router / model logs

State lives under ~/.areal/inf/.
"""


def add_parser(subparsers: argparse._SubParsersAction) -> None:
    """Register the ``inf`` namespace on *subparsers*.

    The new sub-parser uses ``_DESCRIPTION`` as its raw-formatted
    description and gets ``_handle`` installed as its ``func`` default.

    Args:
        subparsers: Sub-parser action the ``inf`` command is added to.
    """
    subparsers.add_parser(
        "inf",
        help="Operate an inference service (scaffold — no verbs yet).",
        description=_DESCRIPTION,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    ).set_defaults(func=_handle)


def _handle(_: argparse.Namespace) -> int:
    """Scaffold handler for ``areal inf``: ignore the arguments, return 0."""
    status = 0  # no verbs exist yet, so there is nothing to do
    return status
50 changes: 50 additions & 0 deletions areal/experimental/cli/commands/train/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# SPDX-License-Identifier: Apache-2.0

"""``areal train`` — training job submitter (scaffold).

Wraps the launch lifecycle of a training driver. Unlike ``areal inf`` /
``areal agent`` (which manage long-running services), ``areal train``
treats each run as a job: it terminates, and the CLI's job is purely
lifecycle wrapping. The scheduling decision (local / slurm / ray) stays
inside the driver, decided by ``config.scheduler.type`` — the CLI does
not pick a scheduler.

No verbs are implemented in this scaffold release.
"""

from __future__ import annotations

import argparse

# Description text for the ``train`` sub-parser. ``add_parser`` passes it as
# ``description=`` together with ``RawDescriptionHelpFormatter``, so the line
# layout of this literal is what the help output is built from. The leading
# backslash suppresses the newline right after the opening quotes.
_DESCRIPTION = """\
Submit and observe training jobs. Job-shaped (terminates), not
service-shaped — the CLI wraps lifecycle only and does not choose the
scheduler (that decision lives in the driver via config.scheduler.type).

NO VERBS IMPLEMENTED YET. This namespace currently only reserves the
`areal train ...` command surface.

Planned verb surface (flag matrices live in the design discussion issue):
run run a driver in the foreground (small jobs, debugging)
start spawn a detached driver process (cluster jobs)
stop signal a running job by name
ps list locally tracked jobs
status status of one job
logs tail a job's combined stdout/stderr

State lives under ~/.areal/train/.
"""


def add_parser(subparsers: argparse._SubParsersAction) -> None:
    """Register the ``train`` namespace on *subparsers*.

    The new sub-parser uses ``_DESCRIPTION`` as its raw-formatted
    description and gets ``_handle`` installed as its ``func`` default.

    Args:
        subparsers: Sub-parser action the ``train`` command is added to.
    """
    subparsers.add_parser(
        "train",
        help="Submit and observe training jobs (scaffold — no verbs yet).",
        description=_DESCRIPTION,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    ).set_defaults(func=_handle)


def _handle(_: argparse.Namespace) -> int:
    """Scaffold handler for ``areal train``: ignore the arguments, return 0."""
    status = 0  # no verbs exist yet, so there is nothing to do
    return status
54 changes: 54 additions & 0 deletions areal/experimental/cli/commands/weight_update/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
# SPDX-License-Identifier: Apache-2.0

"""``areal weight-update`` — weight-sync diagnostic console (scaffold).

Drives the weight-update service that sits between training and
inference. The operator-facing surface is small and diagnostic-only:
humans don't invoke `/connect` / `/update_weights` / `/disconnect`
directly during normal use — those are called by adapter code inside
the training and inference engines. The CLI's job is to show whether
the sync is healthy, which (train, inference) pairs are connected, and
where the logs are.

No verbs are implemented in this scaffold release.

The CLI-surface namespace is ``weight-update`` (hyphenated, matching
the v2 service naming). The Python module is ``weight_update`` because
identifiers can't contain hyphens.
"""

from __future__ import annotations

import argparse

# Description text for the ``weight-update`` sub-parser. ``add_parser`` passes
# it as ``description=`` together with ``RawDescriptionHelpFormatter``, so the
# line layout of this literal is what the help output is built from. The
# leading backslash suppresses the newline right after the opening quotes.
_DESCRIPTION = """\
Diagnose the weight-update service that bridges training and inference.

NO VERBS IMPLEMENTED YET. This namespace currently only reserves the
`areal weight-update ...` command surface.

Planned verb surface (flag matrices live in the design discussion issue):
status is the gateway alive? how many pairs are connected?
ps list locally known weight-update services
logs tail the gateway log

Note: there is no `run` verb in the first cut — in the v2 flow the
weight-update gateway is brought up by the training-side controller,
not by the operator.

State lives under ~/.areal/weight-update/.
"""


def add_parser(subparsers: argparse._SubParsersAction) -> None:
    """Register the ``weight-update`` namespace on *subparsers*.

    The new sub-parser uses ``_DESCRIPTION`` as its raw-formatted
    description and gets ``_handle`` installed as its ``func`` default.

    Args:
        subparsers: Sub-parser action the ``weight-update`` command is
            added to.
    """
    subparsers.add_parser(
        "weight-update",
        help="Diagnose weight-sync state (scaffold — no verbs yet).",
        description=_DESCRIPTION,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    ).set_defaults(func=_handle)


def _handle(_: argparse.Namespace) -> int:
    """Scaffold handler for ``areal weight-update``: ignore the arguments, return 0."""
    status = 0  # no verbs exist yet, so there is nothing to do
    return status
82 changes: 82 additions & 0 deletions areal/experimental/cli/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
# SPDX-License-Identifier: Apache-2.0

"""Top-level entry point for the ``areal`` console-script.

This module wires the four sub-CLI namespaces (`inf`, `agent`, `train`,
`weight-update`) into a single argparse tree. Each namespace lives under
``areal/experimental/cli/commands/<namespace>/`` and exports an
``add_parser(subparsers)`` function; this file imports them and registers
them. No verb behavior is implemented at this level.

The import path is kept deliberately light: only stdlib and the namespace
``__init__`` modules are touched here. Heavy dependencies (torch, ray,
megatron, sglang, vllm, fastapi, …) must never appear on the import path
that ``areal --help`` triggers — the invariant is locked by
``tests/experimental/test_cli_lightness.py``.
"""

from __future__ import annotations

import argparse
import sys

from areal.experimental.cli.commands import agent as cmd_agent
from areal.experimental.cli.commands import inf as cmd_inf
from areal.experimental.cli.commands import train as cmd_train
from areal.experimental.cli.commands import weight_update as cmd_weight_update
from areal.version import __version__

# Description text for the top-level ``areal`` parser. ``build_parser`` passes
# it as ``description=`` together with ``RawDescriptionHelpFormatter``, so the
# line layout of this literal is what the help output is built from. The
# leading backslash suppresses the newline right after the opening quotes.
_DESCRIPTION = """\
AReaL operator CLI for the v2 microservice architecture.

Each namespace drives one of the v2 services. Verbs land incrementally;
no verbs are implemented in this scaffold release — each namespace's
--help describes its planned surface and points at the design discussion.

Namespaces:
inf Operate an inference service (gateway + router + models)
agent Operate an agent service (gateway + router + sessions)
train Submit and observe training jobs
weight-update Diagnose weight-sync state between train and inference

Run `areal <namespace> --help` for what's planned (and what's available
today). State files for each namespace live under ~/.areal/<namespace>/.
"""


def build_parser() -> argparse.ArgumentParser:
    """Assemble the argparse tree for the ``areal`` command.

    The root parser carries a ``--version`` flag and a required
    sub-command whose name is stored under ``namespace``. The ``inf``,
    ``agent``, ``train`` and ``weight-update`` command modules then
    register themselves, in that order, through their ``add_parser``
    functions.

    Returns:
        The fully wired root parser.
    """
    root = argparse.ArgumentParser(
        prog="areal",
        description=_DESCRIPTION,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    root.add_argument("--version", action="version", version=f"areal {__version__}")

    namespaces = root.add_subparsers(
        dest="namespace",
        required=True,
        metavar="NAMESPACE",
    )
    # Registration order is kept as inf, agent, train, weight-update.
    for command in (cmd_inf, cmd_agent, cmd_train, cmd_weight_update):
        command.add_parser(namespaces)
    return root


def cli(argv: list[str] | None = None) -> int:
    """Parse *argv* and dispatch to the selected namespace handler.

    Args:
        argv: Argument list to parse. When ``None``, ``sys.argv[1:]`` is
            used instead.

    Returns:
        The handler's result when it is an ``int``, ``0`` when the handler
        returns anything else, or ``2`` (after printing the top-level help)
        when the parsed arguments carry no ``func`` attribute.
    """
    parser = build_parser()
    raw_args = sys.argv[1:] if argv is None else argv
    namespace = parser.parse_args(raw_args)

    handler = getattr(namespace, "func", None)
    if handler is not None:
        outcome = handler(namespace)
        if isinstance(outcome, int):
            # int() normalises int subclasses such as bool to a plain int.
            return int(outcome)
        return 0

    # No handler attached to the parsed arguments: show usage.
    parser.print_help()
    return 2


# Script entry point: propagate the value returned by cli() as the process
# exit status.
if __name__ == "__main__":
    raise SystemExit(cli())
Loading
Loading