From 3ab7b2a456e9b919da5a299d4b28a7716960b42c Mon Sep 17 00:00:00 2001 From: John Chase Date: Thu, 13 Nov 2025 15:31:17 -0800 Subject: [PATCH 01/44] Fixes docker and pyproject. --- Dockerfile | 36 +++++++++++------------------------- pyproject.toml | 15 ++++----------- 2 files changed, 15 insertions(+), 36 deletions(-) diff --git a/Dockerfile b/Dockerfile index 19b34af..3affffa 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,36 +1,22 @@ -ARG QIIME_BASE_IMAGE=quay.io/qiime2/amplicon:2024.5 -FROM ${QIIME_BASE_IMAGE} as base +ARG QIIME_BASE_IMAGE=quay.io/qiime2/amplicon:2024.10 +FROM ${QIIME_BASE_IMAGE} AS base ENV PYTHONUNBUFFERED=1 WORKDIR /app - -COPY --from=ghcr.io/astral-sh/uv:0.5.11 /uv /uvx /bin/ - -ENV UV_COMPILE_BYTECODE=1 -ENV UV_LINK_MODE=copy - -# Development stage -FROM base as dev - -RUN --mount=type=cache,target=/root/.cache/uv \ - --mount=type=bind,source=uv.lock,target=uv.lock \ - --mount=type=bind,source=pyproject.toml,target=pyproject.toml \ - uv sync --frozen --no-install-project - -ENV PYTHONPATH=/app/src +FROM base AS dev COPY ./pyproject.toml ./uv.lock /app/ COPY ./README.md /app/ COPY ./src /app/src -RUN --mount=type=cache,target=/root/.cache/uv \ - uv sync --frozen +# Skip UV and use QIIME conda env +RUN pip install . + +RUN pip uninstall pyOpenSSL -y || true -# is this needed still? 
-RUN pip3 uninstall pyOpenSSL -y || true -# We can extend this if needed -FROM dev as production +FROM dev AS production +WORKDIR /app + +CMD ["adagio", "--help"] -# Set default command -CMD ["uv", "run", "adagio", "--help"] diff --git a/pyproject.toml b/pyproject.toml index 7e7e79d..caf9886 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -3,17 +3,12 @@ name = "adagio" version = "0.0.0" description = "Adagio command line tool" readme = "README.md" -requires-python = ">=3.11" -dependencies = [ - "rich>=14.1.0", - "typer>=0.17.4", -] +requires-python = ">=3.10" +dependencies = ["rich>=14.1.0", "typer>=0.17.4"] [dependency-groups] -dev = [ - "ruff>=0.13.0", -] +dev = ["ruff>=0.13.0"] [project.scripts] adagio = "adagio.cli:app" @@ -34,7 +29,5 @@ section-order = [ "standard-library", "third-party", "first-party", - "local-folder" + "local-folder", ] - - From 6096344527b34793114bc3d442224d1b84e31504 Mon Sep 17 00:00:00 2001 From: John Chase Date: Mon, 16 Feb 2026 20:35:34 -0800 Subject: [PATCH 02/44] Adds pipeline parsing --- pyproject.toml | 6 +- src/adagio/app/parsers/pipeline.py | 24 +++++ src/adagio/cli.py | 151 ++++++++--------------------- 3 files changed, 69 insertions(+), 112 deletions(-) create mode 100644 src/adagio/app/parsers/pipeline.py diff --git a/pyproject.toml b/pyproject.toml index caf9886..3b11e46 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,11 @@ version = "0.0.0" description = "Adagio command line tool" readme = "README.md" requires-python = ">=3.10" -dependencies = ["rich>=14.1.0", "typer>=0.17.4"] +dependencies = [ + "cyclopts>=4.5.3", + "pydantic>=2.12.5", + "rich>=14.1.0", +] [dependency-groups] diff --git a/src/adagio/app/parsers/pipeline.py b/src/adagio/app/parsers/pipeline.py new file mode 100644 index 0000000..0a9c1ac --- /dev/null +++ b/src/adagio/app/parsers/pipeline.py @@ -0,0 +1,24 @@ +"""This is a placeholder for the pipeline parser. 
It will be implemented in the future when we have a better understanding of the pipeline specification. + +This can and should be updated to something more robust +""" + +from pydantic import BaseModel +from typing import Any, List, Optional +from uuid import UUID + + +class Parameter(BaseModel): + id: UUID + name: str + required: bool + default: Optional[Any] = None + type: str + + +def parse_parameters(data: Any) -> List[Parameter]: + """Parse a list of parameter dictionaries into a list of Parameter objects.""" + parameters = [] + for param in data["spec"]["signature"]["parameters"]: + parameters.append(Parameter(**param)) + return parameters diff --git a/src/adagio/cli.py b/src/adagio/cli.py index 045ab85..f06831b 100644 --- a/src/adagio/cli.py +++ b/src/adagio/cli.py @@ -1,128 +1,57 @@ -from typing import Annotated -import typer +from __future__ import annotations + from pathlib import Path +from typing import Annotated +from cyclopts import App, Parameter from rich.console import Console -from rich.panel import Panel +import json +from .app.parsers.pipeline import parse_parameters -import time -import itertools -from rich.live import Live - -app = typer.Typer( - help="Adagio command line tool for processing pipelines created with the Adagio GUI." 
-) console = Console() -@app.command("hello") -def hello_cmd( - input_file: Annotated[ - Path, - typer.Option( - "--input", - "-i", - help="Help text", - exists=False, - file_okay=True, - dir_okay=False, - readable=True, - ), - ], - name: Annotated[ - str, typer.Option("--name", "-n", help="Say hello to someone else") - ], -): - """Say hello.""" - stick_figure = r""" - O - /|\ - / \ - """ +def _adagio_version_string() -> str: + try: + from importlib.metadata import PackageNotFoundError, version as get_version + except ImportError: # pragma: no cover + from importlib_metadata import PackageNotFoundError, version as get_version # type: ignore - message = ( - f"[bold cyan]Hello {name}, {input_file} looks like a great file![/bold cyan]" - ) + try: + return f"Adagio {get_version('adagio')}" + except PackageNotFoundError: + return "Adagio version unknown (not installed as a package)" - # Wrap the figure + message in a Rich panel for nicer output - console.print( - Panel.fit( - f"{stick_figure}\n{message}", - title="[yellow]Stick Figure[/yellow]", - border_style="green", - ) - ) + # name: Annotated[ + # str, + # Parameter( + # name=("--name", "-n"), + # help="Say hello to someone else", + # ), + # ], -@app.command("chicken") -def animate_big_chicken( - laps: int = typer.Option(1, help="How many times to go left→right→left."), - speed: float = typer.Option(0.08, help="Seconds between steps (lower = faster)."), -): - """Animate a multi-line chicken walking across the screen.""" - - # Two frames to fake wing flaps - frames = [ - [ - " __", - " <(o )___", - " ( ._>", - " `---'", - ], - [ - " __", - " <( -)___", - " (o ._>", - " `---'", - ], - ] - flap = itertools.cycle(frames) - - width = console.size.width - rightmost = max(10, width - 12) - - def render(pos: int, art: list[str]) -> str: - # Shift each line horizontally by pos - shifted = [" " * pos + line for line in art] - # Pad lines so Live keeps height/width stable - padded = [line.ljust(width) for line in shifted] - 
return "\n".join(padded) - - with Live( - render(0, next(flap)), console=console, refresh_per_second=30, transient=True - ) as live: - for _ in range(laps): - # Left → Right - for x in range(0, rightmost): - live.update(render(x, next(flap))) - time.sleep(speed) - # Right → Left - for x in range(rightmost, 0, -1): - live.update(render(x, next(flap))) - time.sleep(speed) - - console.print("[bold yellow]🐔 Big chicken says cluck![/bold yellow]") +app = App( + help="Adagio command line tool for processing pipelines created with the Adagio GUI.", + version=_adagio_version_string, +) -@app.callback(invoke_without_command=True) -def main_callback( - version: Annotated[bool, typer.Option("--version", help="Show version")] = False, +@app.command(name="run") +def run_cmd( + pipeline: Annotated[ + Path, + Parameter( + name=("--pipeline", "-p"), + help="Help text", + ), + ], ): - """Adagio command line tool version.""" - if version: - try: - from importlib.metadata import PackageNotFoundError - from importlib.metadata import version as get_version - except ImportError: - from importlib_metadata import PackageNotFoundError # type: ignore - from importlib_metadata import version as get_version # type: ignore - try: - package_version = get_version("adagio") - console.print(f"Adagio {package_version}") - except PackageNotFoundError: - console.print("Adagio version unknown (not installed as a package)") - - raise typer.Exit() + """Run an Adagio pipeline.""" + with open(pipeline, "r") as f: + data = json.load(f) + parameters = parse_parameters(data) + console.print(f"Paramerters: {parameters}") if __name__ == "__main__": From 290b212bcf02d66d58c370c881fe9ab481a5952e Mon Sep 17 00:00:00 2001 From: John Chase Date: Mon, 16 Feb 2026 21:04:24 -0800 Subject: [PATCH 03/44] Adds pipeline parsing --- src/adagio/cli.py | 224 ++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 188 insertions(+), 36 deletions(-) diff --git a/src/adagio/cli.py b/src/adagio/cli.py index 
f06831b..dc580f5 100644 --- a/src/adagio/cli.py +++ b/src/adagio/cli.py @@ -1,58 +1,210 @@ -from __future__ import annotations - +import inspect +import json +import re +import sys from pathlib import Path -from typing import Annotated +from typing import Any, Annotated from cyclopts import App, Parameter from rich.console import Console -import json + from .app.parsers.pipeline import parse_parameters console = Console() -def _adagio_version_string() -> str: - try: - from importlib.metadata import PackageNotFoundError, version as get_version - except ImportError: # pragma: no cover - from importlib_metadata import PackageNotFoundError, version as get_version # type: ignore +def _extract_flag_value(argv: list[str], *flags: str) -> str | None: + """Supports: --flag value, -f value, --flag=value""" + flag_set = set(flags) + for i, tok in enumerate(argv): + if tok in flag_set: + return argv[i + 1] if i + 1 < len(argv) else None + for f in flags: + if tok.startswith(f + "="): + return tok.split("=", 1)[1] + return None + + +def _to_identifier(name: str) -> str: + """Turn arbitrary names into valid Python identifiers for **kwargs keys.""" + name = name.strip() + name = re.sub(r"[^0-9a-zA-Z_]+", "_", name) + if not name: + raise ValueError("Empty parameter name in pipeline file.") + if name[0].isdigit(): + name = "_" + name + return name + + +def _kebab(name: str) -> str: + return name.replace("_", "-") + + +# ---- Adapt these accessors to whatever parse_parameters returns ---- + + +def _spec_name(spec: Any) -> str: + return spec["name"] if isinstance(spec, dict) else getattr(spec, "name") + + +def _spec_required(spec: Any) -> bool: + if isinstance(spec, dict): + return bool(spec.get("required", False)) + return bool(getattr(spec, "required", False)) + + +def _spec_default(spec: Any) -> Any: + if isinstance(spec, dict): + return spec.get("default", None) + return getattr(spec, "default", None) + + +def _spec_help(spec: Any) -> str: + if isinstance(spec, dict): + return 
str(spec.get("help") or spec.get("description") or "") + return str(getattr(spec, "help", "") or getattr(spec, "description", "") or "") + + +def _spec_type(spec: Any) -> type: + """ + Optional: map a spec type -> python type. + If you don’t have types, just return str. + """ + t = None + if isinstance(spec, dict): + t = spec.get("type") or spec.get("type_") + else: + t = getattr(spec, "type", None) or getattr(spec, "type_", None) + + return {"str": str, "int": int, "float": float, "bool": bool}.get(str(t), str) - try: - return f"Adagio {get_version('adagio')}" - except PackageNotFoundError: - return "Adagio version unknown (not installed as a package)" - # name: Annotated[ - # str, - # Parameter( - # name=("--name", "-n"), - # help="Say hello to someone else", - # ), - # ], +# ---- The core: build a dynamic run() with signature + Annotated Parameter ---- -app = App( - help="Adagio command line tool for processing pipelines created with the Adagio GUI.", - version=_adagio_version_string, -) +def _build_dynamic_run(*, param_specs: list[Any]): + """ + Build run(pipeline=..., --dynamic-params...) where dynamic params and help + come from param_specs. 
+ """ + # Map CLI param -> python identifier for kwargs + idents: list[tuple[str, str]] = [] # (ident, original_name) + annotations: dict[str, Any] = {} -@app.command(name="run") -def run_cmd( - pipeline: Annotated[ + # Fixed param: pipeline + annotations["pipeline"] = Annotated[ Path, Parameter( name=("--pipeline", "-p"), - help="Help text", + help="Path to the pipeline JSON file.", ), - ], -): - """Run an Adagio pipeline.""" - with open(pipeline, "r") as f: - data = json.load(f) - parameters = parse_parameters(data) - console.print(f"Paramerters: {parameters}") + ] + + parameters: list[inspect.Parameter] = [ + inspect.Parameter( + name="pipeline", + kind=inspect.Parameter.POSITIONAL_OR_KEYWORD, + annotation=annotations["pipeline"], + ) + ] + + for spec in param_specs: + original = _spec_name(spec) + ident = _to_identifier(original) + idents.append((ident, original)) + + default = _spec_default(spec) + required = _spec_required(spec) + help_text = _spec_help(spec) + py_type = _spec_type(spec) + + opt = f"--{_kebab(original)}" # preserve original naming for CLI + # Required only when required==True and there is no default + is_required = bool(required and default is None) + + annotations[ident] = Annotated[ + py_type, + Parameter( + name=(opt,), + help=help_text or f"Pipeline parameter: {original}", + required=is_required, + ), + ] + + parameters.append( + inspect.Parameter( + name=ident, + kind=inspect.Parameter.KEYWORD_ONLY, + default=(default if default is not None else inspect._empty), + annotation=annotations[ident], + ) + ) + + def run(pipeline: Path, **kwargs: Any) -> None: + # Convert parsed kwargs back to the original pipeline param names + values: dict[str, Any] = {} + for ident, original in idents: + values[original] = kwargs.get(ident) + + console.print(f"[bold]Pipeline:[/bold] {pipeline}") + console.print("[bold]CLI values:[/bold]") + for k, v in values.items(): + console.print(f" {k} = {v!r}") + + # If you want: call your actual runner here using 
`values` + # run_pipeline(pipeline, values) + + run.__annotations__ = annotations + run.__signature__ = inspect.Signature(parameters) + run.__doc__ = ( + "Run an Adagio pipeline.\n\n" + "Dynamic parameters are loaded from the pipeline file and exposed as CLI options.\n" + "Use: adagio run --pipeline --help" + ) + return run + + +def main(argv: list[str] | None = None) -> None: + argv = sys.argv[1:] if argv is None else argv + + pipeline_str = _extract_flag_value(argv, "--pipeline", "-p") + + app = App( + name="adagio", + help="Adagio command line tool for processing pipelines created with the Adagio GUI.", + ) + + if not pipeline_str: + + @app.command + def run( + pipeline: Annotated[ + Path, + Parameter( + name=("--pipeline", "-p"), help="Path to the pipeline JSON file." + ), + ], + ): + """Run a pipeline (dynamic parameters come from the pipeline file).""" + raise SystemExit( + "Missing --pipeline. Try:\n adagio run --pipeline pipeline.json --help" + ) + + app(argv) + return + + pipeline_path = Path(pipeline_str) + data = json.loads(pipeline_path.read_text(encoding="utf-8")) + + # Your existing loader: + param_specs = parse_parameters(data) + + dynamic_run = _build_dynamic_run(param_specs=param_specs) + app.command(dynamic_run, name="run") + + app(argv) if __name__ == "__main__": - app() + main() From 51317a1c07dcf4d0b8a2505682512de43f81134b Mon Sep 17 00:00:00 2001 From: John Chase Date: Sun, 22 Feb 2026 12:46:09 -0800 Subject: [PATCH 04/44] Adds pipeline cli interface --- pyproject.toml | 2 +- src/adagio/__init__.py | 4 +- src/adagio/app/parsers/pipeline.py | 63 ++++++-- src/adagio/cli.py | 246 ----------------------------- src/adagio/cli/__init__.py | 1 + src/adagio/cli/args.py | 49 ++++++ src/adagio/cli/dynamic.py | 135 ++++++++++++++++ src/adagio/cli/main.py | 67 ++++++++ src/adagio/cli/runner.py | 40 +++++ src/adagio/hello.py | 8 - 10 files changed, 347 insertions(+), 268 deletions(-) delete mode 100644 src/adagio/cli.py create mode 100644 
src/adagio/cli/__init__.py create mode 100644 src/adagio/cli/args.py create mode 100644 src/adagio/cli/dynamic.py create mode 100644 src/adagio/cli/main.py create mode 100644 src/adagio/cli/runner.py delete mode 100644 src/adagio/hello.py diff --git a/pyproject.toml b/pyproject.toml index 41de87b..1f50115 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -11,7 +11,7 @@ dependencies = ["cyclopts>=4.5.3", "pydantic>=2.12.5", "rich>=14.1.0"] dev = ["ruff>=0.13.0"] [project.scripts] -adagio = "adagio.cli:app" +adagio = "adagio.cli.main:main" [build-system] requires = ["uv_build>=0.8.17,<0.9.0"] diff --git a/src/adagio/__init__.py b/src/adagio/__init__.py index dde9fb7..c9c2ef6 100644 --- a/src/adagio/__init__.py +++ b/src/adagio/__init__.py @@ -1,3 +1 @@ -from .hello import hello - -__all__ = ["hello"] +__all__: list[str] = [] diff --git a/src/adagio/app/parsers/pipeline.py b/src/adagio/app/parsers/pipeline.py index 0a9c1ac..7989a69 100644 --- a/src/adagio/app/parsers/pipeline.py +++ b/src/adagio/app/parsers/pipeline.py @@ -1,12 +1,10 @@ -"""This is a placeholder for the pipeline parser. It will be implemented in the future when we have a better understanding of the pipeline specification. +"""Helpers for pulling promoted parameter specs from pipeline JSON.""" -This can and should be updated to something more robust -""" - -from pydantic import BaseModel from typing import Any, List, Optional from uuid import UUID +from pydantic import BaseModel + class Parameter(BaseModel): id: UUID @@ -16,9 +14,54 @@ class Parameter(BaseModel): type: str +class Input(BaseModel): + id: UUID + name: str + required: bool + type: str + + +def _extract_signature(data: Any) -> dict[str, Any]: + signature = ( + data.get("spec", {}).get("signature") + if isinstance(data, dict) + else None + ) or (data.get("signature") if isinstance(data, dict) else None) + + if not isinstance(signature, dict): + raise ValueError( + "Invalid pipeline: missing 'signature' section in pipeline JSON." 
+ ) + + return signature + + def parse_parameters(data: Any) -> List[Parameter]: - """Parse a list of parameter dictionaries into a list of Parameter objects.""" - parameters = [] - for param in data["spec"]["signature"]["parameters"]: - parameters.append(Parameter(**param)) - return parameters + """Parse promoted parameters from supported pipeline JSON layouts. + + We currently accept either: + - {"spec": {"signature": {"parameters": [...]}}} + - {"signature": {"parameters": [...]} } + """ + signature = _extract_signature(data) + + raw_parameters = signature.get("parameters") + if not isinstance(raw_parameters, list): + raise ValueError( + "Invalid pipeline: missing 'signature.parameters' list in pipeline JSON." + ) + + return [Parameter(**param) for param in raw_parameters] + + +def parse_inputs(data: Any) -> List[Input]: + """Parse promoted inputs from supported pipeline JSON layouts.""" + signature = _extract_signature(data) + + raw_inputs = signature.get("inputs") + if not isinstance(raw_inputs, list): + raise ValueError( + "Invalid pipeline: missing 'signature.inputs' list in pipeline JSON." 
+ ) + + return [Input(**input_item) for input_item in raw_inputs] diff --git a/src/adagio/cli.py b/src/adagio/cli.py deleted file mode 100644 index ebff6cf..0000000 --- a/src/adagio/cli.py +++ /dev/null @@ -1,246 +0,0 @@ -import inspect -import json -import re -import sys -from pathlib import Path -from typing import Any, Annotated - -from cyclopts import App, Parameter -from rich.console import Console - -from .app.parsers.pipeline import parse_parameters - - -console = Console() - - -def _extract_flag_value(argv: list[str], *flags: str) -> str | None: - """Supports: --flag value, -f value, --flag=value""" - flag_set = set(flags) - for i, tok in enumerate(argv): - if tok in flag_set: - return argv[i + 1] if i + 1 < len(argv) else None - for f in flags: - if tok.startswith(f + "="): - return tok.split("=", 1)[1] - return None - - -def _to_identifier(name: str) -> str: - """Turn arbitrary names into valid Python identifiers for **kwargs keys.""" - name = name.strip() - name = re.sub(r"[^0-9a-zA-Z_]+", "_", name) - if not name: - raise ValueError("Empty parameter name in pipeline file.") - if name[0].isdigit(): - name = "_" + name - return name - - -def _kebab(name: str) -> str: - return name.replace("_", "-") - - -# ---- Adapt these accessors to whatever parse_parameters returns ---- - - -def _spec_name(spec: Any) -> str: - return spec["name"] if isinstance(spec, dict) else getattr(spec, "name") - - -def _spec_required(spec: Any) -> bool: - if isinstance(spec, dict): - return bool(spec.get("required", False)) - return bool(getattr(spec, "required", False)) - - -def _spec_default(spec: Any) -> Any: - if isinstance(spec, dict): - return spec.get("default", None) - return getattr(spec, "default", None) - - -def _spec_help(spec: Any) -> str: - if isinstance(spec, dict): - return str(spec.get("help") or spec.get("description") or "") - return str(getattr(spec, "help", "") or getattr(spec, "description", "") or "") - - -def _spec_type(spec: Any) -> type: - """ - Optional: 
map a spec type -> python type. - If you don’t have types, just return str. - """ - t = None - if isinstance(spec, dict): - t = spec.get("type") or spec.get("type_") - else: - t = getattr(spec, "type", None) or getattr(spec, "type_", None) - - return {"str": str, "int": int, "float": float, "bool": bool}.get(str(t), str) - - -# @app.command("execute") -# def execute_cmd( -# pipeline: Annotated[ -# Path, -# typer.Option( -# "--input", -# "-i", -# help="Adagio created pipeline", -# exists=False, -# file_okay=True, -# dir_okay=False, -# readable=True, -# ), -# ], -# config: Annotated[ -# Path, -# typer.Option( -# "--config", -# "-c", -# help="Configuration file for the pipeline", -# exists=False, -# file_okay=True, -# dir_okay=False, -# readable=True, -# ), -# ], -# ): -# """Execute an Adagio created pipeline""" -# spec = parse_spec(pipeline) -# config = parse_config(config) -# -# process_job(spec, config) -# >>>>>>> dev - - -# ---- The core: build a dynamic run() with signature + Annotated Parameter ---- - - -def _build_dynamic_run(*, param_specs: list[Any]): - """ - Build run(pipeline=..., --dynamic-params...) where dynamic params and help - come from param_specs. 
- """ - # Map CLI param -> python identifier for kwargs - idents: list[tuple[str, str]] = [] # (ident, original_name) - - annotations: dict[str, Any] = {} - - # Fixed param: pipeline - annotations["pipeline"] = Annotated[ - Path, - Parameter( - name=("--pipeline", "-p"), - help="Path to the pipeline JSON file.", - ), - ] - - parameters: list[inspect.Parameter] = [ - inspect.Parameter( - name="pipeline", - kind=inspect.Parameter.POSITIONAL_OR_KEYWORD, - annotation=annotations["pipeline"], - ) - ] - - for spec in param_specs: - original = _spec_name(spec) - ident = _to_identifier(original) - idents.append((ident, original)) - - default = _spec_default(spec) - required = _spec_required(spec) - help_text = _spec_help(spec) - py_type = _spec_type(spec) - - opt = f"--{_kebab(original)}" # preserve original naming for CLI - # Required only when required==True and there is no default - is_required = bool(required and default is None) - - annotations[ident] = Annotated[ - py_type, - Parameter( - name=(opt,), - help=help_text or f"Pipeline parameter: {original}", - required=is_required, - ), - ] - - parameters.append( - inspect.Parameter( - name=ident, - kind=inspect.Parameter.KEYWORD_ONLY, - default=(default if default is not None else inspect._empty), - annotation=annotations[ident], - ) - ) - - def run(pipeline: Path, **kwargs: Any) -> None: - # Convert parsed kwargs back to the original pipeline param names - values: dict[str, Any] = {} - for ident, original in idents: - values[original] = kwargs.get(ident) - - console.print(f"[bold]Pipeline:[/bold] {pipeline}") - console.print("[bold]CLI values:[/bold]") - for k, v in values.items(): - console.print(f" {k} = {v!r}") - - # If you want: call your actual runner here using `values` - # run_pipeline(pipeline, values) - - run.__annotations__ = annotations - run.__signature__ = inspect.Signature(parameters) - run.__doc__ = ( - "Run an Adagio pipeline.\n\n" - "Dynamic parameters are loaded from the pipeline file and exposed as 
CLI options.\n" - "Use: adagio run --pipeline --help" - ) - return run - - -def main(argv: list[str] | None = None) -> None: - argv = sys.argv[1:] if argv is None else argv - - pipeline_str = _extract_flag_value(argv, "--pipeline", "-p") - - app = App( - name="adagio", - help="Adagio command line tool for processing pipelines created with the Adagio GUI.", - ) - - if not pipeline_str: - - @app.command - def run( - pipeline: Annotated[ - Path, - Parameter( - name=("--pipeline", "-p"), help="Path to the pipeline JSON file." - ), - ], - ): - """Run a pipeline (dynamic parameters come from the pipeline file).""" - raise SystemExit( - "Missing --pipeline. Try:\n adagio run --pipeline pipeline.json --help" - ) - - app(argv) - return - - pipeline_path = Path(pipeline_str) - data = json.loads(pipeline_path.read_text(encoding="utf-8")) - - # Your existing loader: - param_specs = parse_parameters(data) - - dynamic_run = _build_dynamic_run(param_specs=param_specs) - app.command(dynamic_run, name="run") - - app(argv) - - -if __name__ == "__main__": - main() diff --git a/src/adagio/cli/__init__.py b/src/adagio/cli/__init__.py new file mode 100644 index 0000000..c9c2ef6 --- /dev/null +++ b/src/adagio/cli/__init__.py @@ -0,0 +1 @@ +__all__: list[str] = [] diff --git a/src/adagio/cli/args.py b/src/adagio/cli/args.py new file mode 100644 index 0000000..c3e06c0 --- /dev/null +++ b/src/adagio/cli/args.py @@ -0,0 +1,49 @@ +import re +from enum import StrEnum + + +class ParamType(StrEnum): + INPUT = "input" + PARAM = "param" + + +def promote_positional_pipeline(argv: list[str]) -> tuple[list[str], str | None]: + """Allow `adagio run ` by rewriting it to `--pipeline `.""" + if len(argv) < 2 or argv[0] != "run": + return argv, None + + candidate = argv[1] + if candidate.startswith("-"): + return argv, None + + rewritten = ["run", "--pipeline", candidate, *argv[2:]] + return rewritten, candidate + + +def extract_flag_value(argv: list[str], *flags: str) -> str | None: + """Supports: --flag 
value, -f value, --flag=value.""" + flag_set = set(flags) + for i, tok in enumerate(argv): + if tok in flag_set: + return argv[i + 1] if i + 1 < len(argv) else None + for flag in flags: + if tok.startswith(flag + "="): + return tok.split("=", 1)[1] + return None + + +def to_identifier(name: str, prefix: str | None = None) -> str: + """Turn arbitrary names into valid Python identifiers for kwargs keys.""" + clean = (name or "").strip() + clean = re.sub(r"[^0-9a-zA-Z_]+", "_", clean) + if not clean: + raise ValueError("Empty parameter name in pipeline file.") + if clean[0].isdigit(): + clean = "_" + clean + if prefix: + return f"{prefix}_{clean}" + return clean + + +def dynamic_opt(name: str, param_type: ParamType) -> str: + return f"--{param_type}-{name.replace('_', '-')}" diff --git a/src/adagio/cli/dynamic.py b/src/adagio/cli/dynamic.py new file mode 100644 index 0000000..a532dd5 --- /dev/null +++ b/src/adagio/cli/dynamic.py @@ -0,0 +1,135 @@ +import inspect +from pathlib import Path +from typing import Any, Annotated, Callable + +from cyclopts import Parameter as CliParameter + +from ..app.parsers.pipeline import Input as InputSpec +from ..app.parsers.pipeline import Parameter as ParamSpec +from .args import ParamType, dynamic_opt, to_identifier + + +def _spec_py_type(type_name: str) -> type: + return {"str": str, "int": int, "float": float, "bool": bool}.get(type_name, str) + + +def build_dynamic_run( + *, + input_specs: list[InputSpec], + param_specs: list[ParamSpec], + run_handler: Callable[ + [Path, dict[str, Any], list[tuple[str, str]], list[tuple[str, str]]], None + ], +): + input_bindings: list[tuple[str, str]] = [] + param_bindings: list[tuple[str, str]] = [] + seen_idents: set[str] = set() + seen_opts: set[str] = {"--pipeline", "-p"} + + annotations: dict[str, Any] = { + "pipeline": Annotated[ + Path, + CliParameter( + name=("--pipeline", "-p"), + help="Path to the pipeline JSON file.", + ), + ] + } + parameters: list[inspect.Parameter] = [ + 
inspect.Parameter( + name="pipeline", + kind=inspect.Parameter.KEYWORD_ONLY, + annotation=annotations["pipeline"], + ) + ] + + def add_dynamic_option( + *, + ident: str, + opt: str, + required: bool, + py_type: type, + help_text: str, + default: Any, + ) -> None: + if opt in seen_opts: + raise ValueError(f"Conflicting CLI option generated: {opt!r}.") + seen_opts.add(opt) + + annotations[ident] = Annotated[ + py_type, + CliParameter( + name=(opt,), + help=help_text, + required=required, + ), + ] + parameters.append( + inspect.Parameter( + name=ident, + kind=inspect.Parameter.KEYWORD_ONLY, + default=default, + annotation=annotations[ident], + ) + ) + + for spec in input_specs: + original = spec.name + ident = to_identifier(original, "input") + if ident in seen_idents: + raise ValueError( + f"Duplicate pipeline input name after normalization: {original!r}." + ) + seen_idents.add(ident) + input_bindings.append((ident, original)) + + required = spec.required + type_text = spec.type + opt = dynamic_opt(original, ParamType.INPUT) + add_dynamic_option( + ident=ident, + opt=opt, + required=required, + py_type=str, + help_text=f"Pipeline input: {original}" + (f" ({type_text})" if type_text else ""), + default=inspect._empty if required else None, + ) + + for spec in param_specs: + original = spec.name + ident = to_identifier(original, "param") + if ident in seen_idents: + raise ValueError( + f"Duplicate pipeline parameter name after normalization: {original!r}." 
+ ) + seen_idents.add(ident) + param_bindings.append((ident, original)) + + default = spec.default + required = spec.required + opt = dynamic_opt(original, ParamType.PARAM) + add_dynamic_option( + ident=ident, + opt=opt, + required=bool(required and default is None), + py_type=_spec_py_type(spec.type), + help_text=f"Pipeline parameter: {original}", + default=default if default is not None else inspect._empty, + ) + + def run(pipeline: Path, **kwargs: Any) -> None: + run_handler( + pipeline, + kwargs, + input_bindings, + param_bindings, + ) + + run.__annotations__ = annotations + run.__signature__ = inspect.Signature(parameters) + run.__doc__ = ( + "Run an Adagio pipeline.\n\n" + "Dynamic inputs and parameters are loaded from the pipeline file and exposed as CLI options.\n" + "Use: adagio run --pipeline PATH --help" + ) + return run diff --git a/src/adagio/cli/main.py b/src/adagio/cli/main.py new file mode 100644 index 0000000..757d3df --- /dev/null +++ b/src/adagio/cli/main.py @@ -0,0 +1,67 @@ +import json +import sys +from functools import partial +from pathlib import Path +from typing import Annotated + +from cyclopts import App, Parameter +from rich.console import Console + +from ..app.parsers.pipeline import parse_inputs, parse_parameters +from .args import extract_flag_value, promote_positional_pipeline +from .dynamic import build_dynamic_run +from .runner import run_pipeline_from_kwargs + + +console = Console() + + +def main(argv: list[str] | None = None) -> None: + argv = sys.argv[1:] if argv is None else argv + + argv, positional_pipeline = promote_positional_pipeline(argv) + pipeline_str = extract_flag_value(argv, "--pipeline", "-p") + if pipeline_str is None: + pipeline_str = positional_pipeline + + app = App( + name="adagio", + help="Adagio command line tool for processing pipelines created with the Adagio GUI.", + ) + + if not pipeline_str: + + @app.command + def run( + *, + pipeline: Annotated[ + Path, + Parameter( + name=("--pipeline", "-p"), 
help="Path to the pipeline JSON file." + ), + ], + ): + """Run a pipeline (requires --pipeline; dynamic options come from that file).""" + raise SystemExit( + "Missing --pipeline. Try:\n adagio run --pipeline pipeline.json --help" + ) + + app(argv) + return + + pipeline_path = Path(pipeline_str) + data = json.loads(pipeline_path.read_text(encoding="utf-8")) + input_specs = parse_inputs(data) + param_specs = parse_parameters(data) + + dynamic_run = build_dynamic_run( + input_specs=input_specs, + param_specs=param_specs, + run_handler=partial(run_pipeline_from_kwargs, console=console), + ) + app.command(dynamic_run, name="run") + app(argv) + + +if __name__ == "__main__": + main() diff --git a/src/adagio/cli/runner.py b/src/adagio/cli/runner.py new file mode 100644 index 0000000..218ce86 --- /dev/null +++ b/src/adagio/cli/runner.py @@ -0,0 +1,40 @@ +import json +from pathlib import Path +from typing import Any + +from rich.console import Console + + +def run_pipeline_from_kwargs( + pipeline: Path, + kwargs: dict[str, Any], + input_bindings: list[tuple[str, str]], + param_bindings: list[tuple[str, str]], + *, + console: Console, +) -> None: + try: + from ..execute import execute_pipeline + from ..model.pipeline import AdagioPipeline + except ModuleNotFoundError as exc: + raise SystemExit( + "Execution dependencies are missing. " + "Install runtime requirements (for example, qiime2/parsl) to run pipelines." 
+ ) from exc + + data = json.loads(pipeline.read_text(encoding="utf-8")) + parsed_pipeline = AdagioPipeline.model_validate(data) + arguments = parsed_pipeline.signature.to_default_arguments() + + for ident, original in input_bindings: + value = kwargs.get(ident) + if value is not None: + arguments.inputs[original] = str(value) + + for ident, original in param_bindings: + if ident in kwargs: + arguments.parameters[original] = kwargs.get(ident) + + console.print(f"[bold]Pipeline:[/bold] {pipeline}") + console.print("[bold]Executing pipeline[/bold]") + execute_pipeline(pipeline=parsed_pipeline, arguments=arguments) diff --git a/src/adagio/hello.py b/src/adagio/hello.py deleted file mode 100644 index 9f0b5ce..0000000 --- a/src/adagio/hello.py +++ /dev/null @@ -1,8 +0,0 @@ -"""Hello module.""" - -from pathlib import Path - - -def hello(input_file: Path) -> None: - """Hello.""" - print(input_file) From 23e11e706226b369bda16a83846617a46a495505 Mon Sep 17 00:00:00 2001 From: John Chase Date: Sun, 22 Feb 2026 15:54:45 -0800 Subject: [PATCH 05/44] Adds arguments --- examples/Dada2-arguments.json | 12 ++ examples/Dada2.adg | 301 +++++++++++++++++++++++++++ examples/simple.json | 377 ---------------------------------- src/adagio/cli/args.py | 6 + src/adagio/cli/dynamic.py | 88 ++++++-- src/adagio/cli/main.py | 96 ++++++++- src/adagio/cli/runner.py | 47 ++++- src/adagio/model/arguments.py | 8 +- 8 files changed, 538 insertions(+), 397 deletions(-) create mode 100644 examples/Dada2-arguments.json create mode 100644 examples/Dada2.adg delete mode 100644 examples/simple.json diff --git a/examples/Dada2-arguments.json b/examples/Dada2-arguments.json new file mode 100644 index 0000000..b1d3cf7 --- /dev/null +++ b/examples/Dada2-arguments.json @@ -0,0 +1,12 @@ +{ + "version": 1, + "inputs": { + "sample_metadata": "sm.tsv", + "table": "table.qza" + }, + "parameters": { + "compare": "treatment", + "metric": "canberra" + }, + "outputs": {} +} diff --git a/examples/Dada2.adg 
b/examples/Dada2.adg new file mode 100644 index 0000000..ec61d07 --- /dev/null +++ b/examples/Dada2.adg @@ -0,0 +1,301 @@ +{ + "name": "Dada2", + "description": "", + "spec": { + "type": "pipeline", + "meta": { + "version": "1.0.0rc" + }, + "signature": { + "inputs": [ + { + "id": "519bcdb4-0ebd-4d91-8831-d631514550ae", + "name": "seqs", + "type": "RawSequences | EMPSingleEndSequences | EMPPairedEndSequences", + "ast": { + "type": "union", + "members": [ + { + "name": "RawSequences", + "type": "expression", + "fields": [], + "builtin": false, + "predicate": null + }, + { + "name": "EMPSingleEndSequences", + "type": "expression", + "fields": [], + "builtin": false, + "predicate": null + }, + { + "name": "EMPPairedEndSequences", + "type": "expression", + "fields": [], + "builtin": false, + "predicate": null + } + ] + }, + "required": true + }, + { + "id": "102b1edf-1bfa-413d-b878-ca7b75e5a43e", + "name": "barcodes", + "type": "MetadataColumn[Categorical]", + "ast": { + "name": "MetadataColumn", + "type": "expression", + "fields": [ + { + "name": "Categorical", + "type": "expression", + "fields": [], + "builtin": true, + "predicate": null + } + ], + "builtin": true, + "predicate": null + }, + "required": true + } + ], + "parameters": [ + { + "id": "62d9cdad-4d4d-4cb3-ac44-e1d3be2249c3", + "name": "barcodes", + "required": true, + "type": "MetadataColumn[Categorical]", + "ast": { + "name": "MetadataColumn", + "type": "expression", + "fields": [ + { + "name": "Categorical", + "type": "expression", + "fields": [], + "builtin": true, + "predicate": null + } + ], + "builtin": true, + "predicate": null + } + }, + { + "id": "48e95116-d211-4943-b03e-074834a97c0e", + "name": "trunc_len", + "required": false, + "default": null, + "type": "Int", + "ast": { + "name": "Int", + "type": "expression", + "fields": [], + "builtin": true, + "predicate": null + } + }, + { + "id": "4837a451-6e0b-40bb-8422-0740d73fe562", + "name": "trim_foobar", + "required": false, + "default": 150, + 
"type": "Int", + "ast": { + "name": "Int", + "type": "expression", + "fields": [], + "builtin": true, + "predicate": null + } + } + ], + "outputs": [ + { + "id": "53395d12-a842-4f3d-b960-4df47fb9d2b5", + "name": "table_1", + "type": "FeatureTable[Frequency]", + "ast": { + "name": "FeatureTable", + "type": "expression", + "fields": [ + { + "name": "Frequency", + "type": "expression", + "fields": [], + "builtin": false, + "predicate": null + } + ], + "builtin": false, + "predicate": null + } + }, + { + "id": "c25ee826-dd40-4c50-960c-aa445bdf121f", + "name": "denoising_stats_1", + "type": "SampleData[DADA2Stats]", + "ast": { + "name": "SampleData", + "type": "expression", + "fields": [ + { + "name": "DADA2Stats", + "type": "expression", + "fields": [], + "builtin": false, + "predicate": null + } + ], + "builtin": false, + "predicate": null + } + }, + { + "id": "0debddb6-8d5a-4e7d-82da-c96a3d7506cc", + "name": "representative_sequences_1", + "type": "FeatureData[Sequence]", + "ast": { + "name": "FeatureData", + "type": "expression", + "fields": [ + { + "name": "Sequence", + "type": "expression", + "fields": [], + "builtin": false, + "predicate": null + } + ], + "builtin": false, + "predicate": null + } + }, + { + "id": "1dca4868-80b5-4c78-9b58-a267c390da37", + "name": "per_sample_sequences", + "type": "SampleData[SequencesWithQuality]", + "ast": { + "name": "SampleData", + "type": "expression", + "fields": [ + { + "name": "SequencesWithQuality", + "type": "expression", + "fields": [], + "builtin": false, + "predicate": null + } + ], + "builtin": false, + "predicate": null + } + }, + { + "id": "9ea8facf-c776-4cf2-83f9-653dbac8edde", + "name": "error_correction_details", + "type": "ErrorCorrectionDetails", + "ast": { + "name": "ErrorCorrectionDetails", + "type": "expression", + "fields": [], + "builtin": false, + "predicate": null + } + } + ] + }, + "graph": [ + { + "id": "4cb48e7b-1ab9-4db2-b615-c03246fe79e4", + "kind": "plugin-action", + "plugin": "dada2", + "action": 
"denoise_single", + "inputs": { + "demultiplexed_seqs": { + "kind": "archive", + "id": "1dca4868-80b5-4c78-9b58-a267c390da37" + } + }, + "parameters": { + "trunc_len": { + "kind": "promoted", + "id": "48e95116-d211-4943-b03e-074834a97c0e" + }, + "trim_left": { + "kind": "promoted", + "id": "4837a451-6e0b-40bb-8422-0740d73fe562" + } + }, + "outputs": { + "table": { + "kind": "archive", + "id": "53395d12-a842-4f3d-b960-4df47fb9d2b5" + }, + "representative_sequences": { + "kind": "archive", + "id": "0debddb6-8d5a-4e7d-82da-c96a3d7506cc" + }, + "denoising_stats": { + "kind": "archive", + "id": "c25ee826-dd40-4c50-960c-aa445bdf121f" + } + } + }, + { + "id": "d034ef2e-23c2-4b84-bbae-dd2a9b5bfba9", + "kind": "plugin-action", + "plugin": "demux", + "action": "emp_single", + "inputs": { + "seqs": { + "kind": "archive", + "id": "519bcdb4-0ebd-4d91-8831-d631514550ae" + }, + "barcodes": { + "kind": "metadata", + "id": "102b1edf-1bfa-413d-b878-ca7b75e5a43e" + } + }, + "parameters": { + "barcodes": { + "kind": "metadata", + "column": { + "kind": "promoted", + "id": "62d9cdad-4d4d-4cb3-ac44-e1d3be2249c3" + } + } + }, + "outputs": { + "per_sample_sequences": { + "kind": "archive", + "id": "1dca4868-80b5-4c78-9b58-a267c390da37" + }, + "error_correction_details": { + "kind": "archive", + "id": "9ea8facf-c776-4cf2-83f9-653dbac8edde" + } + } + } + ] + }, + "layout": { + "version": 0, + "type": "grid", + "positions": { + "4cb48e7b-1ab9-4db2-b615-c03246fe79e4": { + "row": 2, + "col": 3 + }, + "d034ef2e-23c2-4b84-bbae-dd2a9b5bfba9": { + "row": 1, + "col": 3 + } + } + }, + "exportedAt": "2026-02-17T03:58:22.236Z", + "version": 1 +} \ No newline at end of file diff --git a/examples/simple.json b/examples/simple.json deleted file mode 100644 index 7600d0e..0000000 --- a/examples/simple.json +++ /dev/null @@ -1,377 +0,0 @@ -{ - "type": "pipeline", - "meta": { - "version": "1.0.0rc" - }, - "signature": { - "inputs": [ - { - "id": "9a586cf8-272f-43de-b137-3fc36729f2c7", - "name": 
"sample_metadata", - "type": "Metadata", - "ast": { - "name": "Metadata", - "type": "expression", - "fields": [], - "builtin": true, - "predicate": null - }, - "required": true - }, - { - "id": "aead4a07-1cd3-4034-9e20-80536b8c2264", - "name": "table", - "type": "FeatureTable[Frequency | PresenceAbsence]", - "ast": { - "name": "FeatureTable", - "type": "expression", - "fields": [ - { - "type": "union", - "members": [ - { - "name": "Frequency", - "type": "expression", - "fields": [], - "builtin": false, - "predicate": null - }, - { - "name": "PresenceAbsence", - "type": "expression", - "fields": [], - "builtin": false, - "predicate": null - } - ] - } - ], - "builtin": false, - "predicate": null - }, - "required": true - } - ], - "parameters": [ - { - "id": "c2051f90-5128-4197-a430-e1be6e0ace56", - "name": "metric", - "required": false, - "default": "canberra", - "type": "Str % Choices('aitchison', 'braycurtis', 'canberra', 'canberra_adkins', 'chebyshev', 'cityblock', 'correlation', 'cosine', 'dice', 'euclidean', 'hamming', 'jaccard', 'jensenshannon', 'matching', 'minkowski', 'rogerstanimoto', 'russellrao', 'seuclidean', 'sokalmichener', 'sokalsneath', 'sqeuclidean', 'yule')", - "ast": { - "name": "Str", - "type": "expression", - "fields": [], - "builtin": true, - "predicate": { - "name": "Choices", - "type": "predicate", - "choices": [ - "aitchison", - "braycurtis", - "canberra", - "canberra_adkins", - "chebyshev", - "cityblock", - "correlation", - "cosine", - "dice", - "euclidean", - "hamming", - "jaccard", - "jensenshannon", - "matching", - "minkowski", - "rogerstanimoto", - "russellrao", - "seuclidean", - "sokalmichener", - "sokalsneath", - "sqeuclidean", - "yule" - ] - } - } - }, - { - "id": "66b84a2e-9ad9-4774-ad8a-438ae54bf3da", - "name": "compare", - "required": true, - "default": null, - "type": "MetadataColumn[Categorical]", - "ast": { - "name": "MetadataColumn", - "type": "expression", - "fields": [ - { - "name": "Categorical", - "type": "expression", - 
"fields": [], - "builtin": true, - "predicate": null - } - ], - "builtin": true, - "predicate": null - } - } - ], - "outputs": [ - { - "id": "a264bb78-3496-4324-ad75-a5aa933dd8f2", - "name": "summary", - "type": "Visualization", - "ast": { - "name": "Visualization", - "type": "expression", - "fields": [], - "builtin": true, - "predicate": null - } - }, - { - "id": "ee42b860-8d75-4120-b56c-b9d5c12bb572", - "name": "distance_matrix1", - "type": "DistanceMatrix", - "ast": { - "name": "DistanceMatrix", - "type": "expression", - "fields": [], - "builtin": false, - "predicate": null - } - }, - { - "id": "4cb5be9f-964e-42e2-af86-ca75e72c0fcf", - "name": "beta-group", - "type": "Visualization", - "ast": { - "name": "Visualization", - "type": "expression", - "fields": [], - "builtin": true, - "predicate": null - } - }, - { - "id": "7667756f-4c8a-4090-8ffd-200e37b6e4a7", - "name": "distance_matrix2", - "type": "DistanceMatrix", - "ast": { - "name": "DistanceMatrix", - "type": "expression", - "fields": [], - "builtin": false, - "predicate": null - } - }, - { - "id": "332931ea-4a96-42ad-bf32-81713781e29b", - "name": "pcoa", - "type": "PCoAResults", - "ast": { - "name": "PCoAResults", - "type": "expression", - "fields": [], - "builtin": false, - "predicate": null - } - }, - { - "id": "ceb812ba-793b-4387-bdd0-4b91569e25b9", - "name": "emperor", - "type": "Visualization", - "ast": { - "name": "Visualization", - "type": "expression", - "fields": [], - "builtin": true, - "predicate": null - } - } - ] - }, - "graph": [ - { - "id": "febcf6e0-90ed-44a6-8b1b-f7beb53223a2", - "kind": "built-in", - "name": "root-input", - "inputs": { - "sample_metadata": { - "kind": "archive", - "id": "9a586cf8-272f-43de-b137-3fc36729f2c7" - } - }, - "parameters": {}, - "outputs": { - "sample_metadata": { - "kind": "archive", - "id": "990f2bbb-dda6-4f83-87fa-5a56025d2095" - } - } - }, - { - "id": "a0ec2ae8-6736-4fab-9b07-9e718a7194f1", - "kind": "built-in", - "name": "root-input", - "inputs": { - 
"table": { - "kind": "archive", - "id": "aead4a07-1cd3-4034-9e20-80536b8c2264" - } - }, - "parameters": {}, - "outputs": { - "table": { - "kind": "archive", - "id": "63981d12-7beb-4443-aed4-a5e7b29ab796" - } - } - }, - { - "id": "31efdf49-e06b-4ad5-8c97-47a2a323120f", - "kind": "plugin-action", - "plugin": "feature_table", - "action": "summarize", - "inputs": { - "table": { - "kind": "archive", - "id": "63981d12-7beb-4443-aed4-a5e7b29ab796" - }, - "sample_metadata": { - "kind": "metadata", - "id": "990f2bbb-dda6-4f83-87fa-5a56025d2095" - } - }, - "parameters": {}, - "outputs": { - "visualization": { - "kind": "archive", - "id": "a264bb78-3496-4324-ad75-a5aa933dd8f2" - } - } - }, - { - "id": "cfd580cd-7278-45b4-84f6-f879b1f94b24", - "kind": "plugin-action", - "plugin": "diversity", - "action": "beta", - "inputs": { - "table": { - "kind": "archive", - "id": "63981d12-7beb-4443-aed4-a5e7b29ab796" - } - }, - "parameters": { - "metric": { - "kind": "promoted", - "id": "c2051f90-5128-4197-a430-e1be6e0ace56" - } - }, - "outputs": { - "distance_matrix": { - "kind": "archive", - "id": "ee42b860-8d75-4120-b56c-b9d5c12bb572" - } - } - }, - { - "id": "22a5a5d7-d532-4251-914a-ecf34df945fd", - "kind": "plugin-action", - "plugin": "diversity", - "action": "beta_group_significance", - "inputs": { - "distance_matrix": { - "kind": "archive", - "id": "ee42b860-8d75-4120-b56c-b9d5c12bb572" - }, - "metadata": { - "kind": "metadata", - "id": "990f2bbb-dda6-4f83-87fa-5a56025d2095" - } - }, - "parameters": { - "metadata": { - "kind": "metadata", - "column": { - "kind": "promoted", - "id": "66b84a2e-9ad9-4774-ad8a-438ae54bf3da" - } - } - }, - "outputs": { - "visualization": { - "kind": "archive", - "id": "4cb5be9f-964e-42e2-af86-ca75e72c0fcf" - } - } - }, - { - "id": "68a5d6a8-7d24-40d7-8197-3294951f5cd6", - "kind": "plugin-action", - "plugin": "diversity", - "action": "beta", - "inputs": { - "table": { - "kind": "archive", - "id": "63981d12-7beb-4443-aed4-a5e7b29ab796" - } - }, - 
"parameters": { - "metric": { - "kind": "promoted", - "id": "c2051f90-5128-4197-a430-e1be6e0ace56" - } - }, - "outputs": { - "distance_matrix": { - "kind": "archive", - "id": "7667756f-4c8a-4090-8ffd-200e37b6e4a7" - } - } - }, - { - "id": "ef9ed04d-bb5e-40ab-9292-b5c0573be32d", - "kind": "plugin-action", - "plugin": "diversity", - "action": "pcoa", - "inputs": { - "distance_matrix": { - "kind": "archive", - "id": "7667756f-4c8a-4090-8ffd-200e37b6e4a7" - } - }, - "parameters": {}, - "outputs": { - "pcoa": { - "kind": "archive", - "id": "332931ea-4a96-42ad-bf32-81713781e29b" - } - } - }, - { - "id": "464fc7c0-a91b-4d66-a304-8668c0377867", - "kind": "plugin-action", - "plugin": "emperor", - "action": "plot", - "inputs": { - "pcoa": { - "kind": "archive", - "id": "332931ea-4a96-42ad-bf32-81713781e29b" - }, - "metadata": { - "kind": "metadata", - "id": "990f2bbb-dda6-4f83-87fa-5a56025d2095" - } - }, - "parameters": {}, - "outputs": { - "visualization": { - "kind": "archive", - "id": "ceb812ba-793b-4387-bdd0-4b91569e25b9" - } - } - } - ] -} \ No newline at end of file diff --git a/src/adagio/cli/args.py b/src/adagio/cli/args.py index c3e06c0..e891310 100644 --- a/src/adagio/cli/args.py +++ b/src/adagio/cli/args.py @@ -7,6 +7,12 @@ class ParamType(StrEnum): PARAM = "param" +class ShowParamsMode(StrEnum): + ALL = "all" + MISSING = "missing" + REQUIRED = "required" + + def promote_positional_pipeline(argv: list[str]) -> tuple[list[str], str | None]: """Allow `adagio run ` by rewriting it to `--pipeline `.""" if len(argv) < 2 or argv[0] != "run": diff --git a/src/adagio/cli/dynamic.py b/src/adagio/cli/dynamic.py index a532dd5..0e1d8e2 100644 --- a/src/adagio/cli/dynamic.py +++ b/src/adagio/cli/dynamic.py @@ -6,7 +6,7 @@ from ..app.parsers.pipeline import Input as InputSpec from ..app.parsers.pipeline import Parameter as ParamSpec -from .args import ParamType, dynamic_opt, to_identifier +from .args import ParamType, ShowParamsMode, dynamic_opt, to_identifier def 
_spec_py_type(type_name: str) -> type: @@ -18,13 +18,24 @@ def build_dynamic_run( input_specs: list[InputSpec], param_specs: list[ParamSpec], run_handler: Callable[ - [Path, dict[str, Any], list[tuple[str, str]], list[tuple[str, str]]], None + [ + Path, + Path | None, + dict[str, Any], + list[tuple[str, str]], + list[tuple[str, str]], + list[str], + list[str], + ], + None, ], ): input_bindings: list[tuple[str, str]] = [] param_bindings: list[tuple[str, str]] = [] + required_inputs: list[str] = [] + required_params: list[str] = [] seen_idents: set[str] = set() - seen_opts: set[str] = {"--pipeline", "-p"} + seen_opts: set[str] = {"--pipeline", "-p", "--arguments", "--show-params"} annotations: dict[str, Any] = { "pipeline": Annotated[ @@ -35,12 +46,40 @@ def build_dynamic_run( ), ] } + + annotations["arguments_file"] = Annotated[ + Path | None, + CliParameter( + name=("--arguments",), + help="Path to a JSON arguments file. Values are applied before CLI overrides.", + ), + ] + annotations["show_params"] = Annotated[ + ShowParamsMode, + CliParameter( + name=("--show-params",), + help="Parameter display mode: all, missing, or required.", + ), + ] + parameters: list[inspect.Parameter] = [ inspect.Parameter( name="pipeline", kind=inspect.Parameter.KEYWORD_ONLY, annotation=annotations["pipeline"], - ) + ), + inspect.Parameter( + name="arguments_file", + kind=inspect.Parameter.KEYWORD_ONLY, + default=None, + annotation=annotations["arguments_file"], + ), + inspect.Parameter( + name="show_params", + kind=inspect.Parameter.KEYWORD_ONLY, + default=ShowParamsMode.REQUIRED, + annotation=annotations["show_params"], + ), ] def add_dynamic_option( @@ -56,8 +95,9 @@ def add_dynamic_option( raise ValueError(f"Conflicting CLI option generated: {opt!r}.") seen_opts.add(opt) + annotation_type = py_type | None if default is None else py_type annotations[ident] = Annotated[ - py_type, + annotation_type, CliParameter( name=(opt,), help=help_text, @@ -82,17 +122,22 @@ def 
add_dynamic_option( ) seen_idents.add(ident) input_bindings.append((ident, original)) + if spec.required: + required_inputs.append(original) - required = spec.required type_text = spec.type opt = dynamic_opt(original, ParamType.INPUT) add_dynamic_option( ident=ident, opt=opt, - required=required, + required=False, py_type=str, - help_text=f"Pipeline input: {original}" + (f" ({type_text})" if type_text else ""), - default=inspect._empty if required else None, + help_text=( + f"Pipeline input: {original}" + + (f" ({type_text})" if type_text else "") + + (" [required]" if spec.required else "") + ), + default=None, ) for spec in param_specs: @@ -108,21 +153,38 @@ def add_dynamic_option( default = spec.default required = spec.required opt = dynamic_opt(original, ParamType.PARAM) + is_required = bool(required and default is None) + if is_required: + required_params.append(original) + default_text = f" [default: {default}]" if default is not None else "" add_dynamic_option( ident=ident, opt=opt, - required=bool(required and default is None), + required=False, py_type=_spec_py_type(spec.type), - help_text=f"Pipeline parameter: {original}", - default=default if default is not None else inspect._empty, + help_text=( + f"Pipeline parameter: {original}" + + (" [required]" if is_required else "") + + default_text + ), + default=None, ) - def run(pipeline: Path, **kwargs: Any) -> None: + def run( + pipeline: Path, + arguments_file: Path | None = None, + show_params: ShowParamsMode = ShowParamsMode.REQUIRED, + **kwargs: Any, + ) -> None: + _ = show_params run_handler( pipeline, + arguments_file, kwargs, input_bindings, param_bindings, + required_inputs, + required_params, ) run.__annotations__ = annotations diff --git a/src/adagio/cli/main.py b/src/adagio/cli/main.py index 757d3df..6ddc240 100644 --- a/src/adagio/cli/main.py +++ b/src/adagio/cli/main.py @@ -2,13 +2,15 @@ import sys from functools import partial from pathlib import Path -from typing import Annotated +from typing 
import Annotated, Any from cyclopts import App, Parameter from rich.console import Console +from ..app.parsers.pipeline import Input as InputSpec +from ..app.parsers.pipeline import Parameter as ParamSpec from ..app.parsers.pipeline import parse_inputs, parse_parameters -from .args import extract_flag_value, promote_positional_pipeline +from .args import ShowParamsMode, extract_flag_value, promote_positional_pipeline from .dynamic import build_dynamic_run from .runner import run_pipeline_from_kwargs @@ -21,6 +23,15 @@ def main(argv: list[str] | None = None) -> None: argv, positional_pipeline = promote_positional_pipeline(argv) pipeline_str = extract_flag_value(argv, "--pipeline", "-p") + show_mode_str = extract_flag_value(argv, "--show-params") + try: + show_mode = ( + ShowParamsMode(show_mode_str) if show_mode_str else ShowParamsMode.REQUIRED + ) + except ValueError as exc: + raise SystemExit( + "Invalid --show-params value. Use one of: all, missing, required." + ) from exc if pipeline_str is None: pipeline_str = positional_pipeline @@ -40,7 +51,22 @@ def run( name=("--pipeline", "-p"), help="Path to the pipeline JSON file." ), ], + arguments: Annotated[ + Path | None, + Parameter( + name=("--arguments",), + help="Path to a JSON arguments file.", + ), + ] = None, + show_params: Annotated[ + ShowParamsMode, + Parameter( + name=("--show-params",), + help="Parameter display mode: all, missing, or required.", + ), + ] = ShowParamsMode.REQUIRED, ): + _ = show_params """Run a pipeline (requires --pipeline; dynamic options come from that file).""" raise SystemExit( "Missing --pipeline. 
Try:\n adagio run --pipeline pipeline.json --help" @@ -53,15 +79,77 @@ def run( data = json.loads(pipeline_path.read_text(encoding="utf-8")) input_specs = parse_inputs(data) param_specs = parse_parameters(data) - - dynamic_run = build_dynamic_run( + arguments_path_str = extract_flag_value(argv, "--arguments") + arguments_data = ( + _load_arguments_data(Path(arguments_path_str)) if arguments_path_str else None + ) + visible_inputs, visible_params = _filter_visible_specs( input_specs=input_specs, param_specs=param_specs, + show_mode=show_mode, + arguments_data=arguments_data, + ) + + dynamic_run = build_dynamic_run( + input_specs=visible_inputs, + param_specs=visible_params, run_handler=partial(run_pipeline_from_kwargs, console=console), ) app.command(dynamic_run, name="run") app(argv) +def _filter_visible_specs( + *, + input_specs: list[InputSpec], + param_specs: list[ParamSpec], + show_mode: ShowParamsMode, + arguments_data: dict[str, Any] | None, +) -> tuple[list[InputSpec], list[ParamSpec]]: + if show_mode is ShowParamsMode.ALL: + return input_specs, param_specs + + state_inputs = {spec.name: None for spec in input_specs} + state_params = {spec.name: spec.default for spec in param_specs} + + if arguments_data is not None: + state_inputs.update(arguments_data.get("inputs", {})) + state_params.update(arguments_data.get("parameters", {})) + + if show_mode is ShowParamsMode.REQUIRED: + filtered_inputs = [spec for spec in input_specs if spec.required] + filtered_params = [ + spec for spec in param_specs if bool(spec.required and spec.default is None) + ] + return filtered_inputs, filtered_params + + filtered_inputs = [ + spec for spec in input_specs if _is_missing(state_inputs.get(spec.name)) + ] + filtered_params = [ + spec for spec in param_specs if _is_missing(state_params.get(spec.name)) + ] + return filtered_inputs, filtered_params + + +def _load_arguments_data(path: Path) -> dict[str, Any]: + data = json.loads(path.read_text(encoding="utf-8")) + if not 
isinstance(data, dict): + raise SystemExit("Invalid arguments file: expected a JSON object.") + if "inputs" not in data: + data["inputs"] = {} + if "parameters" not in data: + data["parameters"] = {} + if not isinstance(data.get("inputs"), dict) or not isinstance( + data.get("parameters"), dict + ): + raise SystemExit("Invalid arguments file: 'inputs' and 'parameters' must be objects.") + return data + + +def _is_missing(value: Any) -> bool: + return value is None or value == "" + + if __name__ == "__main__": main() diff --git a/src/adagio/cli/runner.py b/src/adagio/cli/runner.py index 218ce86..0023c88 100644 --- a/src/adagio/cli/runner.py +++ b/src/adagio/cli/runner.py @@ -7,14 +7,18 @@ def run_pipeline_from_kwargs( pipeline: Path, + arguments_file: Path | None, kwargs: dict[str, Any], input_bindings: list[tuple[str, str]], param_bindings: list[tuple[str, str]], + required_inputs: list[str], + required_params: list[str], *, console: Console, ) -> None: try: from ..execute import execute_pipeline + from ..model.arguments import AdagioArgumentsFile from ..model.pipeline import AdagioPipeline except ModuleNotFoundError as exc: raise SystemExit( @@ -26,15 +30,54 @@ def run_pipeline_from_kwargs( parsed_pipeline = AdagioPipeline.model_validate(data) arguments = parsed_pipeline.signature.to_default_arguments() + input_names = {name for _, name in input_bindings} + param_names = {name for _, name in param_bindings} + + if arguments_file is not None: + file_data = json.loads(arguments_file.read_text(encoding="utf-8")) + arguments_data = AdagioArgumentsFile.model_validate(file_data) + + unknown_inputs = sorted(set(arguments_data.inputs) - input_names) + if unknown_inputs: + raise SystemExit( + "Unknown inputs in arguments file: " + ", ".join(unknown_inputs) + ) + + unknown_params = sorted(set(arguments_data.parameters) - param_names) + if unknown_params: + raise SystemExit( + "Unknown parameters in arguments file: " + ", ".join(unknown_params) + ) + + 
arguments.inputs.update(arguments_data.inputs) + arguments.parameters.update(arguments_data.parameters) + if arguments_data.outputs: + arguments.outputs = arguments_data.outputs + for ident, original in input_bindings: value = kwargs.get(ident) if value is not None: arguments.inputs[original] = str(value) for ident, original in param_bindings: - if ident in kwargs: - arguments.parameters[original] = kwargs.get(ident) + value = kwargs.get(ident) + if value is not None: + arguments.parameters[original] = value + + missing_inputs = [name for name in required_inputs if _is_missing(arguments.inputs.get(name))] + missing_params = [ + name for name in required_params if _is_missing(arguments.parameters.get(name)) + ] + if missing_inputs or missing_params: + missing = [f"input:{name}" for name in missing_inputs] + [ + f"param:{name}" for name in missing_params + ] + raise SystemExit("Missing required arguments: " + ", ".join(missing)) console.print(f"[bold]Pipeline:[/bold] {pipeline}") console.print("[bold]Executing pipeline[/bold]") execute_pipeline(pipeline=parsed_pipeline, arguments=arguments) + + +def _is_missing(value: Any) -> bool: + return value is None or value == "" diff --git a/src/adagio/model/arguments.py b/src/adagio/model/arguments.py index aba7a80..96ff047 100644 --- a/src/adagio/model/arguments.py +++ b/src/adagio/model/arguments.py @@ -1,5 +1,5 @@ import typing as t -from pydantic import BaseModel +from pydantic import BaseModel, Field from .task import AllowableValue @@ -27,3 +27,9 @@ def _format_repr_sect(self, section, name): return lines + +class AdagioArgumentsFile(BaseModel): + version: int = 1 + inputs: dict[str, str] = Field(default_factory=dict) + parameters: dict[str, AllowableValue] = Field(default_factory=dict) + outputs: dict[str, str] = Field(default_factory=dict) From cfa1f8883bd8203d06d65a81614da1620afd3bb4 Mon Sep 17 00:00:00 2001 From: John Chase Date: Sun, 22 Feb 2026 15:57:35 -0800 Subject: [PATCH 06/44] removes example --- 
examples/simple.json | 377 ------------------------------------------- 1 file changed, 377 deletions(-) delete mode 100644 examples/simple.json diff --git a/examples/simple.json b/examples/simple.json deleted file mode 100644 index 7600d0e..0000000 --- a/examples/simple.json +++ /dev/null @@ -1,377 +0,0 @@ -{ - "type": "pipeline", - "meta": { - "version": "1.0.0rc" - }, - "signature": { - "inputs": [ - { - "id": "9a586cf8-272f-43de-b137-3fc36729f2c7", - "name": "sample_metadata", - "type": "Metadata", - "ast": { - "name": "Metadata", - "type": "expression", - "fields": [], - "builtin": true, - "predicate": null - }, - "required": true - }, - { - "id": "aead4a07-1cd3-4034-9e20-80536b8c2264", - "name": "table", - "type": "FeatureTable[Frequency | PresenceAbsence]", - "ast": { - "name": "FeatureTable", - "type": "expression", - "fields": [ - { - "type": "union", - "members": [ - { - "name": "Frequency", - "type": "expression", - "fields": [], - "builtin": false, - "predicate": null - }, - { - "name": "PresenceAbsence", - "type": "expression", - "fields": [], - "builtin": false, - "predicate": null - } - ] - } - ], - "builtin": false, - "predicate": null - }, - "required": true - } - ], - "parameters": [ - { - "id": "c2051f90-5128-4197-a430-e1be6e0ace56", - "name": "metric", - "required": false, - "default": "canberra", - "type": "Str % Choices('aitchison', 'braycurtis', 'canberra', 'canberra_adkins', 'chebyshev', 'cityblock', 'correlation', 'cosine', 'dice', 'euclidean', 'hamming', 'jaccard', 'jensenshannon', 'matching', 'minkowski', 'rogerstanimoto', 'russellrao', 'seuclidean', 'sokalmichener', 'sokalsneath', 'sqeuclidean', 'yule')", - "ast": { - "name": "Str", - "type": "expression", - "fields": [], - "builtin": true, - "predicate": { - "name": "Choices", - "type": "predicate", - "choices": [ - "aitchison", - "braycurtis", - "canberra", - "canberra_adkins", - "chebyshev", - "cityblock", - "correlation", - "cosine", - "dice", - "euclidean", - "hamming", - "jaccard", 
- "jensenshannon", - "matching", - "minkowski", - "rogerstanimoto", - "russellrao", - "seuclidean", - "sokalmichener", - "sokalsneath", - "sqeuclidean", - "yule" - ] - } - } - }, - { - "id": "66b84a2e-9ad9-4774-ad8a-438ae54bf3da", - "name": "compare", - "required": true, - "default": null, - "type": "MetadataColumn[Categorical]", - "ast": { - "name": "MetadataColumn", - "type": "expression", - "fields": [ - { - "name": "Categorical", - "type": "expression", - "fields": [], - "builtin": true, - "predicate": null - } - ], - "builtin": true, - "predicate": null - } - } - ], - "outputs": [ - { - "id": "a264bb78-3496-4324-ad75-a5aa933dd8f2", - "name": "summary", - "type": "Visualization", - "ast": { - "name": "Visualization", - "type": "expression", - "fields": [], - "builtin": true, - "predicate": null - } - }, - { - "id": "ee42b860-8d75-4120-b56c-b9d5c12bb572", - "name": "distance_matrix1", - "type": "DistanceMatrix", - "ast": { - "name": "DistanceMatrix", - "type": "expression", - "fields": [], - "builtin": false, - "predicate": null - } - }, - { - "id": "4cb5be9f-964e-42e2-af86-ca75e72c0fcf", - "name": "beta-group", - "type": "Visualization", - "ast": { - "name": "Visualization", - "type": "expression", - "fields": [], - "builtin": true, - "predicate": null - } - }, - { - "id": "7667756f-4c8a-4090-8ffd-200e37b6e4a7", - "name": "distance_matrix2", - "type": "DistanceMatrix", - "ast": { - "name": "DistanceMatrix", - "type": "expression", - "fields": [], - "builtin": false, - "predicate": null - } - }, - { - "id": "332931ea-4a96-42ad-bf32-81713781e29b", - "name": "pcoa", - "type": "PCoAResults", - "ast": { - "name": "PCoAResults", - "type": "expression", - "fields": [], - "builtin": false, - "predicate": null - } - }, - { - "id": "ceb812ba-793b-4387-bdd0-4b91569e25b9", - "name": "emperor", - "type": "Visualization", - "ast": { - "name": "Visualization", - "type": "expression", - "fields": [], - "builtin": true, - "predicate": null - } - } - ] - }, - "graph": [ - { - 
"id": "febcf6e0-90ed-44a6-8b1b-f7beb53223a2", - "kind": "built-in", - "name": "root-input", - "inputs": { - "sample_metadata": { - "kind": "archive", - "id": "9a586cf8-272f-43de-b137-3fc36729f2c7" - } - }, - "parameters": {}, - "outputs": { - "sample_metadata": { - "kind": "archive", - "id": "990f2bbb-dda6-4f83-87fa-5a56025d2095" - } - } - }, - { - "id": "a0ec2ae8-6736-4fab-9b07-9e718a7194f1", - "kind": "built-in", - "name": "root-input", - "inputs": { - "table": { - "kind": "archive", - "id": "aead4a07-1cd3-4034-9e20-80536b8c2264" - } - }, - "parameters": {}, - "outputs": { - "table": { - "kind": "archive", - "id": "63981d12-7beb-4443-aed4-a5e7b29ab796" - } - } - }, - { - "id": "31efdf49-e06b-4ad5-8c97-47a2a323120f", - "kind": "plugin-action", - "plugin": "feature_table", - "action": "summarize", - "inputs": { - "table": { - "kind": "archive", - "id": "63981d12-7beb-4443-aed4-a5e7b29ab796" - }, - "sample_metadata": { - "kind": "metadata", - "id": "990f2bbb-dda6-4f83-87fa-5a56025d2095" - } - }, - "parameters": {}, - "outputs": { - "visualization": { - "kind": "archive", - "id": "a264bb78-3496-4324-ad75-a5aa933dd8f2" - } - } - }, - { - "id": "cfd580cd-7278-45b4-84f6-f879b1f94b24", - "kind": "plugin-action", - "plugin": "diversity", - "action": "beta", - "inputs": { - "table": { - "kind": "archive", - "id": "63981d12-7beb-4443-aed4-a5e7b29ab796" - } - }, - "parameters": { - "metric": { - "kind": "promoted", - "id": "c2051f90-5128-4197-a430-e1be6e0ace56" - } - }, - "outputs": { - "distance_matrix": { - "kind": "archive", - "id": "ee42b860-8d75-4120-b56c-b9d5c12bb572" - } - } - }, - { - "id": "22a5a5d7-d532-4251-914a-ecf34df945fd", - "kind": "plugin-action", - "plugin": "diversity", - "action": "beta_group_significance", - "inputs": { - "distance_matrix": { - "kind": "archive", - "id": "ee42b860-8d75-4120-b56c-b9d5c12bb572" - }, - "metadata": { - "kind": "metadata", - "id": "990f2bbb-dda6-4f83-87fa-5a56025d2095" - } - }, - "parameters": { - "metadata": { - "kind": 
"metadata", - "column": { - "kind": "promoted", - "id": "66b84a2e-9ad9-4774-ad8a-438ae54bf3da" - } - } - }, - "outputs": { - "visualization": { - "kind": "archive", - "id": "4cb5be9f-964e-42e2-af86-ca75e72c0fcf" - } - } - }, - { - "id": "68a5d6a8-7d24-40d7-8197-3294951f5cd6", - "kind": "plugin-action", - "plugin": "diversity", - "action": "beta", - "inputs": { - "table": { - "kind": "archive", - "id": "63981d12-7beb-4443-aed4-a5e7b29ab796" - } - }, - "parameters": { - "metric": { - "kind": "promoted", - "id": "c2051f90-5128-4197-a430-e1be6e0ace56" - } - }, - "outputs": { - "distance_matrix": { - "kind": "archive", - "id": "7667756f-4c8a-4090-8ffd-200e37b6e4a7" - } - } - }, - { - "id": "ef9ed04d-bb5e-40ab-9292-b5c0573be32d", - "kind": "plugin-action", - "plugin": "diversity", - "action": "pcoa", - "inputs": { - "distance_matrix": { - "kind": "archive", - "id": "7667756f-4c8a-4090-8ffd-200e37b6e4a7" - } - }, - "parameters": {}, - "outputs": { - "pcoa": { - "kind": "archive", - "id": "332931ea-4a96-42ad-bf32-81713781e29b" - } - } - }, - { - "id": "464fc7c0-a91b-4d66-a304-8668c0377867", - "kind": "plugin-action", - "plugin": "emperor", - "action": "plot", - "inputs": { - "pcoa": { - "kind": "archive", - "id": "332931ea-4a96-42ad-bf32-81713781e29b" - }, - "metadata": { - "kind": "metadata", - "id": "990f2bbb-dda6-4f83-87fa-5a56025d2095" - } - }, - "parameters": {}, - "outputs": { - "visualization": { - "kind": "archive", - "id": "ceb812ba-793b-4387-bdd0-4b91569e25b9" - } - } - } - ] -} \ No newline at end of file From c88913b0defed5555bf1445cd079b4a64edfdb2f Mon Sep 17 00:00:00 2001 From: John Chase Date: Mon, 23 Feb 2026 18:23:39 -0800 Subject: [PATCH 07/44] Adds dummy progress ' --- pyproject.toml | 2 +- src/adagio/cli/args.py | 8 +- src/adagio/cli/dynamic.py | 106 +++++++++++++++++++++-- src/adagio/cli/runner.py | 100 ++++++++++++++++++--- src/adagio/dummy_execute.py | 96 ++++++++++++++++++++ src/adagio/execution/context.py | 15 +++- src/adagio/model/pipeline.py | 4 
+- src/adagio/model/task.py | 8 +- src/adagio/monitor/api.py | 47 ++++++---- src/adagio/monitor/log.py | 44 +++++++++- src/adagio/monitor/tty.py | 149 ++++++++++++++++++++++++++++++++ 11 files changed, 531 insertions(+), 48 deletions(-) create mode 100644 src/adagio/dummy_execute.py diff --git a/pyproject.toml b/pyproject.toml index 1f50115..6f90f2e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ version = "0.0.0" description = "Adagio command line tool" readme = "README.md" requires-python = ">=3.10" -dependencies = ["cyclopts>=4.5.3", "pydantic>=2.12.5", "rich>=14.1.0"] +dependencies = ["cyclopts>=4.5.3", "pydantic>=2.12.5", "rich>=14.1.0", "parsl>=2024.12.16"] [dependency-groups] diff --git a/src/adagio/cli/args.py b/src/adagio/cli/args.py index c3e06c0..27d9a89 100644 --- a/src/adagio/cli/args.py +++ b/src/adagio/cli/args.py @@ -1,5 +1,11 @@ import re -from enum import StrEnum +from enum import Enum + +try: + from enum import StrEnum +except ImportError: # pragma: no cover - Python < 3.11 + class StrEnum(str, Enum): + pass class ParamType(StrEnum): diff --git a/src/adagio/cli/dynamic.py b/src/adagio/cli/dynamic.py index a532dd5..fc34e5b 100644 --- a/src/adagio/cli/dynamic.py +++ b/src/adagio/cli/dynamic.py @@ -1,4 +1,5 @@ import inspect +import re from pathlib import Path from typing import Any, Annotated, Callable @@ -10,7 +11,40 @@ def _spec_py_type(type_name: str) -> type: - return {"str": str, "int": int, "float": float, "bool": bool}.get(type_name, str) + normalized = re.sub(r"[^a-z0-9]+", " ", (type_name or "").lower()).strip() + tokens = set(normalized.split()) + + if {"bool", "boolean"} & tokens or "bool" in normalized: + return bool + if {"int", "integer"} & tokens or "int" in normalized: + return int + if {"float", "double", "number", "numeric", "real"} & tokens: + return float + if {"str", "string", "text"} & tokens: + return str + return str + + +def _default_py_type(default: Any) -> type | None: + if isinstance(default, bool): + 
return bool + if isinstance(default, int): + return int + if isinstance(default, float): + return float + if isinstance(default, str): + return str + return None + + +def _resolve_param_type(type_name: str, default: Any) -> type: + declared = _spec_py_type(type_name) + inferred = _default_py_type(default) + if inferred is None: + return declared + if declared is str and inferred is not str: + return inferred + return declared def build_dynamic_run( @@ -48,7 +82,7 @@ def add_dynamic_option( ident: str, opt: str, required: bool, - py_type: type, + py_type: Any, help_text: str, default: Any, ) -> None: @@ -73,6 +107,63 @@ def add_dynamic_option( ) ) + add_dynamic_option( + ident="arguments_file", + opt="--arguments", + required=False, + py_type=Path | None, + help_text="Path to an arguments JSON file to pre-populate inputs, parameters, and outputs.", + default=None, + ) + add_dynamic_option( + ident="dummy", + opt="--dummy", + required=False, + py_type=bool, + help_text="Run a simulated pipeline execution instead of invoking runtime plugins.", + default=True, + ) + add_dynamic_option( + ident="dummy_min_seconds", + opt="--dummy-min-seconds", + required=False, + py_type=float, + help_text="Minimum seconds spent per task in dummy mode.", + default=10.0, + ) + add_dynamic_option( + ident="dummy_max_seconds", + opt="--dummy-max-seconds", + required=False, + py_type=float, + help_text="Maximum seconds spent per task in dummy mode.", + default=15.0, + ) + add_dynamic_option( + ident="dummy_fail_rate", + opt="--dummy-fail-rate", + required=False, + py_type=float, + help_text="Failure probability per task in dummy mode (0.0 to 1.0).", + default=0.0, + ) + add_dynamic_option( + ident="dummy_subtasks", + opt="--dummy-subtasks", + required=False, + py_type=int, + help_text="Number of subtasks shown for each task in dummy mode.", + default=3, + ) + add_dynamic_option( + ident="dummy_seed", + opt="--dummy-seed", + required=False, + py_type=int | None, + help_text="Optional random 
seed for deterministic dummy runs.", + default=None, + ) + for spec in input_specs: original = spec.name ident = to_identifier(original, "input") @@ -107,14 +198,19 @@ def add_dynamic_option( default = spec.default required = spec.required + is_required = bool(required and default is None) + param_default = inspect._empty if is_required else default + param_type: Any = _resolve_param_type(spec.type, default) + if not is_required and default is None: + param_type = param_type | None opt = dynamic_opt(original, ParamType.PARAM) add_dynamic_option( ident=ident, opt=opt, - required=bool(required and default is None), - py_type=_spec_py_type(spec.type), + required=is_required, + py_type=param_type, help_text=f"Pipeline parameter: {original}", - default=default if default is not None else inspect._empty, + default=param_default, ) def run(pipeline: Path, **kwargs: Any) -> None: diff --git a/src/adagio/cli/runner.py b/src/adagio/cli/runner.py index 218ce86..b82a719 100644 --- a/src/adagio/cli/runner.py +++ b/src/adagio/cli/runner.py @@ -1,4 +1,5 @@ import json +import sys from pathlib import Path from typing import Any @@ -13,19 +14,32 @@ def run_pipeline_from_kwargs( *, console: Console, ) -> None: - try: - from ..execute import execute_pipeline - from ..model.pipeline import AdagioPipeline - except ModuleNotFoundError as exc: - raise SystemExit( - "Execution dependencies are missing. " - "Install runtime requirements (for example, qiime2/parsl) to run pipelines." 
- ) from exc + from ..dummy_execute import ( + DummyExecutionConfig, + DummyExecutionFailed, + execute_dummy_pipeline, + ) + from ..model.pipeline import AdagioPipeline + from ..monitor.tty import RichMonitor data = json.loads(pipeline.read_text(encoding="utf-8")) - parsed_pipeline = AdagioPipeline.model_validate(data) + pipeline_data = data.get("spec", data) if isinstance(data, dict) else data + parsed_pipeline = AdagioPipeline.model_validate(pipeline_data) arguments = parsed_pipeline.signature.to_default_arguments() + arguments_file = kwargs.pop("arguments_file", None) + if arguments_file is not None: + _merge_arguments_file(arguments, Path(arguments_file)) + + dummy_enabled = bool(kwargs.pop("dummy", True)) + dummy_config = DummyExecutionConfig( + min_seconds=float(kwargs.pop("dummy_min_seconds", 10.0)), + max_seconds=float(kwargs.pop("dummy_max_seconds", 15.0)), + fail_rate=float(kwargs.pop("dummy_fail_rate", 0.0)), + subtasks=int(kwargs.pop("dummy_subtasks", 3)), + seed=kwargs.pop("dummy_seed", None), + ) + for ident, original in input_bindings: value = kwargs.get(ident) if value is not None: @@ -36,5 +50,69 @@ def run_pipeline_from_kwargs( arguments.parameters[original] = kwargs.get(ident) console.print(f"[bold]Pipeline:[/bold] {pipeline}") - console.print("[bold]Executing pipeline[/bold]") - execute_pipeline(pipeline=parsed_pipeline, arguments=arguments) + console.print( + f"[bold]Executing pipeline[/bold] ({'dummy' if dummy_enabled else 'runtime'} mode)" + ) + + try: + if dummy_enabled: + execute_dummy_pipeline( + pipeline=parsed_pipeline, + arguments=arguments, + monitor=RichMonitor(console=console), + dummy=dummy_config, + ) + else: + raise SystemExit( + "Runtime execution is temporarily disabled. " + "Use default dummy mode (or pass --dummy)." 
+ ) + except DummyExecutionFailed as exc: + raise SystemExit(str(exc)) from exc + except (ModuleNotFoundError, ImportError) as exc: + if dummy_enabled: + raise + missing = getattr(exc, "name", None) or "unknown" + raise SystemExit( + "Execution dependencies are missing. " + f"Missing module: {missing!r}. " + f"Details: {exc}. " + f"Python executable: {sys.executable}. " + "Install runtime requirements (for example, qiime2/parsl) in that same environment." + ) from exc + + +def _merge_arguments_file(arguments, arguments_file: Path) -> None: + try: + text = arguments_file.read_text(encoding="utf-8") + except OSError as exc: + raise SystemExit(f"Unable to read arguments file: {arguments_file}") from exc + + try: + payload = json.loads(text) + except json.JSONDecodeError as exc: + raise SystemExit(f"Invalid JSON in arguments file: {arguments_file}") from exc + + if not isinstance(payload, dict): + raise SystemExit(f"Invalid arguments file format: {arguments_file}") + + inputs = payload.get("inputs") + if isinstance(inputs, dict): + for key, value in inputs.items(): + arguments.inputs[key] = str(value) + + params = payload.get("parameters") + if isinstance(params, dict): + for key, value in params.items(): + arguments.parameters[key] = value + + if "outputs" in payload: + outputs = payload["outputs"] + if isinstance(outputs, str): + arguments.outputs = outputs + elif isinstance(outputs, dict): + arguments.outputs = { + str(key): str(value) for key, value in outputs.items() + } + else: + raise SystemExit(f"Invalid outputs in arguments file: {arguments_file}") diff --git a/src/adagio/dummy_execute.py b/src/adagio/dummy_execute.py new file mode 100644 index 0000000..77aa785 --- /dev/null +++ b/src/adagio/dummy_execute.py @@ -0,0 +1,96 @@ +from __future__ import annotations + +import random +import time +from dataclasses import dataclass +from typing import Any + +from adagio.model.arguments import AdagioArguments +from adagio.model.pipeline import AdagioPipeline +from 
adagio.monitor.api import Monitor +from adagio.monitor.log import LogMonitor + + +@dataclass +class DummyExecutionConfig: + min_seconds: float = 10.0 + max_seconds: float = 15.0 + fail_rate: float = 0.0 + subtasks: int = 3 + seed: int | None = None + + +class DummyExecutionFailed(RuntimeError): + pass + + +def execute_dummy_pipeline( + *, + pipeline: AdagioPipeline, + arguments: AdagioArguments, + monitor: Monitor | None = None, + dummy: DummyExecutionConfig | None = None, +) -> None: + sig = pipeline.signature + monitor = monitor or LogMonitor() + dummy = dummy or DummyExecutionConfig() + tasks = list(pipeline.iter_tasks()) + + pipeline.validate_graph() + sig.validate_arguments(arguments) + + subtasks = max(dummy.subtasks, 1) + fail_rate = min(max(dummy.fail_rate, 0.0), 1.0) + min_seconds, max_seconds = sorted( + (max(dummy.min_seconds, 0.0), max(dummy.max_seconds, 0.0)) + ) + rng = random.Random(dummy.seed) + + monitor.start_pipeline(total_tasks=len(tasks)) + try: + for task in tasks: + monitor.queue_task( + task_id=task.id, + label=_task_label(task), + total_subtasks=subtasks, + ) + + for task in tasks: + monitor.start_task(task_id=task.id) + duration = rng.uniform(min_seconds, max_seconds) + sleep_per_subtask = duration / subtasks + + for subtask_index in range(subtasks): + if sleep_per_subtask > 0: + time.sleep(sleep_per_subtask) + monitor.advance_task( + task_id=task.id, + advance=1, + ) + + if rng.random() < fail_rate: + monitor.finish_task( + task_id=task.id, + status="failed", + error="simulated failure", + ) + raise DummyExecutionFailed( + f"Dummy execution failed at task '{task.id}'." 
+ ) + + monitor.finish_task(task_id=task.id, status="completed") + finally: + monitor.finish_pipeline() + + +def _task_label(task: Any) -> str: + kind = getattr(task, "kind", "unknown") + task_id = getattr(task, "id", "") + if kind == "plugin-action": + plugin = getattr(task, "plugin", "") + action = getattr(task, "action", "") + return f"{task_id} ({plugin}.{action})" + if kind == "built-in": + name = getattr(task, "name", "built-in") + return f"{task_id} ({name})" + return task_id diff --git a/src/adagio/execution/context.py b/src/adagio/execution/context.py index e82ad52..a624804 100644 --- a/src/adagio/execution/context.py +++ b/src/adagio/execution/context.py @@ -2,14 +2,23 @@ from qiime2.sdk.proxy import ProxyResults, Proxy from qiime2.sdk import Pipeline, Results -from qiime2.sdk.context import ParallelContext +try: + from qiime2.sdk.context import ParallelContext as _BaseContext +except ImportError: # pragma: no cover - older qiime2 + from qiime2.sdk.context import Context as _BaseContext from adagio.execution.proxy import IndexedProxyResults, dfk_thread_future, lift_parsl -class AdagioContext(ParallelContext): +class AdagioContext(_BaseContext): def __init__(self, action_obj=None, parent=None): - super().__init__(action_obj, parent) + try: + # Newer qiime2 parallel context API + super().__init__(action_obj, parent) + except TypeError: + # qiime2<=2024.10: Context(parent=None, parallel=False) + super().__init__(parent=parent, parallel=True) + self.action_obj = action_obj def _callable_action_(self, *args, **kwargs): diff --git a/src/adagio/model/pipeline.py b/src/adagio/model/pipeline.py index 2f32519..f5bc935 100644 --- a/src/adagio/model/pipeline.py +++ b/src/adagio/model/pipeline.py @@ -118,8 +118,8 @@ class _InputDef(_Def): class _ParameterDef(_Def): required: bool - default: 'AllowableValue' + default: 'AllowableValue | None' = None class _OutputDef(_Def): - pass \ No newline at end of file + pass diff --git a/src/adagio/model/task.py 
b/src/adagio/model/task.py index fc38ae5..2e91602 100644 --- a/src/adagio/model/task.py +++ b/src/adagio/model/task.py @@ -1,10 +1,6 @@ import typing as t from pydantic import BaseModel, Field -from adagio.io import convert_metadata - - - class _BaseTask(BaseModel): id: str @@ -24,6 +20,8 @@ class PluginActionTask(_BaseTask): action: str def exec(self, ctx, params, scope): + from adagio.io import convert_metadata + action = ctx.get_action(self.plugin, self.action) kwargs = {} metadata = {} @@ -110,4 +108,4 @@ class MetadataVal(BaseModel): Collection = list[Primitive] | dict[str, Primitive] AllowableValue = Primitive | Collection AdagioTask = t.Annotated[t.Union[PluginActionTask, RootInputTask], - Field(discriminator='kind')] \ No newline at end of file + Field(discriminator='kind')] diff --git a/src/adagio/monitor/api.py b/src/adagio/monitor/api.py index 3a4eaa8..8a8de02 100644 --- a/src/adagio/monitor/api.py +++ b/src/adagio/monitor/api.py @@ -1,28 +1,39 @@ +from __future__ import annotations + + class Monitor: + def start_pipeline(self, *, total_tasks: int = 0) -> None: + return None - def start_pipeline(self): - pass + def start_load_input(self) -> None: + return None - def start_load_input(self): - pass + def finish_load_input(self) -> None: + return None - def finish_load_input(self): - pass + def queue_task( + self, *, task_id: str, label: str, total_subtasks: int = 1 + ) -> None: + return None - def queue_task(self): - pass + def start_task(self, *, task_id: str) -> None: + return None - def start_task(self): - pass + def advance_task( + self, *, task_id: str, advance: int = 1, message: str | None = None + ) -> None: + return None - def finish_task(self): - pass + def finish_task( + self, *, task_id: str, status: str = "completed", error: str | None = None + ) -> None: + return None - def start_save_output(self): - pass + def start_save_output(self) -> None: + return None - def finish_save_output(self): - pass + def finish_save_output(self) -> None: + return 
None - def finish_pipeline(self): - pass \ No newline at end of file + def finish_pipeline(self) -> None: + return None diff --git a/src/adagio/monitor/log.py b/src/adagio/monitor/log.py index 424cae6..1bdfc17 100644 --- a/src/adagio/monitor/log.py +++ b/src/adagio/monitor/log.py @@ -1,2 +1,42 @@ -class LogMonitor: - pass \ No newline at end of file +from __future__ import annotations + +from rich.console import Console + +from .api import Monitor + + +class LogMonitor(Monitor): + def __init__(self, *, console: Console | None = None): + self._console = console or Console(stderr=True) + + def start_pipeline(self, *, total_tasks: int = 0) -> None: + self._console.log(f"pipeline started (tasks={total_tasks})") + + def queue_task( + self, *, task_id: str, label: str, total_subtasks: int = 1 + ) -> None: + self._console.log( + f"queued task id={task_id} label={label!r} subtasks={total_subtasks}" + ) + + def start_task(self, *, task_id: str) -> None: + self._console.log(f"started task id={task_id}") + + def advance_task( + self, *, task_id: str, advance: int = 1, message: str | None = None + ) -> None: + details = f" advanced={advance}" + if message: + details += f" message={message!r}" + self._console.log(f"updated task id={task_id}{details}") + + def finish_task( + self, *, task_id: str, status: str = "completed", error: str | None = None + ) -> None: + details = f"status={status}" + if error: + details += f" error={error!r}" + self._console.log(f"finished task id={task_id} {details}") + + def finish_pipeline(self) -> None: + self._console.log("pipeline finished") diff --git a/src/adagio/monitor/tty.py b/src/adagio/monitor/tty.py index e69de29..db9bbed 100644 --- a/src/adagio/monitor/tty.py +++ b/src/adagio/monitor/tty.py @@ -0,0 +1,149 @@ +from __future__ import annotations + +from dataclasses import dataclass + +from rich.console import Console +from rich.progress import ( + BarColumn, + Progress, + TextColumn, + TimeElapsedColumn, +) + +from .api import Monitor + + 
+@dataclass +class _TaskState: + progress_task_id: int + total_subtasks: int + completed_subtasks: int = 0 + status: str = "pending" + + +class RichMonitor(Monitor): + def __init__(self, *, console: Console | None = None): + self._console = console or Console() + self._progress = Progress( + TextColumn("{task.fields[badge]} {task.fields[label]}\n"), + TextColumn(" "), + BarColumn(bar_width=40), + TextColumn("{task.fields[state]}"), + TimeElapsedColumn(), + console=self._console, + expand=False, + transient=False, + ) + self._task_lookup: dict[str, _TaskState] = {} + self._status_counts: dict[str, int] = { + "completed": 0, + "failed": 0, + "skipped": 0, + } + self._pipeline_started = False + self._total_tasks = 0 + + def start_pipeline(self, *, total_tasks: int = 0) -> None: + if self._pipeline_started: + return + self._pipeline_started = True + self._total_tasks = total_tasks + self._progress.start() + self._console.print("[bold]Task Checklist[/bold]") + + def queue_task( + self, *, task_id: str, label: str, total_subtasks: int = 1 + ) -> None: + total = max(total_subtasks, 1) + progress_task_id = self._progress.add_task( + description="", + total=total, + completed=0, + badge="PEND", + label=label, + state=f"pending (0/{total})", + ) + self._task_lookup[task_id] = _TaskState( + progress_task_id=progress_task_id, + total_subtasks=total, + completed_subtasks=0, + status="pending", + ) + + def start_task(self, *, task_id: str) -> None: + task = self._task_lookup.get(task_id) + if task is None: + return + task.status = "running" + self._progress.update( + task.progress_task_id, + badge="RUN", + state=f"running ({task.completed_subtasks}/{task.total_subtasks})", + ) + + def advance_task( + self, *, task_id: str, advance: int = 1, message: str | None = None + ) -> None: + task = self._task_lookup.get(task_id) + if task is None: + return + task.completed_subtasks = min( + task.total_subtasks, task.completed_subtasks + max(advance, 0) + ) + state = f"running 
({task.completed_subtasks}/{task.total_subtasks})" + if message: + state = f"{state} {message}" + self._progress.update( + task.progress_task_id, + completed=task.completed_subtasks, + state=state, + ) + + def finish_task( + self, *, task_id: str, status: str = "completed", error: str | None = None + ) -> None: + task = self._task_lookup.get(task_id) + if task is None: + return + + task.status = status + badge_lookup = { + "completed": "DONE", + "failed": "FAIL", + "skipped": "SKIP", + } + if status == "completed": + task.completed_subtasks = task.total_subtasks + state = f"completed ({task.completed_subtasks}/{task.total_subtasks})" + elif status == "failed": + state = "failed" + if error: + state = f"{state}: {error}" + elif status == "skipped": + task.completed_subtasks = task.total_subtasks + state = f"skipped ({task.completed_subtasks}/{task.total_subtasks})" + else: + state = status + + if status in self._status_counts: + self._status_counts[status] += 1 + + self._progress.update( + task.progress_task_id, + completed=task.completed_subtasks, + badge=badge_lookup.get(status, "PEND"), + state=state, + ) + + def finish_pipeline(self) -> None: + if not self._pipeline_started: + return + self._progress.stop() + pending = self._total_tasks - sum(self._status_counts.values()) + self._console.print( + "Summary: " + f"{self._status_counts['completed']} completed, " + f"{self._status_counts['failed']} failed, " + f"{self._status_counts['skipped']} skipped, " + f"{max(pending, 0)} pending" + ) From 7d6223df7a13303ffa7c90416c5b621a97e1c4be Mon Sep 17 00:00:00 2001 From: John Chase Date: Mon, 23 Feb 2026 18:26:51 -0800 Subject: [PATCH 08/44] Adds dummy progress ' --- src/adagio/monitor/tty.py | 143 ++++++++++++++++++++++---------------- 1 file changed, 85 insertions(+), 58 deletions(-) diff --git a/src/adagio/monitor/tty.py b/src/adagio/monitor/tty.py index db9bbed..3bed362 100644 --- a/src/adagio/monitor/tty.py +++ b/src/adagio/monitor/tty.py @@ -1,14 +1,10 @@ from 
__future__ import annotations +import time from dataclasses import dataclass from rich.console import Console -from rich.progress import ( - BarColumn, - Progress, - TextColumn, - TimeElapsedColumn, -) +from rich.progress import Progress, TextColumn from .api import Monitor @@ -16,22 +12,22 @@ @dataclass class _TaskState: progress_task_id: int + label: str total_subtasks: int completed_subtasks: int = 0 status: str = "pending" + error: str | None = None + started_at: float | None = None + finished_at: float | None = None class RichMonitor(Monitor): def __init__(self, *, console: Console | None = None): self._console = console or Console() self._progress = Progress( - TextColumn("{task.fields[badge]} {task.fields[label]}\n"), - TextColumn(" "), - BarColumn(bar_width=40), - TextColumn("{task.fields[state]}"), - TimeElapsedColumn(), + TextColumn("{task.fields[row]}"), console=self._console, - expand=False, + expand=True, transient=False, ) self._task_lookup: dict[str, _TaskState] = {} @@ -55,49 +51,40 @@ def queue_task( self, *, task_id: str, label: str, total_subtasks: int = 1 ) -> None: total = max(total_subtasks, 1) + state = _TaskState( + progress_task_id=-1, + label=label, + total_subtasks=total, + ) + row = self._render_row(state) progress_task_id = self._progress.add_task( description="", total=total, completed=0, - badge="PEND", - label=label, - state=f"pending (0/{total})", - ) - self._task_lookup[task_id] = _TaskState( - progress_task_id=progress_task_id, - total_subtasks=total, - completed_subtasks=0, - status="pending", + row=row, ) + state.progress_task_id = progress_task_id + self._task_lookup[task_id] = state def start_task(self, *, task_id: str) -> None: task = self._task_lookup.get(task_id) if task is None: return task.status = "running" - self._progress.update( - task.progress_task_id, - badge="RUN", - state=f"running ({task.completed_subtasks}/{task.total_subtasks})", - ) + task.started_at = time.monotonic() + self._refresh_row(task) def 
advance_task( self, *, task_id: str, advance: int = 1, message: str | None = None ) -> None: + del message task = self._task_lookup.get(task_id) if task is None: return task.completed_subtasks = min( task.total_subtasks, task.completed_subtasks + max(advance, 0) ) - state = f"running ({task.completed_subtasks}/{task.total_subtasks})" - if message: - state = f"{state} {message}" - self._progress.update( - task.progress_task_id, - completed=task.completed_subtasks, - state=state, - ) + self._refresh_row(task) def finish_task( self, *, task_id: str, status: str = "completed", error: str | None = None @@ -107,33 +94,13 @@ def finish_task( return task.status = status - badge_lookup = { - "completed": "DONE", - "failed": "FAIL", - "skipped": "SKIP", - } - if status == "completed": - task.completed_subtasks = task.total_subtasks - state = f"completed ({task.completed_subtasks}/{task.total_subtasks})" - elif status == "failed": - state = "failed" - if error: - state = f"{state}: {error}" - elif status == "skipped": + task.error = error + task.finished_at = time.monotonic() + if status in {"completed", "skipped"}: task.completed_subtasks = task.total_subtasks - state = f"skipped ({task.completed_subtasks}/{task.total_subtasks})" - else: - state = status - if status in self._status_counts: self._status_counts[status] += 1 - - self._progress.update( - task.progress_task_id, - completed=task.completed_subtasks, - badge=badge_lookup.get(status, "PEND"), - state=state, - ) + self._refresh_row(task) def finish_pipeline(self) -> None: if not self._pipeline_started: @@ -147,3 +114,63 @@ def finish_pipeline(self) -> None: f"{self._status_counts['skipped']} skipped, " f"{max(pending, 0)} pending" ) + + def _refresh_row(self, task: _TaskState) -> None: + self._progress.update( + task.progress_task_id, + completed=task.completed_subtasks, + row=self._render_row(task), + ) + + def _render_row(self, task: _TaskState) -> str: + status_styles = { + "pending": ("PEND", "yellow"), + 
"running": ("RUN", "cyan"), + "completed": ("DONE", "green"), + "failed": ("FAIL", "red"), + "skipped": ("SKIP", "magenta"), + } + badge_text, color = status_styles.get(task.status, ("PEND", "yellow")) + badge = f"[bold {color}]{badge_text}[/]" + bar = _bar_text(task.completed_subtasks, task.total_subtasks, color) + + if task.status == "completed": + state_text = f"completed ({task.completed_subtasks}/{task.total_subtasks})" + elif task.status == "failed": + state_text = "failed" + if task.error: + state_text = f"{state_text}: {task.error}" + elif task.status == "skipped": + state_text = f"skipped ({task.completed_subtasks}/{task.total_subtasks})" + elif task.status == "running": + state_text = f"running ({task.completed_subtasks}/{task.total_subtasks})" + else: + state_text = f"pending ({task.completed_subtasks}/{task.total_subtasks})" + + elapsed = _elapsed(task) + return ( + f"{badge} {task.label}\n" + f" {bar} {state_text} {elapsed}" + ) + + +def _bar_text(completed: int, total: int, color: str, width: int = 40) -> str: + if total <= 0: + total = 1 + ratio = min(max(completed / total, 0.0), 1.0) + filled = int(round(ratio * width)) + empty = width - filled + return f"[{color}]{'━' * filled}[/]{' ' * empty}" + + +def _elapsed(task: _TaskState) -> str: + start = task.started_at + if start is None: + seconds = 0 + elif task.finished_at is not None: + seconds = max(0, int(task.finished_at - start)) + else: + seconds = max(0, int(time.monotonic() - start)) + minutes, sec = divmod(seconds, 60) + hours, minutes = divmod(minutes, 60) + return f"{hours}:{minutes:02d}:{sec:02d}" From b7c294889f59182da4da973e83d933949a5f3046 Mon Sep 17 00:00:00 2001 From: John Chase Date: Mon, 23 Feb 2026 18:43:23 -0800 Subject: [PATCH 09/44] Small refactor --- src/adagio/cli/dynamic.py | 53 +++----------------------------- src/adagio/cli/runner.py | 52 +++++-------------------------- src/adagio/dummy_execute.py | 54 ++++++--------------------------- src/adagio/execution/context.py | 15 
++------- src/adagio/monitor/api.py | 15 +++++++-- src/adagio/monitor/log.py | 11 +++++-- src/adagio/monitor/tty.py | 15 +++++++-- 7 files changed, 59 insertions(+), 156 deletions(-) diff --git a/src/adagio/cli/dynamic.py b/src/adagio/cli/dynamic.py index fc34e5b..fac1713 100644 --- a/src/adagio/cli/dynamic.py +++ b/src/adagio/cli/dynamic.py @@ -11,6 +11,7 @@ def _spec_py_type(type_name: str) -> type: + """Map pipeline type text to a Python type.""" normalized = re.sub(r"[^a-z0-9]+", " ", (type_name or "").lower()).strip() tokens = set(normalized.split()) @@ -26,6 +27,7 @@ def _spec_py_type(type_name: str) -> type: def _default_py_type(default: Any) -> type | None: + """Infer a Python type from a default value.""" if isinstance(default, bool): return bool if isinstance(default, int): @@ -38,6 +40,7 @@ def _default_py_type(default: Any) -> type | None: def _resolve_param_type(type_name: str, default: Any) -> type: + """Resolve the CLI parameter type from type text and default.""" declared = _spec_py_type(type_name) inferred = _default_py_type(default) if inferred is None: @@ -55,6 +58,7 @@ def build_dynamic_run( [Path, dict[str, Any], list[tuple[str, str]], list[tuple[str, str]]], None ], ): + """Build a dynamic run command from pipeline input and parameter specs.""" input_bindings: list[tuple[str, str]] = [] param_bindings: list[tuple[str, str]] = [] seen_idents: set[str] = set() @@ -115,55 +119,6 @@ def add_dynamic_option( help_text="Path to an arguments JSON file to pre-populate inputs, parameters, and outputs.", default=None, ) - add_dynamic_option( - ident="dummy", - opt="--dummy", - required=False, - py_type=bool, - help_text="Run a simulated pipeline execution instead of invoking runtime plugins.", - default=True, - ) - add_dynamic_option( - ident="dummy_min_seconds", - opt="--dummy-min-seconds", - required=False, - py_type=float, - help_text="Minimum seconds spent per task in dummy mode.", - default=10.0, - ) - add_dynamic_option( - 
ident="dummy_max_seconds", - opt="--dummy-max-seconds", - required=False, - py_type=float, - help_text="Maximum seconds spent per task in dummy mode.", - default=15.0, - ) - add_dynamic_option( - ident="dummy_fail_rate", - opt="--dummy-fail-rate", - required=False, - py_type=float, - help_text="Failure probability per task in dummy mode (0.0 to 1.0).", - default=0.0, - ) - add_dynamic_option( - ident="dummy_subtasks", - opt="--dummy-subtasks", - required=False, - py_type=int, - help_text="Number of subtasks shown for each task in dummy mode.", - default=3, - ) - add_dynamic_option( - ident="dummy_seed", - opt="--dummy-seed", - required=False, - py_type=int | None, - help_text="Optional random seed for deterministic dummy runs.", - default=None, - ) - for spec in input_specs: original = spec.name ident = to_identifier(original, "input") diff --git a/src/adagio/cli/runner.py b/src/adagio/cli/runner.py index b82a719..fe716fc 100644 --- a/src/adagio/cli/runner.py +++ b/src/adagio/cli/runner.py @@ -1,5 +1,4 @@ import json -import sys from pathlib import Path from typing import Any @@ -14,11 +13,8 @@ def run_pipeline_from_kwargs( *, console: Console, ) -> None: - from ..dummy_execute import ( - DummyExecutionConfig, - DummyExecutionFailed, - execute_dummy_pipeline, - ) + """Run a pipeline command from resolved CLI keyword arguments.""" + from ..dummy_execute import execute from ..model.pipeline import AdagioPipeline from ..monitor.tty import RichMonitor @@ -31,15 +27,6 @@ def run_pipeline_from_kwargs( if arguments_file is not None: _merge_arguments_file(arguments, Path(arguments_file)) - dummy_enabled = bool(kwargs.pop("dummy", True)) - dummy_config = DummyExecutionConfig( - min_seconds=float(kwargs.pop("dummy_min_seconds", 10.0)), - max_seconds=float(kwargs.pop("dummy_max_seconds", 15.0)), - fail_rate=float(kwargs.pop("dummy_fail_rate", 0.0)), - subtasks=int(kwargs.pop("dummy_subtasks", 3)), - seed=kwargs.pop("dummy_seed", None), - ) - for ident, original in 
input_bindings: value = kwargs.get(ident) if value is not None: @@ -50,39 +37,16 @@ def run_pipeline_from_kwargs( arguments.parameters[original] = kwargs.get(ident) console.print(f"[bold]Pipeline:[/bold] {pipeline}") - console.print( - f"[bold]Executing pipeline[/bold] ({'dummy' if dummy_enabled else 'runtime'} mode)" + console.print("[bold]Executing pipeline[/bold] (dummy mode)") + execute( + pipeline=parsed_pipeline, + arguments=arguments, + monitor=RichMonitor(console=console), ) - try: - if dummy_enabled: - execute_dummy_pipeline( - pipeline=parsed_pipeline, - arguments=arguments, - monitor=RichMonitor(console=console), - dummy=dummy_config, - ) - else: - raise SystemExit( - "Runtime execution is temporarily disabled. " - "Use default dummy mode (or pass --dummy)." - ) - except DummyExecutionFailed as exc: - raise SystemExit(str(exc)) from exc - except (ModuleNotFoundError, ImportError) as exc: - if dummy_enabled: - raise - missing = getattr(exc, "name", None) or "unknown" - raise SystemExit( - "Execution dependencies are missing. " - f"Missing module: {missing!r}. " - f"Details: {exc}. " - f"Python executable: {sys.executable}. " - "Install runtime requirements (for example, qiime2/parsl) in that same environment." 
- ) from exc - def _merge_arguments_file(arguments, arguments_file: Path) -> None: + """Merge values from an arguments file into runtime arguments.""" try: text = arguments_file.read_text(encoding="utf-8") except OSError as exc: diff --git a/src/adagio/dummy_execute.py b/src/adagio/dummy_execute.py index 77aa785..065374c 100644 --- a/src/adagio/dummy_execute.py +++ b/src/adagio/dummy_execute.py @@ -1,8 +1,4 @@ -from __future__ import annotations - -import random import time -from dataclasses import dataclass from typing import Any from adagio.model.arguments import AdagioArguments @@ -11,40 +7,26 @@ from adagio.monitor.log import LogMonitor -@dataclass -class DummyExecutionConfig: - min_seconds: float = 10.0 - max_seconds: float = 15.0 - fail_rate: float = 0.0 - subtasks: int = 3 - seed: int | None = None - - -class DummyExecutionFailed(RuntimeError): - pass +SLEEP_SECONDS = 5.0 +SUBTASK_COUNT = 3 -def execute_dummy_pipeline( +def execute( *, pipeline: AdagioPipeline, arguments: AdagioArguments, monitor: Monitor | None = None, - dummy: DummyExecutionConfig | None = None, ) -> None: + """Execute a pipeline with fixed dummy progress.""" sig = pipeline.signature monitor = monitor or LogMonitor() - dummy = dummy or DummyExecutionConfig() tasks = list(pipeline.iter_tasks()) pipeline.validate_graph() sig.validate_arguments(arguments) - subtasks = max(dummy.subtasks, 1) - fail_rate = min(max(dummy.fail_rate, 0.0), 1.0) - min_seconds, max_seconds = sorted( - (max(dummy.min_seconds, 0.0), max(dummy.max_seconds, 0.0)) - ) - rng = random.Random(dummy.seed) + subtasks = SUBTASK_COUNT + sleep_per_subtask = SLEEP_SECONDS / SUBTASK_COUNT monitor.start_pipeline(total_tasks=len(tasks)) try: @@ -57,26 +39,9 @@ def execute_dummy_pipeline( for task in tasks: monitor.start_task(task_id=task.id) - duration = rng.uniform(min_seconds, max_seconds) - sleep_per_subtask = duration / subtasks - - for subtask_index in range(subtasks): - if sleep_per_subtask > 0: - time.sleep(sleep_per_subtask) 
- monitor.advance_task( - task_id=task.id, - advance=1, - ) - - if rng.random() < fail_rate: - monitor.finish_task( - task_id=task.id, - status="failed", - error="simulated failure", - ) - raise DummyExecutionFailed( - f"Dummy execution failed at task '{task.id}'." - ) + for _ in range(subtasks): + time.sleep(sleep_per_subtask) + monitor.advance_task(task_id=task.id, advance=1) monitor.finish_task(task_id=task.id, status="completed") finally: @@ -84,6 +49,7 @@ def execute_dummy_pipeline( def _task_label(task: Any) -> str: + """Build a human-readable label for a task.""" kind = getattr(task, "kind", "unknown") task_id = getattr(task, "id", "") if kind == "plugin-action": diff --git a/src/adagio/execution/context.py b/src/adagio/execution/context.py index a624804..e82ad52 100644 --- a/src/adagio/execution/context.py +++ b/src/adagio/execution/context.py @@ -2,23 +2,14 @@ from qiime2.sdk.proxy import ProxyResults, Proxy from qiime2.sdk import Pipeline, Results -try: - from qiime2.sdk.context import ParallelContext as _BaseContext -except ImportError: # pragma: no cover - older qiime2 - from qiime2.sdk.context import Context as _BaseContext +from qiime2.sdk.context import ParallelContext from adagio.execution.proxy import IndexedProxyResults, dfk_thread_future, lift_parsl -class AdagioContext(_BaseContext): +class AdagioContext(ParallelContext): def __init__(self, action_obj=None, parent=None): - try: - # Newer qiime2 parallel context API - super().__init__(action_obj, parent) - except TypeError: - # qiime2<=2024.10: Context(parent=None, parallel=False) - super().__init__(parent=parent, parallel=True) - self.action_obj = action_obj + super().__init__(action_obj, parent) def _callable_action_(self, *args, **kwargs): diff --git a/src/adagio/monitor/api.py b/src/adagio/monitor/api.py index 8a8de02..4783e53 100644 --- a/src/adagio/monitor/api.py +++ b/src/adagio/monitor/api.py @@ -1,39 +1,48 @@ -from __future__ import annotations - - class Monitor: + """Define monitor 
hooks used by pipeline execution.""" + def start_pipeline(self, *, total_tasks: int = 0) -> None: + """Start tracking a pipeline run.""" return None def start_load_input(self) -> None: + """Start tracking input loading.""" return None def finish_load_input(self) -> None: + """Finish tracking input loading.""" return None def queue_task( self, *, task_id: str, label: str, total_subtasks: int = 1 ) -> None: + """Queue a task before execution starts.""" return None def start_task(self, *, task_id: str) -> None: + """Start tracking an individual task.""" return None def advance_task( self, *, task_id: str, advance: int = 1, message: str | None = None ) -> None: + """Advance progress for an individual task.""" return None def finish_task( self, *, task_id: str, status: str = "completed", error: str | None = None ) -> None: + """Finish tracking an individual task.""" return None def start_save_output(self) -> None: + """Start tracking output saving.""" return None def finish_save_output(self) -> None: + """Finish tracking output saving.""" return None def finish_pipeline(self) -> None: + """Finish tracking a pipeline run.""" return None diff --git a/src/adagio/monitor/log.py b/src/adagio/monitor/log.py index 1bdfc17..1a96897 100644 --- a/src/adagio/monitor/log.py +++ b/src/adagio/monitor/log.py @@ -1,30 +1,35 @@ -from __future__ import annotations - from rich.console import Console from .api import Monitor class LogMonitor(Monitor): + """Log monitor events to a Rich console.""" + def __init__(self, *, console: Console | None = None): + """Initialize the log monitor.""" self._console = console or Console(stderr=True) def start_pipeline(self, *, total_tasks: int = 0) -> None: + """Log pipeline start.""" self._console.log(f"pipeline started (tasks={total_tasks})") def queue_task( self, *, task_id: str, label: str, total_subtasks: int = 1 ) -> None: + """Log task queueing.""" self._console.log( f"queued task id={task_id} label={label!r} subtasks={total_subtasks}" ) def 
start_task(self, *, task_id: str) -> None: + """Log task start.""" self._console.log(f"started task id={task_id}") def advance_task( self, *, task_id: str, advance: int = 1, message: str | None = None ) -> None: + """Log task progress updates.""" details = f" advanced={advance}" if message: details += f" message={message!r}" @@ -33,10 +38,12 @@ def advance_task( def finish_task( self, *, task_id: str, status: str = "completed", error: str | None = None ) -> None: + """Log task completion.""" details = f"status={status}" if error: details += f" error={error!r}" self._console.log(f"finished task id={task_id} {details}") def finish_pipeline(self) -> None: + """Log pipeline completion.""" self._console.log("pipeline finished") diff --git a/src/adagio/monitor/tty.py b/src/adagio/monitor/tty.py index 3bed362..2782648 100644 --- a/src/adagio/monitor/tty.py +++ b/src/adagio/monitor/tty.py @@ -1,5 +1,3 @@ -from __future__ import annotations - import time from dataclasses import dataclass @@ -22,7 +20,10 @@ class _TaskState: class RichMonitor(Monitor): + """Render pipeline progress in a stacked Rich layout.""" + def __init__(self, *, console: Console | None = None): + """Initialize the Rich monitor.""" self._console = console or Console() self._progress = Progress( TextColumn("{task.fields[row]}"), @@ -40,6 +41,7 @@ def __init__(self, *, console: Console | None = None): self._total_tasks = 0 def start_pipeline(self, *, total_tasks: int = 0) -> None: + """Start rendering pipeline progress.""" if self._pipeline_started: return self._pipeline_started = True @@ -50,6 +52,7 @@ def start_pipeline(self, *, total_tasks: int = 0) -> None: def queue_task( self, *, task_id: str, label: str, total_subtasks: int = 1 ) -> None: + """Queue a task row in the progress view.""" total = max(total_subtasks, 1) state = _TaskState( progress_task_id=-1, @@ -67,6 +70,7 @@ def queue_task( self._task_lookup[task_id] = state def start_task(self, *, task_id: str) -> None: + """Mark a task as 
running.""" task = self._task_lookup.get(task_id) if task is None: return @@ -77,6 +81,7 @@ def start_task(self, *, task_id: str) -> None: def advance_task( self, *, task_id: str, advance: int = 1, message: str | None = None ) -> None: + """Advance a task's subtask progress.""" del message task = self._task_lookup.get(task_id) if task is None: @@ -89,6 +94,7 @@ def advance_task( def finish_task( self, *, task_id: str, status: str = "completed", error: str | None = None ) -> None: + """Mark a task as finished.""" task = self._task_lookup.get(task_id) if task is None: return @@ -103,6 +109,7 @@ def finish_task( self._refresh_row(task) def finish_pipeline(self) -> None: + """Stop rendering and print a summary.""" if not self._pipeline_started: return self._progress.stop() @@ -116,6 +123,7 @@ def finish_pipeline(self) -> None: ) def _refresh_row(self, task: _TaskState) -> None: + """Refresh a rendered task row.""" self._progress.update( task.progress_task_id, completed=task.completed_subtasks, @@ -123,6 +131,7 @@ def _refresh_row(self, task: _TaskState) -> None: ) def _render_row(self, task: _TaskState) -> str: + """Build a stacked two-line row for a task.""" status_styles = { "pending": ("PEND", "yellow"), "running": ("RUN", "cyan"), @@ -155,6 +164,7 @@ def _render_row(self, task: _TaskState) -> str: def _bar_text(completed: int, total: int, color: str, width: int = 40) -> str: + """Build a colored progress bar string.""" if total <= 0: total = 1 ratio = min(max(completed / total, 0.0), 1.0) @@ -164,6 +174,7 @@ def _bar_text(completed: int, total: int, color: str, width: int = 40) -> str: def _elapsed(task: _TaskState) -> str: + """Format elapsed task time as H:MM:SS.""" start = task.started_at if start is None: seconds = 0 From b315230106bd25c1c68d9e71c1f20dce423a1654 Mon Sep 17 00:00:00 2001 From: John Chase Date: Tue, 24 Feb 2026 22:09:11 -0800 Subject: [PATCH 10/44] Adjusts progress bars --- src/adagio/monitor/tty.py | 82 ++++++++++++++++++++++----------------- 1 
file changed, 47 insertions(+), 35 deletions(-) diff --git a/src/adagio/monitor/tty.py b/src/adagio/monitor/tty.py index 2782648..f84150f 100644 --- a/src/adagio/monitor/tty.py +++ b/src/adagio/monitor/tty.py @@ -1,3 +1,4 @@ +import re import time from dataclasses import dataclass @@ -6,6 +7,12 @@ from .api import Monitor +BADGE_WIDTH = 8 +LABEL_WIDTH = 28 +BAR_WIDTH = 28 +COUNTER_WIDTH = 5 +ELAPSED_WIDTH = 4 + @dataclass class _TaskState: @@ -20,7 +27,7 @@ class _TaskState: class RichMonitor(Monitor): - """Render pipeline progress in a stacked Rich layout.""" + """Render compact pipeline progress rows.""" def __init__(self, *, console: Console | None = None): """Initialize the Rich monitor.""" @@ -47,7 +54,7 @@ def start_pipeline(self, *, total_tasks: int = 0) -> None: self._pipeline_started = True self._total_tasks = total_tasks self._progress.start() - self._console.print("[bold]Task Checklist[/bold]") + self._console.print("[bold]Task Progress[/bold]") def queue_task( self, *, task_id: str, label: str, total_subtasks: int = 1 @@ -131,39 +138,45 @@ def _refresh_row(self, task: _TaskState) -> None: ) def _render_row(self, task: _TaskState) -> str: - """Build a stacked two-line row for a task.""" - status_styles = { - "pending": ("PEND", "yellow"), - "running": ("RUN", "cyan"), - "completed": ("DONE", "green"), - "failed": ("FAIL", "red"), - "skipped": ("SKIP", "magenta"), - } - badge_text, color = status_styles.get(task.status, ("PEND", "yellow")) - badge = f"[bold {color}]{badge_text}[/]" - bar = _bar_text(task.completed_subtasks, task.total_subtasks, color) - - if task.status == "completed": - state_text = f"completed ({task.completed_subtasks}/{task.total_subtasks})" - elif task.status == "failed": - state_text = "failed" - if task.error: - state_text = f"{state_text}: {task.error}" - elif task.status == "skipped": - state_text = f"skipped ({task.completed_subtasks}/{task.total_subtasks})" - elif task.status == "running": - state_text = f"running 
({task.completed_subtasks}/{task.total_subtasks})" - else: - state_text = f"pending ({task.completed_subtasks}/{task.total_subtasks})" - + """Build a compact row for a task.""" + badge_text, color = _status_style(task.status) + badge_plain = badge_text.ljust(BADGE_WIDTH) + badge = f"[bold {color}]{badge_plain}[/]" + label = _compact_label(task.label, LABEL_WIDTH).ljust(LABEL_WIDTH) + bar = _bar_text(task.completed_subtasks, task.total_subtasks, color, BAR_WIDTH) + counter = f"{task.completed_subtasks}/{task.total_subtasks}" elapsed = _elapsed(task) + error = "" + if task.status == "failed" and task.error: + error = f" [red]{task.error}[/]" return ( - f"{badge} {task.label}\n" - f" {bar} {state_text} {elapsed}" + f"{badge} {label} {bar} " + f"{counter.rjust(COUNTER_WIDTH)} {elapsed.rjust(ELAPSED_WIDTH)}{error}" ) -def _bar_text(completed: int, total: int, color: str, width: int = 40) -> str: +def _status_style(status: str) -> tuple[str, str]: + """Map task state to badge text and color.""" + lookup = { + "pending": ("PENDING", "yellow"), + "running": ("RUNNING", "cyan"), + "completed": ("DONE", "green"), + "failed": ("FAILED", "red"), + "skipped": ("SKIPPED", "magenta"), + } + return lookup.get(status, ("PENDING", "yellow")) + + +def _compact_label(label: str, width: int = 28) -> str: + """Trim task labels to a compact display name.""" + match = re.search(r"\(([^)]+)\)\s*$", label) + compact = match.group(1) if match else label + if len(compact) <= width: + return compact + return compact[: width - 1] + "…" + + +def _bar_text(completed: int, total: int, color: str, width: int = 28) -> str: """Build a colored progress bar string.""" if total <= 0: total = 1 @@ -174,14 +187,13 @@ def _bar_text(completed: int, total: int, color: str, width: int = 40) -> str: def _elapsed(task: _TaskState) -> str: - """Format elapsed task time as H:MM:SS.""" + """Format elapsed task time as M:SS.""" start = task.started_at if start is None: - seconds = 0 - elif task.finished_at is not 
None: + return "0:00" + if task.finished_at is not None: seconds = max(0, int(task.finished_at - start)) else: seconds = max(0, int(time.monotonic() - start)) minutes, sec = divmod(seconds, 60) - hours, minutes = divmod(minutes, 60) - return f"{hours}:{minutes:02d}:{sec:02d}" + return f"{minutes}:{sec:02d}" From 2394adae66fad367d58831713330f4cb96e86615 Mon Sep 17 00:00:00 2001 From: John Chase Date: Wed, 25 Feb 2026 10:38:38 -0800 Subject: [PATCH 11/44] Adds param grouping --- src/adagio/cli/dynamic.py | 63 +++++++++++++++++++++++++++++++++++++++ src/adagio/cli/main.py | 9 ++++-- 2 files changed, 70 insertions(+), 2 deletions(-) diff --git a/src/adagio/cli/dynamic.py b/src/adagio/cli/dynamic.py index 500949a..b975050 100644 --- a/src/adagio/cli/dynamic.py +++ b/src/adagio/cli/dynamic.py @@ -3,6 +3,7 @@ from pathlib import Path from typing import Any, Annotated, Callable +from cyclopts import Group from cyclopts import Parameter as CliParameter from ..app.parsers.pipeline import Input as InputSpec @@ -10,6 +11,55 @@ from .args import ParamType, ShowParamsMode, dynamic_opt, to_identifier +class _PipelineGroupFormatter: + """Render pipeline options in one panel with nested subsections.""" + + def __call__(self, console: Any, options: Any, panel: Any) -> None: + from rich.console import Group as RichGroup + from rich.console import NewLine + from rich.text import Text + + from cyclopts.help.specs import PanelSpec, TableSpec, get_default_parameter_columns + + input_entries, parameter_entries = _split_pipeline_entries(panel.entries) + renderables: list[Any] = [] + + if panel.description: + renderables.append(panel.description) + + def add_section(title: str, entries: list[Any]) -> None: + if not entries: + return + if renderables: + renderables.append(NewLine()) + renderables.append(Text(title, style="bold")) + columns = get_default_parameter_columns(console, options, entries) + renderables.append(TableSpec().build(columns, entries)) + + add_section("Inputs", 
input_entries) + add_section("Parameters", parameter_entries) + + if not renderables: + return + + console.print(PanelSpec().build(RichGroup(*renderables), title=panel.title)) + + +def _split_pipeline_entries(entries: list[Any]) -> tuple[list[Any], list[Any]]: + input_entries: list[Any] = [] + parameter_entries: list[Any] = [] + + for entry in entries: + options = entry.all_options if hasattr(entry, "all_options") else () + long_name = next((name for name in options if name.startswith("--")), "") + if long_name.startswith("--input-"): + input_entries.append(entry) + else: + parameter_entries.append(entry) + + return input_entries, parameter_entries + + def _spec_py_type(type_name: str) -> type: """Map pipeline type text to a Python type.""" normalized = re.sub(r"[^a-z0-9]+", " ", (type_name or "").lower()).strip() @@ -74,12 +124,19 @@ def build_dynamic_run( required_params: list[str] = [] seen_idents: set[str] = set() seen_opts: set[str] = {"--pipeline", "-p", "--arguments", "--show-params"} + command_group = Group("Command Options", sort_key=0) + pipeline_group = Group( + "Pipeline", + sort_key=1, + help_formatter=_PipelineGroupFormatter(), + ) annotations: dict[str, Any] = { "pipeline": Annotated[ Path, CliParameter( name=("--pipeline", "-p"), + group=command_group, help="Path to the pipeline JSON file.", ), ] @@ -89,6 +146,7 @@ def build_dynamic_run( Path | None, CliParameter( name=("--arguments",), + group=command_group, help="Path to a JSON arguments file. 
Values are applied before CLI overrides.", ), ] @@ -96,6 +154,7 @@ def build_dynamic_run( ShowParamsMode, CliParameter( name=("--show-params",), + group=command_group, help="Parameter display mode: all, missing, or required.", ), ] @@ -128,6 +187,7 @@ def add_dynamic_option( py_type: Any, help_text: str, default: Any, + group: Group | tuple[Group, ...], ) -> None: if opt in seen_opts: raise ValueError(f"Conflicting CLI option generated: {opt!r}.") @@ -138,6 +198,7 @@ def add_dynamic_option( annotation_type, CliParameter( name=(opt,), + group=group, help=help_text, required=required, ), @@ -176,6 +237,7 @@ def add_dynamic_option( + (" [required]" if spec.required else "") ), default=None, + group=pipeline_group, ) for spec in param_specs: @@ -208,6 +270,7 @@ def add_dynamic_option( + default_text ), default=param_default, + group=pipeline_group, ) def run( diff --git a/src/adagio/cli/main.py b/src/adagio/cli/main.py index 6ddc240..0cd498f 100644 --- a/src/adagio/cli/main.py +++ b/src/adagio/cli/main.py @@ -4,7 +4,7 @@ from pathlib import Path from typing import Annotated, Any -from cyclopts import App, Parameter +from cyclopts import App, Group, Parameter from rich.console import Console from ..app.parsers.pipeline import Input as InputSpec @@ -41,6 +41,7 @@ def main(argv: list[str] | None = None) -> None: ) if not pipeline_str: + command_group = Group("Command Options", sort_key=0) @app.command def run( @@ -48,13 +49,16 @@ def run( pipeline: Annotated[ Path, Parameter( - name=("--pipeline", "-p"), help="Path to the pipeline JSON file." 
+ name=("--pipeline", "-p"), + group=command_group, + help="Path to the pipeline JSON file.", ), ], arguments: Annotated[ Path | None, Parameter( name=("--arguments",), + group=command_group, help="Path to a JSON arguments file.", ), ] = None, @@ -62,6 +66,7 @@ def run( ShowParamsMode, Parameter( name=("--show-params",), + group=command_group, help="Parameter display mode: all, missing, or required.", ), ] = ShowParamsMode.REQUIRED, From 18c57bd087144a699eec9936374ed5f3fb66d61f Mon Sep 17 00:00:00 2001 From: John Chase Date: Wed, 25 Feb 2026 22:42:12 -0800 Subject: [PATCH 12/44] Fixes runtime --- examples/arguments.json | 23 ++- src/adagio/cli/main.py | 6 + src/adagio/cli/runtime.py | 282 +++++++++++++++++++++++++++ src/adagio/execution/proxy.py | 2 +- src/adagio/model/pipeline.py | 12 +- src/adagio/monitor/api.py | 12 ++ src/adagio/monitor/composite.py | 76 ++++++++ src/adagio/monitor/connected.py | 108 +++++++++++ src/adagio/monitor/log.py | 23 +++ src/adagio/serial_execute.py | 329 ++++++++++++++++++++++++++++++++ 10 files changed, 864 insertions(+), 9 deletions(-) create mode 100644 src/adagio/cli/runtime.py create mode 100644 src/adagio/monitor/composite.py create mode 100644 src/adagio/monitor/connected.py create mode 100644 src/adagio/serial_execute.py diff --git a/examples/arguments.json b/examples/arguments.json index 4148c4a..02092eb 100644 --- a/examples/arguments.json +++ b/examples/arguments.json @@ -1,9 +1,20 @@ { - "inputs": {}, - "outputs": {}, - "parameters": { - "barcodes": "barcodes", - "trunc_len": 120, - "trim_foobar": 150 + "emp-paired-1": { + "inputs": {}, + "outputs": {}, + "parameters": { + "barcodes": "barcodes", + "trunc_len": 120, + "trim_foobar": 150 + } + }, + "foo-paired-1": { + "inputs": {}, + "outputs": {}, + "parameters": { + "barcodes": "barcodes", + "trunc_len": 120, + "trim_foobar": 150 + } } } diff --git a/src/adagio/cli/main.py b/src/adagio/cli/main.py index 0cd498f..be870b5 100644 --- a/src/adagio/cli/main.py +++ 
b/src/adagio/cli/main.py @@ -21,6 +21,12 @@ def main(argv: list[str] | None = None) -> None: argv = sys.argv[1:] if argv is None else argv + if argv and argv[0] == "runtime": + from .runtime import run_runtime + + run_runtime(argv[1:], console=console) + return + argv, positional_pipeline = promote_positional_pipeline(argv) pipeline_str = extract_flag_value(argv, "--pipeline", "-p") show_mode_str = extract_flag_value(argv, "--show-params") diff --git a/src/adagio/cli/runtime.py b/src/adagio/cli/runtime.py new file mode 100644 index 0000000..c3d0a8e --- /dev/null +++ b/src/adagio/cli/runtime.py @@ -0,0 +1,282 @@ +from __future__ import annotations + +import argparse +import json +import os +import urllib.error +import urllib.request +from pathlib import Path +from typing import Any + +from rich.console import Console + +from ..model.arguments import AdagioArguments +from ..model.pipeline import AdagioPipeline +from ..monitor.composite import CompositeMonitor +from ..monitor.connected import ConnectedMonitor +from ..monitor.log import LogMonitor + + +def run_runtime(argv: list[str], *, console: Console) -> None: + """Runtime entrypoint used by the runtime-adapter job container.""" + parser = argparse.ArgumentParser( + prog="adagio runtime", + description="Execute a pipeline from spec/config/arguments files.", + ) + parser.add_argument("--spec", required=True, help="Path to pipeline spec JSON.") + parser.add_argument("--config", required=True, help="Path to config JSON.") + parser.add_argument("--arguments", required=False, help="Path to run arguments JSON.") + parser.add_argument("--job-id", required=False, help="Runtime job ID.") + parser.add_argument("--output-dir", required=False, help="Directory for output artifacts.") + parser.add_argument("--runtime-url", required=False, help="Runtime adapter API base URL.") + parser.add_argument( + "--connected", + action="store_true", + help="Emit execution status updates to the runtime-adapter.", + ) + + opts = 
parser.parse_args(argv) + + spec_data = _load_json(Path(opts.spec)) + _ = _load_json(Path(opts.config)) + runtime_arguments: Any = {} + if opts.arguments: + runtime_arguments = _load_json(Path(opts.arguments)) + if runtime_arguments is None: + runtime_arguments = {} + + pipeline = _parse_pipeline(spec_data) + output_dir = _resolve_output_dir(opts.output_dir, opts.job_id) + arguments = _build_arguments( + pipeline=pipeline, + runtime_arguments=runtime_arguments, + output_dir=output_dir, + ) + _validate_required_arguments(pipeline, arguments) + + connected = bool(opts.connected and opts.job_id and (opts.runtime_url or os.getenv("RUNTIME_URL"))) + runtime_url = opts.runtime_url or os.getenv("RUNTIME_URL") + + log_monitor = LogMonitor(console=console) + monitor = log_monitor + if connected and runtime_url: + monitor = CompositeMonitor( + log_monitor, + ConnectedMonitor(runtime_url=runtime_url, job_id=opts.job_id or ""), + ) + + if connected and runtime_url and opts.job_id: + _post_job_event( + runtime_url=runtime_url, + job_id=opts.job_id, + payload={"event": "job_status", "status": "running"}, + ) + + from ..serial_execute import execute_serial + + try: + execute_serial(pipeline=pipeline, arguments=arguments, monitor=monitor) + except Exception as exc: # noqa: BLE001 + if connected and runtime_url and opts.job_id: + _post_job_event( + runtime_url=runtime_url, + job_id=opts.job_id, + payload={"event": "job_status", "status": "failed", "error": str(exc)}, + ) + raise + else: + if connected and runtime_url and opts.job_id: + _post_job_event( + runtime_url=runtime_url, + job_id=opts.job_id, + payload={"event": "job_status", "status": "succeeded"}, + ) + + +def _load_json(path: Path) -> Any: + return json.loads(path.read_text(encoding="utf-8")) + + +def _parse_pipeline(data: Any) -> AdagioPipeline: + pipeline_data = data.get("spec", data) if isinstance(data, dict) else data + return AdagioPipeline.model_validate(pipeline_data) + + +def _resolve_output_dir(raw_output_dir: 
str | None, job_id: str | None) -> str: + if raw_output_dir: + output_dir = raw_output_dir + elif job_id: + output_dir = f"/storage/runtime_jobs/{job_id}/outputs" + else: + output_dir = "/storage/runtime_outputs" + os.makedirs(output_dir, exist_ok=True) + return output_dir + + +def _build_arguments( + *, + pipeline: AdagioPipeline, + runtime_arguments: Any, + output_dir: str, +) -> AdagioArguments: + arguments = pipeline.signature.to_default_arguments() + storage_root = "/storage" + + if isinstance(runtime_arguments, dict): + if isinstance(runtime_arguments.get("inputs"), dict): + _apply_named_arguments(arguments=arguments, runtime_arguments=runtime_arguments, storage_root=storage_root) + else: + _apply_legacy_arguments( + pipeline=pipeline, + arguments=arguments, + runtime_arguments=runtime_arguments, + storage_root=storage_root, + ) + + resolved_outputs = _resolve_outputs(runtime_arguments.get("outputs"), storage_root=storage_root) + if resolved_outputs is not None: + arguments.outputs = resolved_outputs + + if _outputs_need_default(arguments.outputs): + arguments.outputs = output_dir + + return arguments + + +def _apply_named_arguments( + *, arguments: AdagioArguments, runtime_arguments: dict[str, Any], storage_root: str +) -> None: + raw_inputs = runtime_arguments.get("inputs", {}) + if isinstance(raw_inputs, dict): + for name, value in raw_inputs.items(): + arguments.inputs[name] = _resolve_input_path(value, storage_root=storage_root) + + raw_parameters = runtime_arguments.get("parameters", {}) + if isinstance(raw_parameters, dict): + arguments.parameters.update(raw_parameters) + + +def _apply_legacy_arguments( + *, + pipeline: AdagioPipeline, + arguments: AdagioArguments, + runtime_arguments: dict[str, Any], + storage_root: str, +) -> None: + preprocessing = runtime_arguments.get("preprocessing", {}) + root_artifacts = preprocessing.get("root_artifacts", []) if isinstance(preprocessing, dict) else [] + token_lookup: dict[str, Any] = {} + if 
isinstance(root_artifacts, list): + for artifact in root_artifacts: + if not isinstance(artifact, dict): + continue + artifact_id = artifact.get("id") + token = artifact.get("token") + if artifact_id is None: + continue + token_lookup[str(artifact_id)] = token + + for input_def in pipeline.signature.inputs: + token = token_lookup.get(str(input_def.id)) + if token is None: + continue + arguments.inputs[input_def.name] = _resolve_input_path(token, storage_root=storage_root) + + named_inputs = runtime_arguments.get("inputs", {}) + if isinstance(named_inputs, dict): + for name, value in named_inputs.items(): + arguments.inputs[name] = _resolve_input_path(value, storage_root=storage_root) + + task_arguments = runtime_arguments.get("arguments", {}) + if isinstance(task_arguments, dict): + for step in task_arguments.values(): + if not isinstance(step, dict): + continue + params = step.get("parameters", {}) + if isinstance(params, dict): + arguments.parameters.update(params) + + top_level_params = runtime_arguments.get("parameters", {}) + if isinstance(top_level_params, dict): + arguments.parameters.update(top_level_params) + + +def _resolve_input_path(value: Any, *, storage_root: str) -> str: + if isinstance(value, dict): + path = value.get("path") + if path is None: + return str(value) + return _normalize_path(path, storage_root=storage_root) + if isinstance(value, str): + return _normalize_path(value, storage_root=storage_root) + return str(value) + + +def _resolve_outputs(value: Any, *, storage_root: str) -> str | dict[str, str] | None: + if value is None: + return None + if isinstance(value, str): + return _normalize_path(value, storage_root=storage_root) + if isinstance(value, dict): + resolved: dict[str, str] = {} + for name, output in value.items(): + if isinstance(output, dict): + resolved[name] = _resolve_input_path(output, storage_root=storage_root) + elif isinstance(output, str): + resolved[name] = _normalize_path(output, storage_root=storage_root) + else: + 
resolved[name] = str(output) + return resolved + return None + + +def _normalize_path(path: str, *, storage_root: str) -> str: + if not path: + return path + if path.startswith("/") or "://" in path: + return path + return os.path.join(storage_root, path) + + +def _outputs_need_default(outputs: str | dict[str, str]) -> bool: + if isinstance(outputs, str): + return outputs == "" or outputs == "" + return any(value in {"", ""} for value in outputs.values()) + + +def _is_missing(value: Any) -> bool: + return value is None or value == "" or value == "" + + +def _validate_required_arguments(pipeline: AdagioPipeline, arguments: AdagioArguments) -> None: + missing_inputs = [ + input_def.name + for input_def in pipeline.signature.inputs + if input_def.required and _is_missing(arguments.inputs.get(input_def.name)) + ] + missing_params = [ + param.name + for param in pipeline.signature.parameters + if param.required and param.default is None and _is_missing(arguments.parameters.get(param.name)) + ] + + if missing_inputs or missing_params: + missing = [f"input:{name}" for name in missing_inputs] + [f"param:{name}" for name in missing_params] + raise SystemExit("Missing required runtime arguments: " + ", ".join(missing)) + + +def _post_job_event(*, runtime_url: str, job_id: str, payload: dict[str, Any]) -> None: + base = runtime_url.rstrip("/") + url = f"{base}/jobs/{job_id}/events" + data = json.dumps(payload).encode("utf-8") + req = urllib.request.Request( + url, + data=data, + method="POST", + headers={"Content-Type": "application/json"}, + ) + try: + with urllib.request.urlopen(req, timeout=5): + pass + except (urllib.error.URLError, TimeoutError): + return None diff --git a/src/adagio/execution/proxy.py b/src/adagio/execution/proxy.py index 58d7052..44860b3 100644 --- a/src/adagio/execution/proxy.py +++ b/src/adagio/execution/proxy.py @@ -196,4 +196,4 @@ def _detach(value): sel = value._selector_ return (future, lambda result: result.get_column(sel)) else: - raise 
NotImplementedError \ No newline at end of file + raise NotImplementedError diff --git a/src/adagio/model/pipeline.py b/src/adagio/model/pipeline.py index f5bc935..68bf31a 100644 --- a/src/adagio/model/pipeline.py +++ b/src/adagio/model/pipeline.py @@ -7,7 +7,7 @@ from .arguments import AdagioArguments from .task import AllowableValue, AdagioTask -from .ast import TypeAST +from .ast import TypeAST, TypeASTExpression, TypeASTIntersection, TypeASTUnion class AdagioPipeline(BaseModel): @@ -69,7 +69,7 @@ def load_inputs(self, ctx, arguments, scope): for input in self.inputs: source = arguments.inputs[input.name] - if input.ast.name.startswith('Metadata') and input.ast.builtin: + if _is_metadata_ast(input.ast): print("SCHEDULED:", f'load_metadata({source!r})') scope[input.id] = load_metadata(ctx=ctx, source=source) # IIFE for the dreaded for-loop in the parent closure problem. @@ -123,3 +123,11 @@ class _ParameterDef(_Def): class _OutputDef(_Def): pass + + +def _is_metadata_ast(ast: TypeAST) -> bool: + if isinstance(ast, TypeASTExpression): + return bool(ast.builtin and ast.name.startswith("Metadata")) + if isinstance(ast, (TypeASTUnion, TypeASTIntersection)): + return any(_is_metadata_ast(member) for member in ast.members) + return False diff --git a/src/adagio/monitor/api.py b/src/adagio/monitor/api.py index 4783e53..93c4bbe 100644 --- a/src/adagio/monitor/api.py +++ b/src/adagio/monitor/api.py @@ -39,6 +39,18 @@ def start_save_output(self) -> None: """Start tracking output saving.""" return None + def finish_output( + self, + *, + output_id: str, + output_name: str, + destination: str, + status: str = "succeeded", + error: str | None = None, + ) -> None: + """Track completion for an individual output artifact.""" + return None + def finish_save_output(self) -> None: """Finish tracking output saving.""" return None diff --git a/src/adagio/monitor/composite.py b/src/adagio/monitor/composite.py new file mode 100644 index 0000000..a465ed6 --- /dev/null +++ 
b/src/adagio/monitor/composite.py @@ -0,0 +1,76 @@ +from __future__ import annotations + +from .api import Monitor + + +class CompositeMonitor(Monitor): + """Fan out monitor hooks to multiple monitor instances.""" + + def __init__(self, *monitors: Monitor): + self._monitors = tuple(monitors) + + def start_pipeline(self, *, total_tasks: int = 0) -> None: + for monitor in self._monitors: + monitor.start_pipeline(total_tasks=total_tasks) + + def start_load_input(self) -> None: + for monitor in self._monitors: + monitor.start_load_input() + + def finish_load_input(self) -> None: + for monitor in self._monitors: + monitor.finish_load_input() + + def queue_task(self, *, task_id: str, label: str, total_subtasks: int = 1) -> None: + for monitor in self._monitors: + monitor.queue_task( + task_id=task_id, + label=label, + total_subtasks=total_subtasks, + ) + + def start_task(self, *, task_id: str) -> None: + for monitor in self._monitors: + monitor.start_task(task_id=task_id) + + def advance_task( + self, *, task_id: str, advance: int = 1, message: str | None = None + ) -> None: + for monitor in self._monitors: + monitor.advance_task(task_id=task_id, advance=advance, message=message) + + def finish_task( + self, *, task_id: str, status: str = "completed", error: str | None = None + ) -> None: + for monitor in self._monitors: + monitor.finish_task(task_id=task_id, status=status, error=error) + + def start_save_output(self) -> None: + for monitor in self._monitors: + monitor.start_save_output() + + def finish_output( + self, + *, + output_id: str, + output_name: str, + destination: str, + status: str = "succeeded", + error: str | None = None, + ) -> None: + for monitor in self._monitors: + monitor.finish_output( + output_id=output_id, + output_name=output_name, + destination=destination, + status=status, + error=error, + ) + + def finish_save_output(self) -> None: + for monitor in self._monitors: + monitor.finish_save_output() + + def finish_pipeline(self) -> None: + for 
monitor in self._monitors: + monitor.finish_pipeline() diff --git a/src/adagio/monitor/connected.py b/src/adagio/monitor/connected.py new file mode 100644 index 0000000..913b657 --- /dev/null +++ b/src/adagio/monitor/connected.py @@ -0,0 +1,108 @@ +from __future__ import annotations + +import json +import urllib.error +import urllib.request +from typing import Any + +from .api import Monitor + + +class ConnectedMonitor(Monitor): + """Send monitor lifecycle events to the runtime-adapter.""" + + def __init__(self, *, runtime_url: str, job_id: str, timeout: float = 5.0): + base = runtime_url.rstrip("/") + self._url = f"{base}/jobs/{job_id}/events" + self._timeout = timeout + + def start_pipeline(self, *, total_tasks: int = 0) -> None: + self._post(event="pipeline_start", total_tasks=total_tasks) + + def start_load_input(self) -> None: + self._post(event="load_input_start") + + def finish_load_input(self) -> None: + self._post(event="load_input_finish") + + def queue_task( + self, *, task_id: str, label: str, total_subtasks: int = 1 + ) -> None: + self._post( + event="task_queued", + task_id=task_id, + label=label, + total_subtasks=total_subtasks, + ) + + def start_task(self, *, task_id: str) -> None: + self._post(event="task_started", task_id=task_id) + + def advance_task( + self, *, task_id: str, advance: int = 1, message: str | None = None + ) -> None: + payload: dict[str, Any] = { + "event": "task_progress", + "task_id": task_id, + "advance": advance, + } + if message: + payload["message"] = message + self._post(**payload) + + def finish_task( + self, *, task_id: str, status: str = "completed", error: str | None = None + ) -> None: + payload: dict[str, Any] = { + "event": "task_finished", + "task_id": task_id, + "status": status, + } + if error: + payload["error"] = error + self._post(**payload) + + def start_save_output(self) -> None: + self._post(event="save_output_start") + + def finish_output( + self, + *, + output_id: str, + output_name: str, + destination: 
str, + status: str = "succeeded", + error: str | None = None, + ) -> None: + payload: dict[str, Any] = { + "event": "output_saved", + "output_id": output_id, + "output_name": output_name, + "destination": destination, + "status": status, + } + if error: + payload["error"] = error + self._post(**payload) + + def finish_save_output(self) -> None: + self._post(event="save_output_finish") + + def finish_pipeline(self) -> None: + self._post(event="pipeline_finish") + + def _post(self, **payload: Any) -> None: + data = json.dumps(payload).encode("utf-8") + req = urllib.request.Request( + self._url, + data=data, + method="POST", + headers={"Content-Type": "application/json"}, + ) + try: + with urllib.request.urlopen(req, timeout=self._timeout): + pass + except (urllib.error.URLError, TimeoutError): + # Best-effort telemetry: execution should continue even if the + # adapter is unavailable. + return None diff --git a/src/adagio/monitor/log.py b/src/adagio/monitor/log.py index 1a96897..e350025 100644 --- a/src/adagio/monitor/log.py +++ b/src/adagio/monitor/log.py @@ -44,6 +44,29 @@ def finish_task( details += f" error={error!r}" self._console.log(f"finished task id={task_id} {details}") + def start_save_output(self) -> None: + """Log output-save start.""" + self._console.log("saving outputs") + + def finish_output( + self, + *, + output_id: str, + output_name: str, + destination: str, + status: str = "succeeded", + error: str | None = None, + ) -> None: + """Log completion of an individual output.""" + details = f"status={status} id={output_id} name={output_name!r} destination={destination!r}" + if error: + details += f" error={error!r}" + self._console.log(f"saved output {details}") + + def finish_save_output(self) -> None: + """Log output-save completion.""" + self._console.log("finished saving outputs") + def finish_pipeline(self) -> None: """Log pipeline completion.""" self._console.log("pipeline finished") diff --git a/src/adagio/serial_execute.py 
b/src/adagio/serial_execute.py new file mode 100644 index 0000000..e07a238 --- /dev/null +++ b/src/adagio/serial_execute.py @@ -0,0 +1,329 @@ +from __future__ import annotations + +import os +import typing as t +import zipfile +from collections.abc import Mapping + +from adagio.model.arguments import AdagioArguments +from adagio.model.ast import TypeAST, TypeASTExpression, TypeASTIntersection, TypeASTUnion +from adagio.model.pipeline import AdagioPipeline +from adagio.model.task import PluginActionTask, RootInputTask +from adagio.monitor.api import Monitor +from adagio.monitor.log import LogMonitor + + +def execute_serial( + *, pipeline: AdagioPipeline, arguments: AdagioArguments, monitor: Monitor | None = None +) -> None: + """Execute a pipeline serially using the QIIME API (no Parsl).""" + from qiime2 import get_cache + from qiime2.sdk import PluginManager + + sig = pipeline.signature + tasks = list(pipeline.iter_tasks()) + monitor = monitor or LogMonitor() + + pipeline.validate_graph() + sig.validate_arguments(arguments) + + monitor.start_pipeline(total_tasks=len(tasks)) + try: + for task in tasks: + monitor.queue_task( + task_id=task.id, + label=_task_label(task), + total_subtasks=1, + ) + + plugin_manager = PluginManager() + cache = get_cache() + with cache: + scope: dict[str, t.Any] = {} + completed_task_ids: set[str] = set() + + monitor.start_load_input() + _load_inputs(sig=sig, arguments=arguments, scope=scope) + monitor.finish_load_input() + + params = sig.get_params(arguments) + + for task in _iter_tasks_in_execution_order(tasks=tasks, scope=scope): + monitor.start_task(task_id=task.id) + try: + _execute_task(task=task, plugin_manager=plugin_manager, params=params, scope=scope) + monitor.advance_task(task_id=task.id, advance=1) + monitor.finish_task(task_id=task.id, status="completed") + completed_task_ids.add(task.id) + except Exception as exc: # noqa: BLE001 + monitor.finish_task(task_id=task.id, status="failed", error=str(exc)) + for skipped_task in 
tasks: + if skipped_task.id == task.id or skipped_task.id in completed_task_ids: + continue + monitor.finish_task( + task_id=skipped_task.id, + status="skipped", + error=f"Skipped because task {task.id!r} failed.", + ) + raise + + monitor.start_save_output() + _save_outputs(sig=sig, arguments=arguments, scope=scope, monitor=monitor) + monitor.finish_save_output() + finally: + monitor.finish_pipeline() + + +def _load_inputs(*, sig, arguments: AdagioArguments, scope: dict[str, t.Any]) -> None: + from qiime2 import Artifact + + for input_def in sig.inputs: + source = arguments.inputs[input_def.name] + if _is_metadata_ast(input_def.ast): + scope[input_def.id] = _load_metadata(source) + else: + scope[input_def.id] = Artifact.load(source) + + +def _load_metadata(source: str) -> t.Any: + from qiime2 import Artifact, Metadata + + if zipfile.is_zipfile(source): + return Artifact.load(source).view(Metadata) + return Metadata.load(source) + + +def _execute_task(*, task: t.Any, plugin_manager, params: dict[str, t.Any], scope: dict[str, t.Any]) -> None: + if isinstance(task, RootInputTask): + for name, src in task.inputs.items(): + dst = task.outputs[name] + scope[dst.id] = scope[src.id] + return None + + if isinstance(task, PluginActionTask): + _execute_plugin_action(task=task, plugin_manager=plugin_manager, params=params, scope=scope) + return None + + raise TypeError(f"Unsupported task type: {type(task)}") + + +def _execute_plugin_action( + *, task: PluginActionTask, plugin_manager, params: dict[str, t.Any], scope: dict[str, t.Any] +) -> None: + plugins = plugin_manager.plugins + resolved_plugin_name, plugin = _resolve_key(plugins, task.plugin) + if plugin is None: + available_plugins = ", ".join(sorted(plugins.keys())[:20]) + raise KeyError( + "Unable to find QIIME plugin " + f"{task.plugin!r} for task {task.id!r}. " + "This usually means the runtime image is missing required plugins. 
" + f"Available plugins (first 20): [{available_plugins}]" + ) + + actions = plugin.actions + resolved_action_name, action = _resolve_key(actions, task.action) + if action is None: + available_actions = ", ".join(sorted(actions.keys())[:30]) + raise KeyError( + "Unable to find QIIME action " + f"{task.plugin!r}.{task.action!r} for task {task.id!r}. " + "This usually means the runtime image is not the expected QIIME distribution/version. " + f"Available actions in plugin {task.plugin!r} (first 30): [{available_actions}]" + ) + kwargs: dict[str, t.Any] = {} + metadata_inputs: dict[str, t.Any] = {} + + for name, src in task.inputs.items(): + if src.id not in scope: + raise KeyError(f"Missing input dependency {src.id!r} for task {task.id!r}.") + value = scope[src.id] + if src.kind == "archive": + kwargs[name] = value + elif src.kind == "metadata": + metadata_inputs[name] = _as_metadata(value) + else: + raise TypeError(f"Unsupported input kind: {src.kind!r}") + + for name, param in task.parameters.items(): + if param.kind == "literal": + kwargs[name] = _coerce_action_parameter(action=action, parameter_name=name, value=param.value) + elif param.kind == "promoted": + if param.id not in params: + raise KeyError(f"Missing promoted parameter {param.id!r} for task {task.id!r}.") + kwargs[name] = _coerce_action_parameter( + action=action, + parameter_name=name, + value=params[param.id], + ) + elif param.kind == "metadata": + if name not in metadata_inputs: + raise KeyError(f"Missing metadata input {name!r} for task {task.id!r}.") + metadata = metadata_inputs.pop(name) + column = _resolve_metadata_column_name(param=param, params=params) + kwargs[name] = metadata.get_column(column) + else: + raise TypeError(f"Unsupported parameter kind: {param.kind!r}") + + for name, value in metadata_inputs.items(): + kwargs[name] = value + + results = action(**kwargs) + for name, dest in task.outputs.items(): + scope[dest.id] = getattr(results, name) + + +def _coerce_action_parameter(*, 
action: t.Any, parameter_name: str, value: t.Any) -> t.Any: + if value is None: + return None + + signature = getattr(action, "signature", None) + parameters = getattr(signature, "parameters", None) + if not isinstance(parameters, Mapping): + return value + if parameter_name not in parameters: + return value + + qiime_type = getattr(parameters[parameter_name], "qiime_type", None) + if qiime_type is None: + return value + + from qiime2.sdk.util import parse_primitive + + return parse_primitive(qiime_type, value) + + +def _resolve_key(mapping: t.Mapping[str, t.Any], requested: str) -> tuple[str | None, t.Any]: + if requested in mapping: + return requested, mapping[requested] + + canonical_requested = _canonical_name(requested) + for key in mapping.keys(): + if _canonical_name(key) == canonical_requested: + return key, mapping[key] + + return None, None + + +def _canonical_name(value: str) -> str: + return value.strip().replace("-", "_").replace(" ", "_").lower() + + +def _resolve_metadata_column_name(*, param, params: dict[str, t.Any]) -> str: + column = param.column + if column.kind == "literal": + return str(column.value) + if column.kind == "promoted": + if column.id not in params: + raise KeyError(f"Missing promoted metadata column parameter {column.id!r}.") + return str(params[column.id]) + raise TypeError(f"Unsupported metadata column selector kind: {column.kind!r}") + + +def _as_metadata(value: t.Any) -> t.Any: + from qiime2 import Artifact, Metadata + + if isinstance(value, Metadata): + return value + if isinstance(value, Artifact): + return value.view(Metadata) + return value + + +def _iter_tasks_in_execution_order(*, tasks: list[t.Any], scope: dict[str, t.Any]) -> t.Iterator[t.Any]: + remaining = list(tasks) + while remaining: + progressed = False + for task in list(remaining): + missing = _missing_input_ids(task=task, scope=scope) + if missing: + continue + remaining.remove(task) + progressed = True + yield task + + if not progressed: + details = [] + for 
task in remaining: + missing = ", ".join(_missing_input_ids(task=task, scope=scope)) + details.append(f"{task.id}: missing [{missing}]") + raise RuntimeError( + "Unable to resolve task dependencies for serial execution. " + + "; ".join(details) + ) + + +def _missing_input_ids(*, task: t.Any, scope: dict[str, t.Any]) -> list[str]: + missing: list[str] = [] + for src in task.inputs.values(): + if src.id not in scope: + missing.append(src.id) + return missing + + +def _save_outputs( + *, sig, arguments: AdagioArguments, scope: dict[str, t.Any], monitor: Monitor | None = None +) -> None: + if isinstance(arguments.outputs, str): + os.makedirs(arguments.outputs, exist_ok=True) + + for output in sig.outputs: + if output.id not in scope: + raise KeyError(f"Missing output value for {output.name!r} ({output.id}).") + + if isinstance(arguments.outputs, str): + destination = os.path.join(arguments.outputs, output.name) + elif isinstance(arguments.outputs, dict): + destination = arguments.outputs[output.name] + else: + raise TypeError("Unsupported outputs configuration.") + + parent = os.path.dirname(destination) + if parent: + os.makedirs(parent, exist_ok=True) + + value = scope[output.id] + save_fn = getattr(value, "save", None) + if not callable(save_fn): + raise TypeError(f"Output {output.name!r} does not support save().") + try: + save_fn(destination) + except Exception as exc: # noqa: BLE001 + if monitor is not None: + monitor.finish_output( + output_id=output.id, + output_name=output.name, + destination=destination, + status="failed", + error=str(exc), + ) + raise + else: + if monitor is not None: + monitor.finish_output( + output_id=output.id, + output_name=output.name, + destination=destination, + status="succeeded", + ) + + +def _is_metadata_ast(ast: TypeAST) -> bool: + if isinstance(ast, TypeASTExpression): + return bool(ast.builtin and ast.name.startswith("Metadata")) + if isinstance(ast, (TypeASTUnion, TypeASTIntersection)): + return any(_is_metadata_ast(member) 
for member in ast.members) + return False + + +def _task_label(task: t.Any) -> str: + kind = getattr(task, "kind", "unknown") + task_id = getattr(task, "id", "") + if kind == "plugin-action": + plugin = getattr(task, "plugin", "") + action = getattr(task, "action", "") + return f"{task_id} ({plugin}.{action})" + if kind == "built-in": + name = getattr(task, "name", "built-in") + return f"{task_id} ({name})" + return task_id From b43259eec9370fb921e2e6327f52c50c71af2b5b Mon Sep 17 00:00:00 2001 From: John Chase Date: Sat, 28 Feb 2026 09:08:45 -0800 Subject: [PATCH 13/44] Adds serial qiime runner with docker --- examples/arguments.json | 23 +-- src/adagio/cli/runner.py | 277 ++++++++++++++++++++++++++++++++++- src/adagio/dummy_execute.py | 62 -------- src/adagio/monitor/tty.py | 1 + src/adagio/serial_execute.py | 84 ++++++++--- 5 files changed, 343 insertions(+), 104 deletions(-) delete mode 100644 src/adagio/dummy_execute.py diff --git a/examples/arguments.json b/examples/arguments.json index 02092eb..4148c4a 100644 --- a/examples/arguments.json +++ b/examples/arguments.json @@ -1,20 +1,9 @@ { - "emp-paired-1": { - "inputs": {}, - "outputs": {}, - "parameters": { - "barcodes": "barcodes", - "trunc_len": 120, - "trim_foobar": 150 - } - }, - "foo-paired-1": { - "inputs": {}, - "outputs": {}, - "parameters": { - "barcodes": "barcodes", - "trunc_len": 120, - "trim_foobar": 150 - } + "inputs": {}, + "outputs": {}, + "parameters": { + "barcodes": "barcodes", + "trunc_len": 120, + "trim_foobar": 150 } } diff --git a/src/adagio/cli/runner.py b/src/adagio/cli/runner.py index dafaa09..70cc33c 100644 --- a/src/adagio/cli/runner.py +++ b/src/adagio/cli/runner.py @@ -1,9 +1,16 @@ import json +import os +import subprocess +import sys +import tempfile from pathlib import Path from typing import Any from rich.console import Console +DEFAULT_CONTAINER_IMAGE = "sloth-adagio-cli:latest" +HOST_MOUNT_POINT = "/host" + def run_pipeline_from_kwargs( pipeline: Path, @@ -17,10 +24,8 @@ def 
run_pipeline_from_kwargs( console: Console, ) -> None: """Run a pipeline from resolved CLI keyword arguments.""" - from ..dummy_execute import execute from ..model.arguments import AdagioArgumentsFile from ..model.pipeline import AdagioPipeline - from ..monitor.tty import RichMonitor data = json.loads(pipeline.read_text(encoding="utf-8")) pipeline_data = data.get("spec", data) if isinstance(data, dict) else data @@ -73,9 +78,32 @@ def run_pipeline_from_kwargs( ] raise SystemExit("Missing required arguments: " + ", ".join(missing)) - console.print(f"[bold]Pipeline:[/bold] {pipeline}") - console.print("[bold]Executing pipeline[/bold] (dummy mode)") - execute( + suppress_header = _is_truthy(os.getenv("ADAGIO_SUPPRESS_RUN_HEADER")) + if not suppress_header: + console.print(f"[bold]Pipeline:[/bold] {pipeline}") + + force_container = _is_truthy(os.getenv("ADAGIO_FORCE_CONTAINER")) + local_qiime_error = _probe_local_qiime_error() + if force_container or local_qiime_error is not None: + if force_container: + if not suppress_header: + console.print("[bold]Executing pipeline[/bold] (container mode; forced)") + else: + if not suppress_header: + console.print("[bold]Executing pipeline[/bold] (container mode)") + console.print( + "[yellow]Local QIIME unavailable, falling back to Docker:[/yellow] " + f"{local_qiime_error}" + ) + _execute_via_container(pipeline=pipeline, arguments=arguments, console=console) + return + + from ..monitor.tty import RichMonitor + from ..serial_execute import execute_serial + + if not suppress_header: + console.print("[bold]Executing pipeline[/bold] (qiime serial mode)") + execute_serial( pipeline=parsed_pipeline, arguments=arguments, monitor=RichMonitor(console=console), @@ -85,3 +113,242 @@ def run_pipeline_from_kwargs( def _is_missing(value: Any) -> bool: """Treat placeholders and null values as missing.""" return value is None or value == "" + + +def _probe_local_qiime_error() -> str | None: + """Return an error string if local QIIME cannot 
satisfy serial execution imports.""" + try: + import qiime2 # noqa: F401 + from qiime2 import get_cache # noqa: F401 + from qiime2.sdk import PluginManager # noqa: F401 + except Exception as exc: # noqa: BLE001 + return str(exc) + return None + + +def _execute_via_container(*, pipeline: Path, arguments: Any, console: Console) -> None: + """Execute pipeline in the shared adagio-cli Docker image.""" + image = (os.getenv("ADAGIO_CONTAINER_IMAGE") or DEFAULT_CONTAINER_IMAGE).strip() + host_cwd = Path.cwd().resolve() + host_src_root = _local_source_root() + host_paths = _collect_host_paths( + pipeline=pipeline.resolve(), + arguments=arguments, + cwd=host_cwd, + ) + host_paths.append(host_src_root) + run_arguments = _to_container_run_arguments(arguments=arguments) + + with tempfile.TemporaryDirectory(prefix="adagio-runtime-") as temp_dir: + temp_path = Path(temp_dir) + args_path = temp_path / "arguments.json" + host_paths.append(args_path.resolve()) + + args_path.write_text( + json.dumps(run_arguments, ensure_ascii=True), + encoding="utf-8", + ) + + command = [ + "docker", + "run", + "--rm", + *_docker_tty_flags(), + "-e", + f"PYTHONPATH={_containerize_path(host_src_root)}", + "-e", + "ADAGIO_SUPPRESS_RUN_HEADER=1", + *_python_warning_env_flags(), + "-w", + _containerize_path(host_cwd), + image, + "python", + "-m", + "adagio.cli.main", + "run", + "--pipeline", + _containerize_path(pipeline.resolve()), + "--arguments", + _containerize_path(args_path), + "--show-params", + "all", + ] + command = _with_mounts(command=command, host_paths=host_paths) + + console.print(f"[dim]Container image:[/dim] {image}") + try: + result = subprocess.run( + command, + check=False, + stderr=subprocess.PIPE, + text=True, + ) + except FileNotFoundError as exc: + raise SystemExit( + "Docker is required for container fallback but was not found in PATH." 
+ ) from exc + + _print_filtered_container_stderr(console=console, stderr_text=result.stderr or "") + + if result.returncode != 0: + raise SystemExit(result.returncode) + + +def _to_container_run_arguments(*, arguments: Any) -> dict[str, Any]: + """Serialize `adagio run` arguments and rewrite absolute host paths.""" + data = arguments.model_dump() if hasattr(arguments, "model_dump") else dict(arguments) + inputs = data.get("inputs", {}) + outputs = data.get("outputs") + + if isinstance(inputs, dict): + data["inputs"] = { + key: _containerize_host_value(value) if isinstance(value, str) else value + for key, value in inputs.items() + } + + if isinstance(outputs, str): + data["outputs"] = ( + _containerize_host_value(outputs) + if not _is_missing(outputs) + else outputs + ) + elif isinstance(outputs, dict): + data["outputs"] = { + key: _containerize_host_value(value) + if isinstance(value, str) and not _is_missing(value) + else value + for key, value in outputs.items() + } + + return { + "version": 1, + "inputs": data.get("inputs", {}), + "parameters": data.get("parameters", {}), + "outputs": data.get("outputs"), + } + + +def _collect_host_paths( + *, pipeline: Path, arguments: Any, cwd: Path +) -> list[Path]: + """Collect absolute host paths that must be visible in the container.""" + data = arguments.model_dump() if hasattr(arguments, "model_dump") else dict(arguments) + paths = [pipeline, cwd] + + inputs = data.get("inputs", {}) + if isinstance(inputs, dict): + for value in inputs.values(): + if isinstance(value, str) and not _is_uri(value): + as_path = Path(value) + if as_path.is_absolute(): + paths.append(as_path) + + outputs = data.get("outputs") + if isinstance(outputs, str): + if not _is_missing(outputs) and not _is_uri(outputs): + as_path = Path(outputs) + if as_path.is_absolute(): + paths.append(as_path) + elif isinstance(outputs, dict): + for value in outputs.values(): + if isinstance(value, str) and not _is_missing(value) and not _is_uri(value): + as_path 
= Path(value) + if as_path.is_absolute(): + paths.append(as_path) + + return [path.resolve() for path in paths] + + +def _with_mounts(*, command: list[str], host_paths: list[Path]) -> list[str]: + """Attach bind mounts for top-level host roots needed by this execution.""" + roots = _mount_roots(host_paths) + mount_flags: list[str] = [] + for root in roots: + mount_flags.extend( + [ + "-v", + f"{root}:{_containerize_path(root)}:rw", + ] + ) + return [*command[:3], *mount_flags, *command[3:]] + + +def _docker_tty_flags() -> list[str]: + """Allocate Docker TTY when the current session is interactive.""" + if sys.stdin.isatty() and sys.stdout.isatty(): + return ["-t"] + return [] + + +def _python_warning_env_flags() -> list[str]: + """Suppress known noisy runtime warnings in container mode.""" + filters = os.getenv("ADAGIO_PYTHONWARNINGS") + if filters is None: + filters = "ignore:pkg_resources is deprecated as an API:UserWarning" + filters = filters.strip() + if not filters: + return [] + return ["-e", f"PYTHONWARNINGS={filters}"] + + +def _mount_roots(paths: list[Path]) -> list[Path]: + """Map paths to their first-level filesystem roots for portable bind mounts.""" + roots: set[Path] = set() + for path in paths: + parts = path.parts + if len(parts) < 2: + continue + root = Path("/", parts[1]) + if root.exists(): + roots.add(root) + return sorted(roots) + + +def _containerize_host_value(value: str) -> str: + """Map an absolute host path into the container mount.""" + if _is_uri(value): + return value + as_path = Path(value) + if as_path.is_absolute(): + return _containerize_path(as_path) + return value + + +def _containerize_path(path: Path) -> str: + """Convert absolute host path to mounted container path.""" + resolved = path.resolve() + return f"{HOST_MOUNT_POINT}{resolved}" + + +def _is_uri(value: str) -> bool: + return "://" in value + + +def _is_truthy(value: str | None) -> bool: + if value is None: + return False + return value.strip().lower() in {"1", "true", 
"yes", "on"} + + +def _local_source_root() -> Path: + """Return the local `adagio-cli/src` path for container PYTHONPATH.""" + return Path(__file__).resolve().parents[2] + + +def _print_filtered_container_stderr(*, console: Console, stderr_text: str) -> None: + """Print relevant stderr lines while dropping known noisy platform warnings.""" + if not stderr_text: + return + for line in stderr_text.splitlines(): + if _is_docker_platform_warning(line): + continue + if not line.strip(): + continue + console.print(line) + + +def _is_docker_platform_warning(line: str) -> bool: + return ( + "requested image's platform" in line + and "does not match the detected host platform" in line + ) diff --git a/src/adagio/dummy_execute.py b/src/adagio/dummy_execute.py deleted file mode 100644 index 065374c..0000000 --- a/src/adagio/dummy_execute.py +++ /dev/null @@ -1,62 +0,0 @@ -import time -from typing import Any - -from adagio.model.arguments import AdagioArguments -from adagio.model.pipeline import AdagioPipeline -from adagio.monitor.api import Monitor -from adagio.monitor.log import LogMonitor - - -SLEEP_SECONDS = 5.0 -SUBTASK_COUNT = 3 - - -def execute( - *, - pipeline: AdagioPipeline, - arguments: AdagioArguments, - monitor: Monitor | None = None, -) -> None: - """Execute a pipeline with fixed dummy progress.""" - sig = pipeline.signature - monitor = monitor or LogMonitor() - tasks = list(pipeline.iter_tasks()) - - pipeline.validate_graph() - sig.validate_arguments(arguments) - - subtasks = SUBTASK_COUNT - sleep_per_subtask = SLEEP_SECONDS / SUBTASK_COUNT - - monitor.start_pipeline(total_tasks=len(tasks)) - try: - for task in tasks: - monitor.queue_task( - task_id=task.id, - label=_task_label(task), - total_subtasks=subtasks, - ) - - for task in tasks: - monitor.start_task(task_id=task.id) - for _ in range(subtasks): - time.sleep(sleep_per_subtask) - monitor.advance_task(task_id=task.id, advance=1) - - monitor.finish_task(task_id=task.id, status="completed") - finally: - 
monitor.finish_pipeline() - - -def _task_label(task: Any) -> str: - """Build a human-readable label for a task.""" - kind = getattr(task, "kind", "unknown") - task_id = getattr(task, "id", "") - if kind == "plugin-action": - plugin = getattr(task, "plugin", "") - action = getattr(task, "action", "") - return f"{task_id} ({plugin}.{action})" - if kind == "built-in": - name = getattr(task, "name", "built-in") - return f"{task_id} ({name})" - return task_id diff --git a/src/adagio/monitor/tty.py b/src/adagio/monitor/tty.py index f84150f..5691dd8 100644 --- a/src/adagio/monitor/tty.py +++ b/src/adagio/monitor/tty.py @@ -136,6 +136,7 @@ def _refresh_row(self, task: _TaskState) -> None: completed=task.completed_subtasks, row=self._render_row(task), ) + self._progress.refresh() def _render_row(self, task: _TaskState) -> str: """Build a compact row for a task.""" diff --git a/src/adagio/serial_execute.py b/src/adagio/serial_execute.py index e07a238..0d0855d 100644 --- a/src/adagio/serial_execute.py +++ b/src/adagio/serial_execute.py @@ -2,6 +2,7 @@ import os import typing as t +import warnings import zipfile from collections.abc import Mapping @@ -12,6 +13,8 @@ from adagio.monitor.api import Monitor from adagio.monitor.log import LogMonitor +SERIAL_SUBTASK_COUNT = 1 + def execute_serial( *, pipeline: AdagioPipeline, arguments: AdagioArguments, monitor: Monitor | None = None @@ -29,13 +32,6 @@ def execute_serial( monitor.start_pipeline(total_tasks=len(tasks)) try: - for task in tasks: - monitor.queue_task( - task_id=task.id, - label=_task_label(task), - total_subtasks=1, - ) - plugin_manager = PluginManager() cache = get_cache() with cache: @@ -46,9 +42,18 @@ def execute_serial( _load_inputs(sig=sig, arguments=arguments, scope=scope) monitor.finish_load_input() + execution_plan = _plan_execution_order(tasks=tasks, scope=scope) + for task in execution_plan: + monitor.queue_task( + task_id=task.id, + label=_task_label(task), + # QIIME actions do not expose nested subtask 
progress. + total_subtasks=SERIAL_SUBTASK_COUNT, + ) + params = sig.get_params(arguments) - for task in _iter_tasks_in_execution_order(tasks=tasks, scope=scope): + for task in execution_plan: monitor.start_task(task_id=task.id) try: _execute_task(task=task, plugin_manager=plugin_manager, params=params, scope=scope) @@ -168,7 +173,8 @@ def _execute_plugin_action( for name, value in metadata_inputs.items(): kwargs[name] = value - results = action(**kwargs) + with _action_output_context(): + results = action(**kwargs) for name, dest in task.outputs.items(): scope[dest.id] = getattr(results, name) @@ -230,35 +236,36 @@ def _as_metadata(value: t.Any) -> t.Any: return value -def _iter_tasks_in_execution_order(*, tasks: list[t.Any], scope: dict[str, t.Any]) -> t.Iterator[t.Any]: +def _plan_execution_order(*, tasks: list[t.Any], scope: dict[str, t.Any]) -> list[t.Any]: + """Return a dependency-respecting serial execution plan.""" + available_ids = set(scope.keys()) remaining = list(tasks) + planned: list[t.Any] = [] + while remaining: progressed = False for task in list(remaining): - missing = _missing_input_ids(task=task, scope=scope) + missing = [src.id for src in task.inputs.values() if src.id not in available_ids] if missing: continue + + planned.append(task) remaining.remove(task) progressed = True - yield task + for output in task.outputs.values(): + available_ids.add(output.id) if not progressed: details = [] for task in remaining: - missing = ", ".join(_missing_input_ids(task=task, scope=scope)) + missing = ", ".join(src.id for src in task.inputs.values() if src.id not in available_ids) details.append(f"{task.id}: missing [{missing}]") raise RuntimeError( "Unable to resolve task dependencies for serial execution. 
" + "; ".join(details) ) - -def _missing_input_ids(*, task: t.Any, scope: dict[str, t.Any]) -> list[str]: - missing: list[str] = [] - for src in task.inputs.values(): - if src.id not in scope: - missing.append(src.id) - return missing + return planned def _save_outputs( @@ -327,3 +334,40 @@ def _task_label(task: t.Any) -> str: name = getattr(task, "name", "built-in") return f"{task_id} ({name})" return task_id + + +class _action_output_context: + """Suppress plugin stdout/stderr noise unless explicitly enabled.""" + + def __enter__(self): + mode = os.getenv("ADAGIO_ACTION_STDIO", "").strip().lower() + self._suppress = mode not in {"inherit", "show", "verbose", "1", "true", "yes"} + if not self._suppress: + return self + + self._saved_fds: list[tuple[int, int]] = [] + self._sink = open(os.devnull, "w", encoding="utf-8") + self._warnings = warnings.catch_warnings() + self._warnings.__enter__() + warnings.filterwarnings( + "ignore", + message="pkg_resources is deprecated as an API.*", + category=UserWarning, + ) + for fd in (1, 2): + saved = os.dup(fd) + self._saved_fds.append((fd, saved)) + os.dup2(self._sink.fileno(), fd) + return self + + def __exit__(self, exc_type, exc, tb): + if not getattr(self, "_suppress", False): + return False + for fd, saved in reversed(self._saved_fds): + try: + os.dup2(saved, fd) + finally: + os.close(saved) + self._warnings.__exit__(exc_type, exc, tb) + self._sink.close() + return False From fba097e54132167e8788bd905b0f8d9723595c83 Mon Sep 17 00:00:00 2001 From: John Chase Date: Wed, 4 Mar 2026 11:36:39 -0800 Subject: [PATCH 14/44] Fizes output name issue --- src/adagio/cli/runner.py | 48 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 47 insertions(+), 1 deletion(-) diff --git a/src/adagio/cli/runner.py b/src/adagio/cli/runner.py index 70cc33c..9534913 100644 --- a/src/adagio/cli/runner.py +++ b/src/adagio/cli/runner.py @@ -10,6 +10,7 @@ DEFAULT_CONTAINER_IMAGE = "sloth-adagio-cli:latest" HOST_MOUNT_POINT = "/host" 
+DEFAULT_OUTPUT_DIRNAME = "adagio-outputs" def run_pipeline_from_kwargs( @@ -31,9 +32,11 @@ def run_pipeline_from_kwargs( pipeline_data = data.get("spec", data) if isinstance(data, dict) else data parsed_pipeline = AdagioPipeline.model_validate(pipeline_data) arguments = parsed_pipeline.signature.to_default_arguments() + output_names = [output.name for output in parsed_pipeline.signature.outputs] input_names = {name for _, name in input_bindings} param_names = {name for _, name in param_bindings} + output_name_set = set(output_names) if arguments_file is not None: file_data = json.loads(arguments_file.read_text(encoding="utf-8")) @@ -51,9 +54,17 @@ def run_pipeline_from_kwargs( "Unknown parameters in arguments file: " + ", ".join(unknown_params) ) + unknown_outputs: list[str] = [] + if isinstance(arguments_data.outputs, dict): + unknown_outputs = sorted(set(arguments_data.outputs) - output_name_set) + if unknown_outputs: + raise SystemExit( + "Unknown outputs in arguments file: " + ", ".join(unknown_outputs) + ) + arguments.inputs.update(arguments_data.inputs) arguments.parameters.update(arguments_data.parameters) - if arguments_data.outputs: + if arguments_data.outputs is not None: arguments.outputs = arguments_data.outputs for ident, original in input_bindings: @@ -78,6 +89,12 @@ def run_pipeline_from_kwargs( ] raise SystemExit("Missing required arguments: " + ", ".join(missing)) + arguments.outputs = _resolve_output_destinations( + outputs=arguments.outputs, + output_names=output_names, + cwd=Path.cwd().resolve(), + ) + suppress_header = _is_truthy(os.getenv("ADAGIO_SUPPRESS_RUN_HEADER")) if not suppress_header: console.print(f"[bold]Pipeline:[/bold] {pipeline}") @@ -115,6 +132,35 @@ def _is_missing(value: Any) -> bool: return value is None or value == "" +def _is_missing_output(value: Any) -> bool: + if not isinstance(value, str): + return True + return value == "" or value == "" + + +def _resolve_output_destinations( + *, + outputs: str | dict[str, str], + 
output_names: list[str], + cwd: Path, +) -> str | dict[str, str]: + default_output_dir = (cwd / DEFAULT_OUTPUT_DIRNAME).resolve() + if isinstance(outputs, str): + if _is_missing_output(outputs): + return str(default_output_dir) + return outputs + + if not isinstance(outputs, dict): + raise TypeError("Unsupported outputs configuration.") + + resolved = dict(outputs) + for output_name in output_names: + value = resolved.get(output_name) + if _is_missing_output(value): + resolved[output_name] = str((default_output_dir / output_name).resolve()) + return resolved + + def _probe_local_qiime_error() -> str | None: """Return an error string if local QIIME cannot satisfy serial execution imports.""" try: From a731f5ee08bf2d2968c437f803d8e760d2fe2b3d Mon Sep 17 00:00:00 2001 From: John Chase Date: Thu, 5 Mar 2026 20:30:35 -0800 Subject: [PATCH 15/44] removes example --- examples/Dada2-arguments.json | 12 -- examples/Dada2.adg | 301 ---------------------------------- examples/arguments.json | 9 - 3 files changed, 322 deletions(-) delete mode 100644 examples/Dada2-arguments.json delete mode 100644 examples/Dada2.adg delete mode 100644 examples/arguments.json diff --git a/examples/Dada2-arguments.json b/examples/Dada2-arguments.json deleted file mode 100644 index b1d3cf7..0000000 --- a/examples/Dada2-arguments.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "version": 1, - "inputs": { - "sample_metadata": "sm.tsv", - "table": "table.qza" - }, - "parameters": { - "compare": "treatment", - "metric": "canberra" - }, - "outputs": {} -} diff --git a/examples/Dada2.adg b/examples/Dada2.adg deleted file mode 100644 index ec61d07..0000000 --- a/examples/Dada2.adg +++ /dev/null @@ -1,301 +0,0 @@ -{ - "name": "Dada2", - "description": "", - "spec": { - "type": "pipeline", - "meta": { - "version": "1.0.0rc" - }, - "signature": { - "inputs": [ - { - "id": "519bcdb4-0ebd-4d91-8831-d631514550ae", - "name": "seqs", - "type": "RawSequences | EMPSingleEndSequences | EMPPairedEndSequences", - "ast": { 
- "type": "union", - "members": [ - { - "name": "RawSequences", - "type": "expression", - "fields": [], - "builtin": false, - "predicate": null - }, - { - "name": "EMPSingleEndSequences", - "type": "expression", - "fields": [], - "builtin": false, - "predicate": null - }, - { - "name": "EMPPairedEndSequences", - "type": "expression", - "fields": [], - "builtin": false, - "predicate": null - } - ] - }, - "required": true - }, - { - "id": "102b1edf-1bfa-413d-b878-ca7b75e5a43e", - "name": "barcodes", - "type": "MetadataColumn[Categorical]", - "ast": { - "name": "MetadataColumn", - "type": "expression", - "fields": [ - { - "name": "Categorical", - "type": "expression", - "fields": [], - "builtin": true, - "predicate": null - } - ], - "builtin": true, - "predicate": null - }, - "required": true - } - ], - "parameters": [ - { - "id": "62d9cdad-4d4d-4cb3-ac44-e1d3be2249c3", - "name": "barcodes", - "required": true, - "type": "MetadataColumn[Categorical]", - "ast": { - "name": "MetadataColumn", - "type": "expression", - "fields": [ - { - "name": "Categorical", - "type": "expression", - "fields": [], - "builtin": true, - "predicate": null - } - ], - "builtin": true, - "predicate": null - } - }, - { - "id": "48e95116-d211-4943-b03e-074834a97c0e", - "name": "trunc_len", - "required": false, - "default": null, - "type": "Int", - "ast": { - "name": "Int", - "type": "expression", - "fields": [], - "builtin": true, - "predicate": null - } - }, - { - "id": "4837a451-6e0b-40bb-8422-0740d73fe562", - "name": "trim_foobar", - "required": false, - "default": 150, - "type": "Int", - "ast": { - "name": "Int", - "type": "expression", - "fields": [], - "builtin": true, - "predicate": null - } - } - ], - "outputs": [ - { - "id": "53395d12-a842-4f3d-b960-4df47fb9d2b5", - "name": "table_1", - "type": "FeatureTable[Frequency]", - "ast": { - "name": "FeatureTable", - "type": "expression", - "fields": [ - { - "name": "Frequency", - "type": "expression", - "fields": [], - "builtin": false, - 
"predicate": null - } - ], - "builtin": false, - "predicate": null - } - }, - { - "id": "c25ee826-dd40-4c50-960c-aa445bdf121f", - "name": "denoising_stats_1", - "type": "SampleData[DADA2Stats]", - "ast": { - "name": "SampleData", - "type": "expression", - "fields": [ - { - "name": "DADA2Stats", - "type": "expression", - "fields": [], - "builtin": false, - "predicate": null - } - ], - "builtin": false, - "predicate": null - } - }, - { - "id": "0debddb6-8d5a-4e7d-82da-c96a3d7506cc", - "name": "representative_sequences_1", - "type": "FeatureData[Sequence]", - "ast": { - "name": "FeatureData", - "type": "expression", - "fields": [ - { - "name": "Sequence", - "type": "expression", - "fields": [], - "builtin": false, - "predicate": null - } - ], - "builtin": false, - "predicate": null - } - }, - { - "id": "1dca4868-80b5-4c78-9b58-a267c390da37", - "name": "per_sample_sequences", - "type": "SampleData[SequencesWithQuality]", - "ast": { - "name": "SampleData", - "type": "expression", - "fields": [ - { - "name": "SequencesWithQuality", - "type": "expression", - "fields": [], - "builtin": false, - "predicate": null - } - ], - "builtin": false, - "predicate": null - } - }, - { - "id": "9ea8facf-c776-4cf2-83f9-653dbac8edde", - "name": "error_correction_details", - "type": "ErrorCorrectionDetails", - "ast": { - "name": "ErrorCorrectionDetails", - "type": "expression", - "fields": [], - "builtin": false, - "predicate": null - } - } - ] - }, - "graph": [ - { - "id": "4cb48e7b-1ab9-4db2-b615-c03246fe79e4", - "kind": "plugin-action", - "plugin": "dada2", - "action": "denoise_single", - "inputs": { - "demultiplexed_seqs": { - "kind": "archive", - "id": "1dca4868-80b5-4c78-9b58-a267c390da37" - } - }, - "parameters": { - "trunc_len": { - "kind": "promoted", - "id": "48e95116-d211-4943-b03e-074834a97c0e" - }, - "trim_left": { - "kind": "promoted", - "id": "4837a451-6e0b-40bb-8422-0740d73fe562" - } - }, - "outputs": { - "table": { - "kind": "archive", - "id": 
"53395d12-a842-4f3d-b960-4df47fb9d2b5" - }, - "representative_sequences": { - "kind": "archive", - "id": "0debddb6-8d5a-4e7d-82da-c96a3d7506cc" - }, - "denoising_stats": { - "kind": "archive", - "id": "c25ee826-dd40-4c50-960c-aa445bdf121f" - } - } - }, - { - "id": "d034ef2e-23c2-4b84-bbae-dd2a9b5bfba9", - "kind": "plugin-action", - "plugin": "demux", - "action": "emp_single", - "inputs": { - "seqs": { - "kind": "archive", - "id": "519bcdb4-0ebd-4d91-8831-d631514550ae" - }, - "barcodes": { - "kind": "metadata", - "id": "102b1edf-1bfa-413d-b878-ca7b75e5a43e" - } - }, - "parameters": { - "barcodes": { - "kind": "metadata", - "column": { - "kind": "promoted", - "id": "62d9cdad-4d4d-4cb3-ac44-e1d3be2249c3" - } - } - }, - "outputs": { - "per_sample_sequences": { - "kind": "archive", - "id": "1dca4868-80b5-4c78-9b58-a267c390da37" - }, - "error_correction_details": { - "kind": "archive", - "id": "9ea8facf-c776-4cf2-83f9-653dbac8edde" - } - } - } - ] - }, - "layout": { - "version": 0, - "type": "grid", - "positions": { - "4cb48e7b-1ab9-4db2-b615-c03246fe79e4": { - "row": 2, - "col": 3 - }, - "d034ef2e-23c2-4b84-bbae-dd2a9b5bfba9": { - "row": 1, - "col": 3 - } - } - }, - "exportedAt": "2026-02-17T03:58:22.236Z", - "version": 1 -} \ No newline at end of file diff --git a/examples/arguments.json b/examples/arguments.json deleted file mode 100644 index 4148c4a..0000000 --- a/examples/arguments.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "inputs": {}, - "outputs": {}, - "parameters": { - "barcodes": "barcodes", - "trunc_len": 120, - "trim_foobar": 150 - } -} From 740f781fe0d0c0dfd19052400b2dae0b0422c7ef Mon Sep 17 00:00:00 2001 From: John Chase Date: Fri, 6 Mar 2026 16:37:49 -0800 Subject: [PATCH 16/44] restores qapi --- src/adagio/cli/main.py | 2 + src/adagio/cli/qapi.py | 71 ++++++++++++++++++++ src/adagio/qapi/__init__.py | 8 +++ src/adagio/qapi/build.py | 127 ++++++++++++++++++++++++++++++++++++ src/adagio/qapi/client.py | 38 +++++++++++ 5 files changed, 246 insertions(+) create 
mode 100644 src/adagio/cli/qapi.py create mode 100644 src/adagio/qapi/__init__.py create mode 100644 src/adagio/qapi/build.py create mode 100644 src/adagio/qapi/client.py diff --git a/src/adagio/cli/main.py b/src/adagio/cli/main.py index 6ddc240..3ae225e 100644 --- a/src/adagio/cli/main.py +++ b/src/adagio/cli/main.py @@ -12,6 +12,7 @@ from ..app.parsers.pipeline import parse_inputs, parse_parameters from .args import ShowParamsMode, extract_flag_value, promote_positional_pipeline from .dynamic import build_dynamic_run +from .qapi import build_qapi from .runner import run_pipeline_from_kwargs @@ -39,6 +40,7 @@ def main(argv: list[str] | None = None) -> None: name="adagio", help="Adagio command line tool for processing pipelines created with the Adagio GUI.", ) + app.command(build_qapi, name="build-qapi") if not pipeline_str: diff --git a/src/adagio/cli/qapi.py b/src/adagio/cli/qapi.py new file mode 100644 index 0000000..21cfc48 --- /dev/null +++ b/src/adagio/cli/qapi.py @@ -0,0 +1,71 @@ +import json +from pathlib import Path +from typing import Annotated + +from cyclopts import Parameter +from rich.console import Console + +from ..qapi import DEFAULT_SCHEMA_VERSION, generate_qapi_payload, submit_qapi_payload + +console = Console() + + +def build_qapi( + *, + action_url: Annotated[ + str | None, + Parameter( + name=("--action-url",), + help=( + "Action Potential API base URL (e.g. http://localhost:81/api/v1). " + "Defaults to ACTION_URL env var." 
+ ), + ), + ] = None, + schema_version: Annotated[ + str, + Parameter( + name=("--schema-version",), + help="Schema version string stored alongside generated plugin data.", + ), + ] = DEFAULT_SCHEMA_VERSION, + output: Annotated[ + Path | None, + Parameter( + name=("--output",), + help="Optional path to write the generated request JSON.", + ), + ] = None, + timeout: Annotated[ + int, + Parameter( + name=("--timeout",), + help="HTTP timeout (seconds) for submitting to Action Potential.", + ), + ] = 60, + dry_run: Annotated[ + bool, + Parameter( + name=("--dry-run",), + help="Generate the payload but do not submit it to Action Potential.", + ), + ] = False, +) -> None: + """Generate QAPI from the active QIIME environment and submit it to Action Potential.""" + request_body = generate_qapi_payload(schema_version=schema_version) + + if output is not None: + output.write_text(json.dumps(request_body, indent=2), encoding="utf-8") + console.print(f"[green]Wrote QAPI payload:[/green] {output}") + + if dry_run: + console.print("[yellow]Dry run enabled; skipping submit.[/yellow]") + return + + url, status, response_body = submit_qapi_payload( + request_body, action_url=action_url, timeout=timeout + ) + + console.print(f"[green]Submitted QAPI to[/green] {url} [green](HTTP {status})[/green]") + if response_body.strip(): + console.print(response_body) diff --git a/src/adagio/qapi/__init__.py b/src/adagio/qapi/__init__.py new file mode 100644 index 0000000..dd69a83 --- /dev/null +++ b/src/adagio/qapi/__init__.py @@ -0,0 +1,8 @@ +from .build import DEFAULT_SCHEMA_VERSION, generate_qapi_payload +from .client import submit_qapi_payload + +__all__ = [ + "DEFAULT_SCHEMA_VERSION", + "generate_qapi_payload", + "submit_qapi_payload", +] diff --git a/src/adagio/qapi/build.py b/src/adagio/qapi/build.py new file mode 100644 index 0000000..171313e --- /dev/null +++ b/src/adagio/qapi/build.py @@ -0,0 +1,127 @@ +import collections +from typing import Any, cast + +DEFAULT_SCHEMA_VERSION = 
"0.1.0" + + +def generate_qapi_payload(*, schema_version: str = DEFAULT_SCHEMA_VERSION) -> dict[str, Any]: + import qiime2 + import qiime2.core.transform as transform + import qiime2.sdk + from qiime2.core.type.grammar import IntersectionExp, PredicateExp, UnionExp + from qiime2.core.type.meta import TypeExp, TypeVarExp + + plugin_manager = qiime2.sdk.PluginManager() + + def flatten_type_maps(qiime_type: Any) -> Any: + if isinstance(qiime_type, TypeVarExp): + final = [] + for outer in list(qiime_type): + if isinstance(outer, PredicateExp): + final.append(outer) + continue + for inner in list(outer): + final.append(flatten_type_maps(inner)) + final_union = UnionExp(final) + final_union.normalize() + return final_union + + if isinstance(qiime_type, TypeExp): + final_fields = [flatten_type_maps(field) for field in qiime_type.fields] + + final_predicate = None + if isinstance(qiime_type.predicate, UnionExp): + predicate = qiime_type.predicate.unpack_union() + final_predicate = UnionExp([flatten_type_maps(elem) for elem in predicate]) + final_predicate.normalize() + elif isinstance(qiime_type.predicate, IntersectionExp): + predicate = qiime_type.predicate.unpack_intersection() + final_predicate = IntersectionExp( + [flatten_type_maps(elem) for elem in predicate] + ) + final_predicate.normalize() + elif isinstance(qiime_type.predicate, PredicateExp): + final_predicate = flatten_type_maps(qiime_type.predicate) + + return qiime_type.duplicate(final_fields, final_predicate) + + return qiime_type + + def ast_to_basename(ast: dict[str, Any]) -> str: + if not ast.get("fields"): + return cast(str, ast["name"]) + + fields = [ast_to_basename(field) for field in cast(list[dict[str, Any]], ast["fields"])] + return f"{ast['name']}[{', '.join(fields)}]" + + def add_metadata_flag(ast: dict[str, Any]) -> dict[str, Any]: + try: + key = ast_to_basename(ast) + artifact_class = plugin_manager.artifact_classes[key] + from_type = transform.ModelType.from_view_type(artifact_class.format) + 
to_type = transform.ModelType.from_view_type(qiime2.Metadata) + ast["has_metadata"] = from_type.has_transformation(to_type) + except Exception: + return ast + return ast + + def optional_desc(value: Any) -> str | None: + no_value = qiime2.core.type.signature.__NoValueMeta # type: ignore[attr-defined] + return value if type(value) is not no_value else None + + def build_inspect_dict(action: Any) -> dict[str, Any]: + return { + "id": action.id, + "inputs": [ + { + "name": name, + "type": repr(spec.qiime_type), + "ast": flatten_type_maps(spec.qiime_type).to_ast(), + "required": not spec.has_default(), + "description": optional_desc(spec.description), + } + for name, spec in action.signature.inputs.items() + ], + "parameters": [ + { + "name": name, + "type": repr(spec.qiime_type), + "ast": flatten_type_maps(spec.qiime_type).to_ast(), + "required": not spec.has_default(), + "default": spec.default if spec.has_default() else None, + "description": optional_desc(spec.description), + } + for name, spec in action.signature.parameters.items() + ], + "outputs": [ + { + "name": name, + "type": repr(spec.qiime_type), + "ast": add_metadata_flag(flatten_type_maps(spec.qiime_type).to_ast()), + "description": optional_desc(spec.description), + } + for name, spec in action.signature.outputs.items() + ], + "name": action.name, + "description": action.description, + "source": action.source.replace("\n```python\n", "").replace("```\n", ""), + } + + def build_data_dict(data: Any) -> dict[str, Any]: + result: dict[str, Any] = collections.defaultdict(dict) + for key, value in data.items(): + result[key] = build_inspect_dict(value) + return result + + qapi: dict[str, Any] = {} + for plugin_name in sorted(plugin_manager.plugins): + plugin = plugin_manager.plugins[plugin_name] + methods_dict = build_data_dict(plugin.actions) + methods_dict.update(build_data_dict(plugin.pipelines)) + qapi[plugin_name] = {"methods": methods_dict} + + return { + "qiime_version": qiime2.__version__, + 
"schema_version": schema_version, + "data": qapi, + } diff --git a/src/adagio/qapi/client.py b/src/adagio/qapi/client.py new file mode 100644 index 0000000..9392c82 --- /dev/null +++ b/src/adagio/qapi/client.py @@ -0,0 +1,38 @@ +import json +import os +from typing import Any +from urllib.error import HTTPError, URLError +from urllib.request import Request, urlopen + + +def submit_qapi_payload( + payload: dict[str, Any], + *, + action_url: str | None = None, + timeout: int = 60, +) -> tuple[str, int, str]: + resolved_action_url = action_url or os.getenv("ACTION_URL") + if not resolved_action_url: + raise SystemExit( + "No Action URL configured. Set --action-url or ACTION_URL environment variable." + ) + + url = resolved_action_url.rstrip("/") + "/qapi/" + req = Request( + url=url, + data=json.dumps(payload).encode("utf-8"), + headers={"Content-Type": "application/json"}, + method="POST", + ) + + try: + with urlopen(req, timeout=timeout) as resp: # nosec: B310 - user-supplied API URL is intended + status = resp.status + response_body = resp.read().decode("utf-8", errors="replace") + except HTTPError as exc: + body = exc.read().decode("utf-8", errors="replace") + raise SystemExit(f"QAPI submit failed ({exc.code}): {body}") from exc + except URLError as exc: + raise SystemExit(f"QAPI submit failed: {exc.reason}") from exc + + return url, status, response_body From 0ee4e28e8fd3880ad3397e41c70c5c730512b284 Mon Sep 17 00:00:00 2001 From: John Chase Date: Sun, 8 Mar 2026 15:06:24 -0700 Subject: [PATCH 17/44] Adds ability to add single plugins --- src/adagio/cli/qapi.py | 97 +++++++++++++++++++++++++++++++++++---- src/adagio/qapi/build.py | 35 +++++++++++++- src/adagio/qapi/client.py | 19 ++++++-- 3 files changed, 138 insertions(+), 13 deletions(-) diff --git a/src/adagio/cli/qapi.py b/src/adagio/cli/qapi.py index 21cfc48..6123419 100644 --- a/src/adagio/cli/qapi.py +++ b/src/adagio/cli/qapi.py @@ -1,4 +1,5 @@ import json +import os from pathlib import Path from typing 
import Annotated @@ -10,6 +11,39 @@ console = Console() +def _print_submission_summary(response_body: object) -> None: + if isinstance(response_body, dict): + message = response_body.get("message") + if isinstance(message, str) and message.strip(): + console.print(message) + + operations = response_body.get("operations") + if isinstance(operations, list): + created = [ + operation["plugin_name"] + for operation in operations + if isinstance(operation, dict) and operation.get("action") == "create" + ] + overwritten = [ + operation["plugin_name"] + for operation in operations + if isinstance(operation, dict) and operation.get("action") == "overwrite" + ] + if created: + console.print(f"[green]Create:[/green] {', '.join(created)}") + if overwritten: + console.print(f"[yellow]Overwrite:[/yellow] {', '.join(overwritten)}") + return + + if isinstance(response_body, str): + if response_body.strip(): + console.print(response_body) + return + + if response_body is not None: + console.print(json.dumps(response_body, indent=2)) + + def build_qapi( *, action_url: Annotated[ @@ -29,6 +63,26 @@ def build_qapi( help="Schema version string stored alongside generated plugin data.", ), ] = DEFAULT_SCHEMA_VERSION, + plugin: Annotated[ + tuple[str, ...], + Parameter( + name=("--plugin",), + help=( + "Plugin name to include. Repeat the option for multiple plugins. " + "Comma-separated values are also accepted." + ), + ), + ] = (), + all_plugins: Annotated[ + bool, + Parameter( + name=("--all",), + help=( + "Submit all installed plugins. This is also the default when " + "no --plugin values are provided." + ), + ), + ] = False, output: Annotated[ Path | None, Parameter( @@ -47,25 +101,52 @@ def build_qapi( bool, Parameter( name=("--dry-run",), - help="Generate the payload but do not submit it to Action Potential.", + help=( + "Preview the backend changes without writing them. If no Action URL is " + "configured, this falls back to generating the payload locally only." 
+ ), + ), + ] = False, + force_overwrite: Annotated[ + bool, + Parameter( + name=("--force-overwrite",), + help="Overwrite existing plugins for the same QIIME version.", ), ] = False, ) -> None: """Generate QAPI from the active QIIME environment and submit it to Action Potential.""" - request_body = generate_qapi_payload(schema_version=schema_version) + if all_plugins and plugin: + raise SystemExit("Use either --all or --plugin, not both.") + + requested_plugins = None if all_plugins or not plugin else plugin + try: + request_body = generate_qapi_payload( + schema_version=schema_version, + plugins=requested_plugins, + ) + except ValueError as exc: + raise SystemExit(str(exc)) from exc if output is not None: output.write_text(json.dumps(request_body, indent=2), encoding="utf-8") console.print(f"[green]Wrote QAPI payload:[/green] {output}") - if dry_run: - console.print("[yellow]Dry run enabled; skipping submit.[/yellow]") + resolved_action_url = action_url or os.getenv("ACTION_URL") + if dry_run and not resolved_action_url: + console.print( + "[yellow]Dry run enabled without an Action URL; generated the payload locally only.[/yellow]" + ) return url, status, response_body = submit_qapi_payload( - request_body, action_url=action_url, timeout=timeout + request_body, + action_url=action_url, + timeout=timeout, + dry_run=dry_run, + force_overwrite=force_overwrite, ) - console.print(f"[green]Submitted QAPI to[/green] {url} [green](HTTP {status})[/green]") - if response_body.strip(): - console.print(response_body) + verb = "Previewed QAPI submit against" if dry_run else "Submitted QAPI to" + console.print(f"[green]{verb}[/green] {url} [green](HTTP {status})[/green]") + _print_submission_summary(response_body) diff --git a/src/adagio/qapi/build.py b/src/adagio/qapi/build.py index 171313e..c74b578 100644 --- a/src/adagio/qapi/build.py +++ b/src/adagio/qapi/build.py @@ -1,10 +1,31 @@ import collections +from collections.abc import Sequence from typing import Any, cast 
DEFAULT_SCHEMA_VERSION = "0.1.0" -def generate_qapi_payload(*, schema_version: str = DEFAULT_SCHEMA_VERSION) -> dict[str, Any]: +def normalize_plugin_selection(plugin_names: Sequence[str] | None) -> list[str] | None: + """Normalize repeated or comma-separated plugin names.""" + if plugin_names is None: + return None + + normalized: list[str] = [] + for plugin_name in plugin_names: + for token in plugin_name.split(","): + stripped = token.strip() + if stripped: + normalized.append(stripped) + + return normalized + + +def generate_qapi_payload( + *, + schema_version: str = DEFAULT_SCHEMA_VERSION, + plugins: Sequence[str] | None = None, +) -> dict[str, Any]: + """Generate a QAPI payload for all plugins or a selected subset.""" import qiime2 import qiime2.core.transform as transform import qiime2.sdk @@ -114,7 +135,17 @@ def build_data_dict(data: Any) -> dict[str, Any]: return result qapi: dict[str, Any] = {} - for plugin_name in sorted(plugin_manager.plugins): + requested_plugins = normalize_plugin_selection(plugins) + selected_plugins = sorted(plugin_manager.plugins) + if requested_plugins is not None: + available_plugins = set(plugin_manager.plugins) + missing_plugins = sorted(set(requested_plugins) - available_plugins) + if missing_plugins: + missing = ", ".join(missing_plugins) + raise ValueError(f"Unknown plugin name(s): {missing}") + selected_plugins = sorted(set(requested_plugins)) + + for plugin_name in selected_plugins: plugin = plugin_manager.plugins[plugin_name] methods_dict = build_data_dict(plugin.actions) methods_dict.update(build_data_dict(plugin.pipelines)) diff --git a/src/adagio/qapi/client.py b/src/adagio/qapi/client.py index 9392c82..c72a342 100644 --- a/src/adagio/qapi/client.py +++ b/src/adagio/qapi/client.py @@ -10,7 +10,9 @@ def submit_qapi_payload( *, action_url: str | None = None, timeout: int = 60, -) -> tuple[str, int, str]: + dry_run: bool = False, + force_overwrite: bool = False, +) -> tuple[str, int, Any]: resolved_action_url = 
action_url or os.getenv("ACTION_URL") if not resolved_action_url: raise SystemExit( @@ -18,9 +20,14 @@ def submit_qapi_payload( ) url = resolved_action_url.rstrip("/") + "/qapi/" + request_body = { + **payload, + "dry_run": dry_run, + "force_overwrite": force_overwrite, + } req = Request( url=url, - data=json.dumps(payload).encode("utf-8"), + data=json.dumps(request_body).encode("utf-8"), headers={"Content-Type": "application/json"}, method="POST", ) @@ -35,4 +42,10 @@ def submit_qapi_payload( except URLError as exc: raise SystemExit(f"QAPI submit failed: {exc.reason}") from exc - return url, status, response_body + if not response_body.strip(): + return url, status, "" + + try: + return url, status, json.loads(response_body) + except json.JSONDecodeError: + return url, status, response_body From 17bbee7b9add8c42d9d95bf784334cd47ae6bd0e Mon Sep 17 00:00:00 2001 From: John Chase Date: Sun, 8 Mar 2026 15:47:02 -0700 Subject: [PATCH 18/44] Addresses github pr review --- src/adagio/cli/main.py | 2 +- src/adagio/cli/runtime.py | 20 +++++++++++++++++--- src/adagio/serial_execute.py | 10 +++++++++- 3 files changed, 27 insertions(+), 5 deletions(-) diff --git a/src/adagio/cli/main.py b/src/adagio/cli/main.py index be870b5..82fa50e 100644 --- a/src/adagio/cli/main.py +++ b/src/adagio/cli/main.py @@ -77,8 +77,8 @@ def run( ), ] = ShowParamsMode.REQUIRED, ): - _ = show_params """Run a pipeline (requires --pipeline; dynamic options come from that file).""" + _ = show_params raise SystemExit( "Missing --pipeline. 
Try:\n adagio run --pipeline pipeline.json --help" ) diff --git a/src/adagio/cli/runtime.py b/src/adagio/cli/runtime.py index c3d0a8e..3b2c67f 100644 --- a/src/adagio/cli/runtime.py +++ b/src/adagio/cli/runtime.py @@ -21,10 +21,17 @@ def run_runtime(argv: list[str], *, console: Console) -> None: """Runtime entrypoint used by the runtime-adapter job container.""" parser = argparse.ArgumentParser( prog="adagio runtime", - description="Execute a pipeline from spec/config/arguments files.", + description=( + "Execute a pipeline from spec/config/arguments files. " + "The config file is currently validated for compatibility but does not alter runtime behavior." + ), ) parser.add_argument("--spec", required=True, help="Path to pipeline spec JSON.") - parser.add_argument("--config", required=True, help="Path to config JSON.") + parser.add_argument( + "--config", + required=True, + help="Path to config JSON. The file is validated for compatibility but otherwise unused.", + ) parser.add_argument("--arguments", required=False, help="Path to run arguments JSON.") parser.add_argument("--job-id", required=False, help="Runtime job ID.") parser.add_argument("--output-dir", required=False, help="Directory for output artifacts.") @@ -38,7 +45,7 @@ def run_runtime(argv: list[str], *, console: Console) -> None: opts = parser.parse_args(argv) spec_data = _load_json(Path(opts.spec)) - _ = _load_json(Path(opts.config)) + _load_runtime_config(Path(opts.config)) runtime_arguments: Any = {} if opts.arguments: runtime_arguments = _load_json(Path(opts.arguments)) @@ -97,6 +104,13 @@ def _load_json(path: Path) -> Any: return json.loads(path.read_text(encoding="utf-8")) +def _load_runtime_config(path: Path) -> dict[str, Any]: + config = _load_json(path) + if not isinstance(config, dict): + raise SystemExit("Invalid runtime config: expected a JSON object.") + return config + + def _parse_pipeline(data: Any) -> AdagioPipeline: pipeline_data = data.get("spec", data) if isinstance(data, dict) else 
data return AdagioPipeline.model_validate(pipeline_data) diff --git a/src/adagio/serial_execute.py b/src/adagio/serial_execute.py index 0d0855d..8cd610e 100644 --- a/src/adagio/serial_execute.py +++ b/src/adagio/serial_execute.py @@ -281,7 +281,15 @@ def _save_outputs( if isinstance(arguments.outputs, str): destination = os.path.join(arguments.outputs, output.name) elif isinstance(arguments.outputs, dict): - destination = arguments.outputs[output.name] + destination = arguments.outputs.get(output.name) + if destination is None: + expected_outputs = ", ".join(sorted(item.name for item in sig.outputs)) + provided_outputs = ", ".join(sorted(arguments.outputs.keys())) or "" + raise KeyError( + "Missing destination for output " + f"{output.name!r}. Expected output names: [{expected_outputs}]. " + f"Provided output names: [{provided_outputs}]." + ) else: raise TypeError("Unsupported outputs configuration.") From a841ff119c2f5d77db12762ed3c14e949e9aff74 Mon Sep 17 00:00:00 2001 From: John Chase Date: Mon, 9 Mar 2026 21:09:44 -0700 Subject: [PATCH 19/44] Fixes pipeline/config merge --- Dockerfile | 2 +- src/adagio/cli/dynamic.py | 22 +++++++++++++++++++--- src/adagio/cli/main.py | 16 ++++++++++++++-- 3 files changed, 34 insertions(+), 6 deletions(-) diff --git a/Dockerfile b/Dockerfile index 3affffa..f14553e 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -ARG QIIME_BASE_IMAGE=quay.io/qiime2/amplicon:2024.10 +ARG QIIME_BASE_IMAGE=quay.io/qiime2/amplicon:2026.1 FROM ${QIIME_BASE_IMAGE} AS base ENV PYTHONUNBUFFERED=1 diff --git a/src/adagio/cli/dynamic.py b/src/adagio/cli/dynamic.py index b975050..9b3e773 100644 --- a/src/adagio/cli/dynamic.py +++ b/src/adagio/cli/dynamic.py @@ -104,6 +104,8 @@ def build_dynamic_run( *, input_specs: list[InputSpec], param_specs: list[ParamSpec], + argument_inputs: dict[str, Any] | None = None, + argument_params: dict[str, Any] | None = None, run_handler: Callable[ [ Path, @@ -124,6 +126,8 @@ def build_dynamic_run( required_params: 
list[str] = [] seen_idents: set[str] = set() seen_opts: set[str] = {"--pipeline", "-p", "--arguments", "--show-params"} + argument_inputs = argument_inputs or {} + argument_params = argument_params or {} command_group = Group("Command Options", sort_key=0) pipeline_group = Group( "Pipeline", @@ -221,6 +225,8 @@ def add_dynamic_option( ) seen_idents.add(ident) input_bindings.append((ident, original)) + argument_value = argument_inputs.get(original) + display_required = bool(spec.required and _is_missing(argument_value)) if spec.required: required_inputs.append(original) @@ -234,7 +240,7 @@ def add_dynamic_option( help_text=( f"Pipeline input: {original}" + (f" ({type_text})" if type_text else "") - + (" [required]" if spec.required else "") + + (" [required]" if display_required else "") ), default=None, group=pipeline_group, @@ -253,12 +259,18 @@ def add_dynamic_option( default = spec.default required = spec.required is_required = bool(required and default is None) + argument_value = argument_params.get(original) + has_argument_default = not _is_missing(argument_value) + display_default = ( + default if default is not None else (argument_value if has_argument_default else None) + ) + display_required = is_required and display_default is None param_default = None param_type: Any = _resolve_param_type(spec.type, default) opt = dynamic_opt(original, ParamType.PARAM) if is_required: required_params.append(original) - default_text = f" [default: {default}]" if default is not None else "" + default_text = f" [default: {display_default}]" if display_default is not None else "" add_dynamic_option( ident=ident, opt=opt, @@ -266,7 +278,7 @@ def add_dynamic_option( py_type=param_type, help_text=( f"Pipeline parameter: {original}" - + (" [required]" if is_required else "") + + (" [required]" if display_required else "") + default_text ), default=param_default, @@ -298,3 +310,7 @@ def run( "Use: adagio run --pipeline PATH --help" ) return run + + +def _is_missing(value: Any) -> 
bool: + return value is None or value == "" diff --git a/src/adagio/cli/main.py b/src/adagio/cli/main.py index 82721df..c05bbd5 100644 --- a/src/adagio/cli/main.py +++ b/src/adagio/cli/main.py @@ -106,6 +106,8 @@ def run( dynamic_run = build_dynamic_run( input_specs=visible_inputs, param_specs=visible_params, + argument_inputs=arguments_data.get("inputs", {}) if arguments_data else None, + argument_params=arguments_data.get("parameters", {}) if arguments_data else None, run_handler=partial(run_pipeline_from_kwargs, console=console), ) app.command(dynamic_run, name="run") @@ -130,9 +132,19 @@ def _filter_visible_specs( state_params.update(arguments_data.get("parameters", {})) if show_mode is ShowParamsMode.REQUIRED: - filtered_inputs = [spec for spec in input_specs if spec.required] + filtered_inputs = [ + spec + for spec in input_specs + if spec.required and _is_missing(state_inputs.get(spec.name)) + ] filtered_params = [ - spec for spec in param_specs if bool(spec.required and spec.default is None) + spec + for spec in param_specs + if bool( + spec.required + and spec.default is None + and _is_missing(state_params.get(spec.name)) + ) ] return filtered_inputs, filtered_params From 77ee15d36b01bcaa3eb318a9ff99ba68138822fe Mon Sep 17 00:00:00 2001 From: John Chase Date: Mon, 9 Mar 2026 21:43:47 -0700 Subject: [PATCH 20/44] Removes uv lock --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 3affffa..690c416 100644 --- a/Dockerfile +++ b/Dockerfile @@ -5,7 +5,7 @@ ENV PYTHONUNBUFFERED=1 WORKDIR /app FROM base AS dev -COPY ./pyproject.toml ./uv.lock /app/ +COPY ./pyproject.toml /app/ COPY ./README.md /app/ COPY ./src /app/src From a2e21c897a4cba8e2a3745f336591a8c5719e0e4 Mon Sep 17 00:00:00 2001 From: John Chase Date: Mon, 9 Mar 2026 22:22:37 -0700 Subject: [PATCH 21/44] Adds readme --- README.md | 140 ++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 136 insertions(+), 4 deletions(-) diff 
--git a/README.md b/README.md index 1ac2213..47764ed 100644 --- a/README.md +++ b/README.md @@ -1,20 +1,152 @@ -# Adagio +# Adagio CLI + +Command-line runner for Adagio pipeline files + +## Requirements + +- Python 3.10+ +- `uv` (recommended for development) +- Optional: Docker (used automatically when local QIIME imports are unavailable) ## Installation +Install from the current checkout: + ```bash pip install . ``` -## Usage +Or with `uv`: + +```bash +uv pip install . +``` + +Verify install: ```bash adagio --version +adagio --help +``` + +## Usage + +### Run a pipeline + +Show command help: + +```bash +adagio run --help +``` + +Run with a pipeline file: + +```bash +adagio run --pipeline path/to/pipeline.json +``` + +Equivalent positional form: + +```bash +adagio run path/to/pipeline.json +``` + +Use an arguments file: + +```bash +adagio run --pipeline path/to/pipeline.json --arguments path/to/arguments.json +``` + +Control which dynamic flags are shown in help: + +```bash +adagio run --pipeline path/to/pipeline.json --show-params required +# choices: all | missing | required +``` + +### Arguments file format + +`--arguments` can be downloaded from Adagio directly in the "Run" workflow : + +```json +{ + "version": 1, + "inputs": { + "input_name": "/path/to/input.qza" + }, + "parameters": { + "param_name": "value" + }, + "outputs": "/path/to/output-dir" +} +``` + +`outputs` may also be a map keyed by output name (WIP: not currently generated by Adagio): + +```json +{ + "outputs": { + "output_a": "/path/to/output-a", + "output_b": "/path/to/output-b" + } +} +``` + +If outputs are omitted, defaults are generated under `./adagio-outputs`. 
+ +### QAPI generation/submission + +Generate and submit plugin metadata from the active QIIME environment: + +```bash +adagio build-qapi --action-url http://localhost:81/api/v1 +``` + +Write payload to disk without submitting: + +```bash +adagio build-qapi --output qapi.json --dry-run +``` + +Submit selected plugins only: + +```bash +adagio build-qapi --plugin dada2 --plugin feature-table ``` ## Development +### Setup + +Install runtime and dev dependencies: + +```bash +uv sync --group dev +``` + +Run commands inside the project environment: + +```bash +uv run adagio --help +``` + +### Linting + +```bash +uv run ruff check . +uv run ruff format . +``` + +### Running locally during development + +```bash +uv run adagio run --pipeline path/to/pipeline.json +``` + +### Runtime entrypoint (container/integration use) + +The `runtime` subcommand is intended for runtime-adapter jobs: + ```bash -uv sync -source .venv/bin/activate +uv run adagio runtime --spec spec.json --config config.json --arguments arguments.json ``` From 79ac3618835a615005d6a44969c2587aa904cffc Mon Sep 17 00:00:00 2001 From: John Chase Date: Thu, 12 Mar 2026 23:25:16 -0700 Subject: [PATCH 22/44] feat(execution): add modular task-environment executor with Docker launcher - remove the legacy serial executor path - introduce task environment resolver/launcher abstractions - add default plugin-to-GHCR Docker image resolution - execute each plugin action in its own container - add a self-contained in-container task runner for QIIME actions - unify adagio run and runtime around the same executor - suppress leaked task stdout and refresh running task elapsed timers --- src/adagio/cli/main.py | 6 + src/adagio/cli/runner.py | 264 +-------------- src/adagio/cli/runtime.py | 11 +- src/adagio/cli/task_exec.py | 167 ++++++++++ src/adagio/executors/__init__.py | 13 + src/adagio/executors/base.py | 66 ++++ src/adagio/executors/common.py | 45 +++ src/adagio/executors/container_support.py | 105 ++++++ 
src/adagio/executors/defaults.py | 38 +++ src/adagio/executors/docker.py | 146 +++++++++ src/adagio/executors/task_environments.py | 320 ++++++++++++++++++ src/adagio/monitor/tty.py | 155 +++++---- src/adagio/serial_execute.py | 381 ---------------------- 13 files changed, 1018 insertions(+), 699 deletions(-) create mode 100644 src/adagio/cli/task_exec.py create mode 100644 src/adagio/executors/__init__.py create mode 100644 src/adagio/executors/base.py create mode 100644 src/adagio/executors/common.py create mode 100644 src/adagio/executors/container_support.py create mode 100644 src/adagio/executors/defaults.py create mode 100644 src/adagio/executors/docker.py create mode 100644 src/adagio/executors/task_environments.py delete mode 100644 src/adagio/serial_execute.py diff --git a/src/adagio/cli/main.py b/src/adagio/cli/main.py index c05bbd5..dbfb133 100644 --- a/src/adagio/cli/main.py +++ b/src/adagio/cli/main.py @@ -22,6 +22,12 @@ def main(argv: list[str] | None = None) -> None: argv = sys.argv[1:] if argv is None else argv + if argv and argv[0] == "exec-task": + from .task_exec import run_task_exec + + run_task_exec(argv[1:]) + return + if argv and argv[0] == "runtime": from .runtime import run_runtime diff --git a/src/adagio/cli/runner.py b/src/adagio/cli/runner.py index 9534913..ad04765 100644 --- a/src/adagio/cli/runner.py +++ b/src/adagio/cli/runner.py @@ -1,15 +1,10 @@ import json import os -import subprocess -import sys -import tempfile from pathlib import Path from typing import Any from rich.console import Console -DEFAULT_CONTAINER_IMAGE = "sloth-adagio-cli:latest" -HOST_MOUNT_POINT = "/host" DEFAULT_OUTPUT_DIRNAME = "adagio-outputs" @@ -99,31 +94,17 @@ def run_pipeline_from_kwargs( if not suppress_header: console.print(f"[bold]Pipeline:[/bold] {pipeline}") - force_container = _is_truthy(os.getenv("ADAGIO_FORCE_CONTAINER")) - local_qiime_error = _probe_local_qiime_error() - if force_container or local_qiime_error is not None: - if force_container: - if 
not suppress_header: - console.print("[bold]Executing pipeline[/bold] (container mode; forced)") - else: - if not suppress_header: - console.print("[bold]Executing pipeline[/bold] (container mode)") - console.print( - "[yellow]Local QIIME unavailable, falling back to Docker:[/yellow] " - f"{local_qiime_error}" - ) - _execute_via_container(pipeline=pipeline, arguments=arguments, console=console) - return + from ..executors import select_default_executor - from ..monitor.tty import RichMonitor - from ..serial_execute import execute_serial + executor = select_default_executor() if not suppress_header: - console.print("[bold]Executing pipeline[/bold] (qiime serial mode)") - execute_serial( + console.print(f"[bold]Executing pipeline[/bold] ({executor.mode_label})") + + executor.execute( pipeline=parsed_pipeline, arguments=arguments, - monitor=RichMonitor(console=console), + console=console, ) @@ -161,240 +142,7 @@ def _resolve_output_destinations( return resolved -def _probe_local_qiime_error() -> str | None: - """Return an error string if local QIIME cannot satisfy serial execution imports.""" - try: - import qiime2 # noqa: F401 - from qiime2 import get_cache # noqa: F401 - from qiime2.sdk import PluginManager # noqa: F401 - except Exception as exc: # noqa: BLE001 - return str(exc) - return None - - -def _execute_via_container(*, pipeline: Path, arguments: Any, console: Console) -> None: - """Execute pipeline in the shared adagio-cli Docker image.""" - image = (os.getenv("ADAGIO_CONTAINER_IMAGE") or DEFAULT_CONTAINER_IMAGE).strip() - host_cwd = Path.cwd().resolve() - host_src_root = _local_source_root() - host_paths = _collect_host_paths( - pipeline=pipeline.resolve(), - arguments=arguments, - cwd=host_cwd, - ) - host_paths.append(host_src_root) - run_arguments = _to_container_run_arguments(arguments=arguments) - - with tempfile.TemporaryDirectory(prefix="adagio-runtime-") as temp_dir: - temp_path = Path(temp_dir) - args_path = temp_path / "arguments.json" - 
host_paths.append(args_path.resolve()) - - args_path.write_text( - json.dumps(run_arguments, ensure_ascii=True), - encoding="utf-8", - ) - - command = [ - "docker", - "run", - "--rm", - *_docker_tty_flags(), - "-e", - f"PYTHONPATH={_containerize_path(host_src_root)}", - "-e", - "ADAGIO_SUPPRESS_RUN_HEADER=1", - *_python_warning_env_flags(), - "-w", - _containerize_path(host_cwd), - image, - "python", - "-m", - "adagio.cli.main", - "run", - "--pipeline", - _containerize_path(pipeline.resolve()), - "--arguments", - _containerize_path(args_path), - "--show-params", - "all", - ] - command = _with_mounts(command=command, host_paths=host_paths) - - console.print(f"[dim]Container image:[/dim] {image}") - try: - result = subprocess.run( - command, - check=False, - stderr=subprocess.PIPE, - text=True, - ) - except FileNotFoundError as exc: - raise SystemExit( - "Docker is required for container fallback but was not found in PATH." - ) from exc - - _print_filtered_container_stderr(console=console, stderr_text=result.stderr or "") - - if result.returncode != 0: - raise SystemExit(result.returncode) - - -def _to_container_run_arguments(*, arguments: Any) -> dict[str, Any]: - """Serialize `adagio run` arguments and rewrite absolute host paths.""" - data = arguments.model_dump() if hasattr(arguments, "model_dump") else dict(arguments) - inputs = data.get("inputs", {}) - outputs = data.get("outputs") - - if isinstance(inputs, dict): - data["inputs"] = { - key: _containerize_host_value(value) if isinstance(value, str) else value - for key, value in inputs.items() - } - - if isinstance(outputs, str): - data["outputs"] = ( - _containerize_host_value(outputs) - if not _is_missing(outputs) - else outputs - ) - elif isinstance(outputs, dict): - data["outputs"] = { - key: _containerize_host_value(value) - if isinstance(value, str) and not _is_missing(value) - else value - for key, value in outputs.items() - } - - return { - "version": 1, - "inputs": data.get("inputs", {}), - 
"parameters": data.get("parameters", {}), - "outputs": data.get("outputs"), - } - - -def _collect_host_paths( - *, pipeline: Path, arguments: Any, cwd: Path -) -> list[Path]: - """Collect absolute host paths that must be visible in the container.""" - data = arguments.model_dump() if hasattr(arguments, "model_dump") else dict(arguments) - paths = [pipeline, cwd] - - inputs = data.get("inputs", {}) - if isinstance(inputs, dict): - for value in inputs.values(): - if isinstance(value, str) and not _is_uri(value): - as_path = Path(value) - if as_path.is_absolute(): - paths.append(as_path) - - outputs = data.get("outputs") - if isinstance(outputs, str): - if not _is_missing(outputs) and not _is_uri(outputs): - as_path = Path(outputs) - if as_path.is_absolute(): - paths.append(as_path) - elif isinstance(outputs, dict): - for value in outputs.values(): - if isinstance(value, str) and not _is_missing(value) and not _is_uri(value): - as_path = Path(value) - if as_path.is_absolute(): - paths.append(as_path) - - return [path.resolve() for path in paths] - - -def _with_mounts(*, command: list[str], host_paths: list[Path]) -> list[str]: - """Attach bind mounts for top-level host roots needed by this execution.""" - roots = _mount_roots(host_paths) - mount_flags: list[str] = [] - for root in roots: - mount_flags.extend( - [ - "-v", - f"{root}:{_containerize_path(root)}:rw", - ] - ) - return [*command[:3], *mount_flags, *command[3:]] - - -def _docker_tty_flags() -> list[str]: - """Allocate Docker TTY when the current session is interactive.""" - if sys.stdin.isatty() and sys.stdout.isatty(): - return ["-t"] - return [] - - -def _python_warning_env_flags() -> list[str]: - """Suppress known noisy runtime warnings in container mode.""" - filters = os.getenv("ADAGIO_PYTHONWARNINGS") - if filters is None: - filters = "ignore:pkg_resources is deprecated as an API:UserWarning" - filters = filters.strip() - if not filters: - return [] - return ["-e", f"PYTHONWARNINGS={filters}"] - - -def 
_mount_roots(paths: list[Path]) -> list[Path]: - """Map paths to their first-level filesystem roots for portable bind mounts.""" - roots: set[Path] = set() - for path in paths: - parts = path.parts - if len(parts) < 2: - continue - root = Path("/", parts[1]) - if root.exists(): - roots.add(root) - return sorted(roots) - - -def _containerize_host_value(value: str) -> str: - """Map an absolute host path into the container mount.""" - if _is_uri(value): - return value - as_path = Path(value) - if as_path.is_absolute(): - return _containerize_path(as_path) - return value - - -def _containerize_path(path: Path) -> str: - """Convert absolute host path to mounted container path.""" - resolved = path.resolve() - return f"{HOST_MOUNT_POINT}{resolved}" - - -def _is_uri(value: str) -> bool: - return "://" in value - - def _is_truthy(value: str | None) -> bool: if value is None: return False return value.strip().lower() in {"1", "true", "yes", "on"} - - -def _local_source_root() -> Path: - """Return the local `adagio-cli/src` path for container PYTHONPATH.""" - return Path(__file__).resolve().parents[2] - - -def _print_filtered_container_stderr(*, console: Console, stderr_text: str) -> None: - """Print relevant stderr lines while dropping known noisy platform warnings.""" - if not stderr_text: - return - for line in stderr_text.splitlines(): - if _is_docker_platform_warning(line): - continue - if not line.strip(): - continue - console.print(line) - - -def _is_docker_platform_warning(line: str) -> bool: - return ( - "requested image's platform" in line - and "does not match the detected host platform" in line - ) diff --git a/src/adagio/cli/runtime.py b/src/adagio/cli/runtime.py index 3b2c67f..d25b70d 100644 --- a/src/adagio/cli/runtime.py +++ b/src/adagio/cli/runtime.py @@ -79,10 +79,17 @@ def run_runtime(argv: list[str], *, console: Console) -> None: payload={"event": "job_status", "status": "running"}, ) - from ..serial_execute import execute_serial + from ..executors import 
select_default_executor + + executor = select_default_executor() try: - execute_serial(pipeline=pipeline, arguments=arguments, monitor=monitor) + executor.execute( + pipeline=pipeline, + arguments=arguments, + console=console, + monitor=monitor, + ) except Exception as exc: # noqa: BLE001 if connected and runtime_url and opts.job_id: _post_job_event( diff --git a/src/adagio/cli/task_exec.py b/src/adagio/cli/task_exec.py new file mode 100644 index 0000000..5f8b303 --- /dev/null +++ b/src/adagio/cli/task_exec.py @@ -0,0 +1,167 @@ +"""Internal exec-task subcommand: runs a single QIIME action inside a plugin container.""" + +from __future__ import annotations + +import argparse +import json +import os +import sys +import warnings +import zipfile +from pathlib import Path +from typing import Any + + +def run_task_exec(argv: list[str]) -> None: + """Entrypoint for the internal ``adagio exec-task`` subcommand.""" + parser = argparse.ArgumentParser( + prog="adagio exec-task", + description="Execute a single QIIME plugin action (internal use only).", + ) + parser.add_argument("--task", required=True, help="Path to the task spec JSON file.") + opts = parser.parse_args(argv) + + task_spec = json.loads(Path(opts.task).read_text(encoding="utf-8")) + _run_task(task_spec) + + +def _run_task(spec: dict[str, Any]) -> None: + from qiime2 import Artifact, Metadata + from qiime2.sdk import PluginManager + + plugin_name: str = spec["plugin"] + action_name: str = spec["action"] + archive_inputs: dict[str, str] = spec.get("archive_inputs", {}) + metadata_inputs: dict[str, str] = spec.get("metadata_inputs", {}) + params: dict[str, Any] = spec.get("params", {}) + metadata_column_kwargs: dict[str, dict[str, str]] = spec.get("metadata_column_kwargs", {}) + outputs: dict[str, str] = spec["outputs"] + result_manifest: str | None = spec.get("result_manifest") + + plugin_manager = PluginManager() + + plugin = _resolve_key(plugin_manager.plugins, plugin_name) + if plugin is None: + available = ", 
".join(sorted(plugin_manager.plugins.keys())[:20]) + raise KeyError( + f"QIIME plugin {plugin_name!r} not found. " + f"Available plugins (first 20): [{available}]" + ) + + action = _resolve_key(plugin.actions, action_name) + if action is None: + available = ", ".join(sorted(plugin.actions.keys())[:30]) + raise KeyError( + f"QIIME action {plugin_name!r}.{action_name!r} not found. " + f"Available actions (first 30): [{available}]" + ) + + kwargs: dict[str, Any] = {} + + for name, path in archive_inputs.items(): + kwargs[name] = Artifact.load(path) + + loaded_metadata: dict[str, Metadata] = {} + for name, path in metadata_inputs.items(): + if zipfile.is_zipfile(path): + loaded_metadata[name] = Artifact.load(path).view(Metadata) + else: + loaded_metadata[name] = Metadata.load(path) + + for param_name, col_spec in metadata_column_kwargs.items(): + source_name: str = col_spec["source"] + column_name: str = col_spec["column"] + metadata = loaded_metadata.pop(source_name) + kwargs[param_name] = metadata.get_column(column_name) + + for name, metadata in loaded_metadata.items(): + kwargs[name] = metadata + + for name, value in params.items(): + kwargs[name] = _coerce_param(action=action, name=name, value=value) + + with action_output_context(): + results = action(**kwargs) + + saved_outputs: dict[str, str] = {} + for name, dest_path in outputs.items(): + artifact = getattr(results, name) + saved_outputs[name] = artifact.save(dest_path) + + if result_manifest: + Path(result_manifest).write_text( + json.dumps(saved_outputs, ensure_ascii=True), + encoding="utf-8", + ) + + +def _resolve_key(mapping: Any, requested: str) -> Any: + if requested in mapping: + return mapping[requested] + canonical = _canonical(requested) + for key in mapping: + if _canonical(key) == canonical: + return mapping[key] + return None + + +def _canonical(value: str) -> str: + return value.strip().replace("-", "_").replace(" ", "_").lower() + + +def _coerce_param(*, action: Any, name: str, value: Any) -> 
Any: + if value is None: + return None + from collections.abc import Mapping + + signature = getattr(action, "signature", None) + parameters = getattr(signature, "parameters", None) + if not isinstance(parameters, Mapping) or name not in parameters: + return value + qiime_type = getattr(parameters[name], "qiime_type", None) + if qiime_type is None: + return value + from qiime2.sdk.util import parse_primitive + + return parse_primitive(qiime_type, value) + + +class action_output_context: + """Suppress plugin stdout/stderr noise unless explicitly enabled.""" + + def __enter__(self): + mode = os.getenv("ADAGIO_ACTION_STDIO", "").strip().lower() + self._suppress = mode not in {"inherit", "show", "verbose", "1", "true", "yes"} + if not self._suppress: + return self + + self._saved_fds: list[tuple[int, int]] = [] + self._sink = open(os.devnull, "w", encoding="utf-8") + self._warnings = warnings.catch_warnings() + self._warnings.__enter__() + warnings.filterwarnings( + "ignore", + message="pkg_resources is deprecated as an API.*", + category=UserWarning, + ) + for fd in (1, 2): + saved = os.dup(fd) + self._saved_fds.append((fd, saved)) + os.dup2(self._sink.fileno(), fd) + return self + + def __exit__(self, exc_type, exc, tb): + if not getattr(self, "_suppress", False): + return False + for fd, saved in reversed(self._saved_fds): + try: + os.dup2(saved, fd) + finally: + os.close(saved) + self._warnings.__exit__(exc_type, exc, tb) + self._sink.close() + return False + + +if __name__ == "__main__": + run_task_exec(sys.argv[1:]) diff --git a/src/adagio/executors/__init__.py b/src/adagio/executors/__init__.py new file mode 100644 index 0000000..73662d5 --- /dev/null +++ b/src/adagio/executors/__init__.py @@ -0,0 +1,13 @@ +from .base import PipelineExecutor +from .defaults import DefaultTaskEnvironmentResolver +from .docker import DockerTaskEnvironmentLauncher +from .task_environments import TaskEnvironmentExecutor + + +def select_default_executor() -> PipelineExecutor: + 
return TaskEnvironmentExecutor( + environment_resolver=DefaultTaskEnvironmentResolver(), + launchers={ + "docker": DockerTaskEnvironmentLauncher(), + }, + ) diff --git a/src/adagio/executors/base.py b/src/adagio/executors/base.py new file mode 100644 index 0000000..d286622 --- /dev/null +++ b/src/adagio/executors/base.py @@ -0,0 +1,66 @@ +from __future__ import annotations + +from dataclasses import dataclass +from pathlib import Path +from typing import Any, Mapping, Protocol + +from rich.console import Console + +from adagio.model.arguments import AdagioArguments +from adagio.model.pipeline import AdagioPipeline +from adagio.model.task import PluginActionTask +from adagio.monitor.api import Monitor + + +class PipelineExecutor(Protocol): + mode_label: str + + def execute( + self, + *, + pipeline: AdagioPipeline, + arguments: AdagioArguments, + console: Console | None = None, + monitor: Monitor | None = None, + ) -> None: ... + + +@dataclass(frozen=True) +class TaskEnvironmentSpec: + kind: str + reference: str + description: str | None = None + options: Mapping[str, Any] | None = None + + +@dataclass(frozen=True) +class TaskExecutionRequest: + task: PluginActionTask + cwd: Path + work_path: Path + archive_inputs: Mapping[str, str] + metadata_inputs: Mapping[str, str] + params: Mapping[str, Any] + metadata_column_kwargs: Mapping[str, Mapping[str, str]] + outputs: Mapping[str, str] + + +@dataclass(frozen=True) +class TaskExecutionResult: + outputs: Mapping[str, str] + + +class TaskEnvironmentResolver(Protocol): + def resolve(self, *, task: PluginActionTask) -> TaskEnvironmentSpec: ... + + +class TaskEnvironmentLauncher(Protocol): + kind: str + + def launch( + self, + *, + environment: TaskEnvironmentSpec, + request: TaskExecutionRequest, + console: Console | None = None, + ) -> TaskExecutionResult: ... 
diff --git a/src/adagio/executors/common.py b/src/adagio/executors/common.py new file mode 100644 index 0000000..9821990 --- /dev/null +++ b/src/adagio/executors/common.py @@ -0,0 +1,45 @@ +from __future__ import annotations + +import typing as t + + +def plan_execution_order(*, tasks: list[t.Any], scope: dict[str, t.Any]) -> list[t.Any]: + """Return a dependency-respecting serial execution plan.""" + available_ids = set(scope.keys()) + remaining = list(tasks) + planned: list[t.Any] = [] + + while remaining: + progressed = False + for task in list(remaining): + missing = [src.id for src in task.inputs.values() if src.id not in available_ids] + if missing: + continue + + planned.append(task) + remaining.remove(task) + progressed = True + for output in task.outputs.values(): + available_ids.add(output.id) + + if not progressed: + details = [] + for task in remaining: + missing = ", ".join(src.id for src in task.inputs.values() if src.id not in available_ids) + details.append(f"{task.id}: missing [{missing}]") + raise RuntimeError("Unable to resolve task dependencies. 
" + "; ".join(details)) + + return planned + + +def task_label(task: t.Any) -> str: + kind = getattr(task, "kind", "unknown") + task_id = getattr(task, "id", "") + if kind == "plugin-action": + plugin = getattr(task, "plugin", "") + action = getattr(task, "action", "") + return f"{task_id} ({plugin}.{action})" + if kind == "built-in": + name = getattr(task, "name", "built-in") + return f"{task_id} ({name})" + return task_id diff --git a/src/adagio/executors/container_support.py b/src/adagio/executors/container_support.py new file mode 100644 index 0000000..21081c1 --- /dev/null +++ b/src/adagio/executors/container_support.py @@ -0,0 +1,105 @@ +from __future__ import annotations + +import os +import sys +from pathlib import Path + +from rich.console import Console + +HOST_MOUNT_POINT = "/host" + + +def with_mounts(*, command: list[str], host_paths: list[Path]) -> list[str]: + """Attach bind mounts for top-level host roots needed by this execution.""" + roots = mount_roots(host_paths) + mount_flags: list[str] = [] + for root in roots: + mount_flags.extend( + [ + "-v", + f"{root}:{containerize_path(root)}:rw", + ] + ) + return [*command[:3], *mount_flags, *command[3:]] + + +def docker_tty_flags() -> list[str]: + """Allocate Docker TTY when the current session is interactive.""" + if sys.stdin.isatty() and sys.stdout.isatty(): + return ["-t"] + return [] + + +def python_warning_env_flags() -> list[str]: + """Suppress known noisy runtime warnings in container mode.""" + filters = os.getenv("ADAGIO_PYTHONWARNINGS") + if filters is None: + filters = "ignore:pkg_resources is deprecated as an API:UserWarning" + filters = filters.strip() + if not filters: + return [] + return ["-e", f"PYTHONWARNINGS={filters}"] + + +def mount_roots(paths: list[Path]) -> list[Path]: + """Map paths to their first-level filesystem roots for portable bind mounts.""" + roots: set[Path] = set() + for path in paths: + parts = path.parts + if len(parts) < 2: + continue + root = Path("/", parts[1]) + 
if root.exists(): + roots.add(root) + return sorted(roots) + + +def containerize_host_value(value: str) -> str: + """Map an absolute host path into the container mount.""" + if is_uri(value): + return value + as_path = Path(value) + if as_path.is_absolute(): + return containerize_path(as_path) + return value + + +def containerize_path(path: Path) -> str: + """Convert an absolute host path to the mounted container path.""" + return f"{HOST_MOUNT_POINT}{path.resolve()}" + + +def host_path_from_container(value: str) -> Path: + """Convert a mounted container path back to the original host path.""" + if not value.startswith(HOST_MOUNT_POINT): + return Path(value) + suffix = value[len(HOST_MOUNT_POINT) :] + return Path(suffix).resolve() + + +def is_uri(value: str) -> bool: + return "://" in value + + +def local_source_root() -> Path: + """Return the local `adagio-cli/src` path for container PYTHONPATH.""" + return Path(__file__).resolve().parents[2] + + +def print_filtered_container_stderr(*, console: Console, stderr_text: str) -> None: + """Print relevant stderr lines while dropping known noisy platform warnings.""" + if not stderr_text: + return + for line in stderr_text.splitlines(): + if is_docker_platform_warning(line): + continue + if not line.strip(): + continue + console.print(line) + + +def is_docker_platform_warning(line: str) -> bool: + return ( + "requested image's platform" in line + and "does not match the detected host platform" in line + ) diff --git a/src/adagio/executors/defaults.py b/src/adagio/executors/defaults.py new file mode 100644 index 0000000..baa8cef --- /dev/null +++ b/src/adagio/executors/defaults.py @@ -0,0 +1,38 @@ +from __future__ import annotations + +from adagio.model.task import PluginActionTask + +from .base import TaskEnvironmentResolver, TaskEnvironmentSpec + +DEFAULT_REGISTRY = "ghcr.io/cymis" +DEFAULT_IMAGE_PREFIX = "qiime2-plugin-" +DEFAULT_TAG = "2026.1" + + +class DefaultTaskEnvironmentResolver(TaskEnvironmentResolver): + 
"""Resolve plugin actions to default task environments. + + The current default is a Docker image in GHCR derived from the plugin name. + The interface is task-scoped so future config can override individual tasks + with Docker, SIF/Apptainer, Conda, or cluster-specific environments. + """ + + def __init__( + self, + *, + registry: str = DEFAULT_REGISTRY, + image_prefix: str = DEFAULT_IMAGE_PREFIX, + tag: str = DEFAULT_TAG, + ) -> None: + self._registry = registry.rstrip("/") + self._image_prefix = image_prefix + self._tag = tag + + def resolve(self, *, task: PluginActionTask) -> TaskEnvironmentSpec: + normalized = task.plugin.lower().replace("_", "-") + reference = f"{self._registry}/{self._image_prefix}{normalized}:{self._tag}" + return TaskEnvironmentSpec( + kind="docker", + reference=reference, + description=f"default plugin image for {task.plugin}", + ) diff --git a/src/adagio/executors/docker.py b/src/adagio/executors/docker.py new file mode 100644 index 0000000..37fdf2f --- /dev/null +++ b/src/adagio/executors/docker.py @@ -0,0 +1,146 @@ +from __future__ import annotations + +import json +import subprocess +from pathlib import Path + +from rich.console import Console + +from .base import ( + TaskEnvironmentLauncher, + TaskEnvironmentSpec, + TaskExecutionRequest, + TaskExecutionResult, +) +from .container_support import ( + containerize_host_value, + containerize_path, + docker_tty_flags, + host_path_from_container, + is_uri, + local_source_root, + print_filtered_container_stderr, + python_warning_env_flags, + with_mounts, +) + + +class DockerTaskEnvironmentLauncher(TaskEnvironmentLauncher): + kind = "docker" + + def launch( + self, + *, + environment: TaskEnvironmentSpec, + request: TaskExecutionRequest, + console: Console | None = None, + ) -> TaskExecutionResult: + task = request.task + archive_inputs = { + name: containerize_host_value(value) + for name, value in request.archive_inputs.items() + } + metadata_inputs = { + name: 
containerize_host_value(value) + for name, value in request.metadata_inputs.items() + } + outputs = { + name: containerize_path(Path(path)) + for name, path in request.outputs.items() + } + + safe_id = task.id.replace("/", "_").replace(" ", "_") + result_manifest_path = (request.work_path / f"{safe_id}_results.json").resolve() + task_spec = { + "plugin": task.plugin, + "action": task.action, + "archive_inputs": archive_inputs, + "metadata_inputs": metadata_inputs, + "params": dict(request.params), + "metadata_column_kwargs": dict(request.metadata_column_kwargs), + "outputs": outputs, + "result_manifest": containerize_path(result_manifest_path), + } + + task_spec_path = (request.work_path / f"{safe_id}_spec.json").resolve() + task_spec_path.write_text(json.dumps(task_spec, ensure_ascii=True), encoding="utf-8") + + src_root = local_source_root() + command = [ + "docker", + "run", + "--rm", + *docker_tty_flags(), + "-e", + f"PYTHONPATH={containerize_path(src_root)}", + *python_warning_env_flags(), + "-w", + containerize_path(request.cwd), + environment.reference, + "python", + "-m", + "adagio.cli.task_exec", + "--task", + containerize_path(task_spec_path), + ] + + host_paths = [request.cwd, request.work_path, src_root] + for value in list(request.archive_inputs.values()) + list(request.metadata_inputs.values()): + if is_uri(value): + continue + path = Path(value) + if path.is_absolute(): + host_paths.append(path) + + command = with_mounts(command=command, host_paths=host_paths) + + if console is not None: + console.print(f"[dim]Task environment:[/dim] docker {environment.reference}") + + try: + result = subprocess.run( + command, + check=False, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + ) + except FileNotFoundError as exc: + raise SystemExit( + "Docker is required for task environment execution but was not found in PATH." 
+ ) from exc + + if console is not None: + print_filtered_container_stderr(console=console, stderr_text=result.stderr or "") + + if result.returncode != 0: + stdout_text = (result.stdout or "").strip() + stderr_text = (result.stderr or "").strip() + if stderr_text: + detail = f" Docker reported: {stderr_text}" + elif stdout_text: + detail = f" Container stdout: {stdout_text}" + else: + detail = "" + raise RuntimeError( + f"Task {task.id!r} ({task.plugin}.{task.action}) failed " + f"while launching environment {environment.reference!r} " + f"with exit code {result.returncode}.{detail}" + ) + + if not result_manifest_path.exists(): + raise RuntimeError( + f"Task {task.id!r} completed but did not write an output manifest." + ) + + output_manifest = json.loads(result_manifest_path.read_text(encoding="utf-8")) + outputs = {} + for output_name in request.outputs: + actual_path = output_manifest.get(output_name) + if not isinstance(actual_path, str): + raise RuntimeError( + f"Task {task.id!r} did not report output {output_name!r}." 
+ ) + outputs[output_name] = str(host_path_from_container(actual_path)) + + return TaskExecutionResult(outputs=outputs) diff --git a/src/adagio/executors/task_environments.py b/src/adagio/executors/task_environments.py new file mode 100644 index 0000000..ff61a6a --- /dev/null +++ b/src/adagio/executors/task_environments.py @@ -0,0 +1,320 @@ +from __future__ import annotations + +import os +import shutil +import tempfile +import typing as t +from pathlib import Path + +from rich.console import Console + +from adagio.model.arguments import AdagioArguments +from adagio.model.pipeline import AdagioPipeline +from adagio.model.task import PluginActionTask, RootInputTask +from adagio.monitor.api import Monitor +from adagio.monitor.log import LogMonitor +from adagio.monitor.tty import RichMonitor + +from .base import ( + PipelineExecutor, + TaskEnvironmentLauncher, + TaskEnvironmentResolver, + TaskExecutionRequest, +) +from .common import plan_execution_order, task_label +from .container_support import is_uri + +CONTAINER_SUBTASK_COUNT = 1 + + +class TaskEnvironmentExecutor(PipelineExecutor): + mode_label = "per-task environment mode" + + def __init__( + self, + *, + environment_resolver: TaskEnvironmentResolver, + launchers: dict[str, TaskEnvironmentLauncher], + ) -> None: + self._environment_resolver = environment_resolver + self._launchers = dict(launchers) + + def execute( + self, + *, + pipeline: AdagioPipeline, + arguments: AdagioArguments, + console: Console | None = None, + monitor: Monitor | None = None, + ) -> None: + sig = pipeline.signature + tasks = list(pipeline.iter_tasks()) + active_monitor = _resolve_monitor(console=console, monitor=monitor) + + pipeline.validate_graph() + sig.validate_arguments(arguments) + + active_monitor.start_pipeline(total_tasks=len(tasks)) + + with tempfile.TemporaryDirectory(prefix="adagio-work-") as work_dir: + work_path = Path(work_dir) + scope: dict[str, str] = {} + completed_task_ids: set[str] = set() + cwd = 
Path.cwd().resolve() + + active_monitor.start_load_input() + for input_def in sig.inputs: + source = arguments.inputs[input_def.name] + scope[input_def.id] = _resolve_host_path(source=source, cwd=cwd) + active_monitor.finish_load_input() + + params = sig.get_params(arguments) + execution_plan = plan_execution_order(tasks=tasks, scope=scope) + + for task in execution_plan: + active_monitor.queue_task( + task_id=task.id, + label=task_label(task), + total_subtasks=CONTAINER_SUBTASK_COUNT, + ) + + try: + for task in execution_plan: + active_monitor.start_task(task_id=task.id) + try: + self._execute_task( + task=task, + params=params, + scope=scope, + work_path=work_path, + cwd=cwd, + console=console, + ) + active_monitor.advance_task(task_id=task.id, advance=1) + active_monitor.finish_task(task_id=task.id, status="completed") + completed_task_ids.add(task.id) + except Exception as exc: # noqa: BLE001 + active_monitor.finish_task(task_id=task.id, status="failed", error=str(exc)) + for skipped_task in tasks: + if skipped_task.id == task.id or skipped_task.id in completed_task_ids: + continue + active_monitor.finish_task( + task_id=skipped_task.id, + status="skipped", + error=f"Skipped because task {task.id!r} failed.", + ) + raise + + active_monitor.start_save_output() + _save_outputs( + sig=sig, + arguments=arguments, + scope=scope, + monitor=active_monitor, + ) + active_monitor.finish_save_output() + finally: + active_monitor.finish_pipeline() + + def _execute_task( + self, + *, + task: t.Any, + params: dict[str, t.Any], + scope: dict[str, str], + work_path: Path, + cwd: Path, + console: Console | None, + ) -> None: + if isinstance(task, RootInputTask): + for name, src in task.inputs.items(): + dst = task.outputs[name] + scope[dst.id] = scope[src.id] + return + + if isinstance(task, PluginActionTask): + self._execute_plugin_action( + task=task, + params=params, + scope=scope, + work_path=work_path, + cwd=cwd, + console=console, + ) + return + + raise 
TypeError(f"Unsupported task type: {type(task)}") + + def _execute_plugin_action( + self, + *, + task: PluginActionTask, + params: dict[str, t.Any], + scope: dict[str, str], + work_path: Path, + cwd: Path, + console: Console | None, + ) -> None: + environment = self._environment_resolver.resolve(task=task) + launcher = self._launchers.get(environment.kind) + if launcher is None: + raise RuntimeError( + f"No task environment launcher registered for kind {environment.kind!r}." + ) + + archive_inputs: dict[str, str] = {} + metadata_inputs: dict[str, str] = {} + for name, src in task.inputs.items(): + value = scope[src.id] + if src.kind == "archive": + archive_inputs[name] = value + elif src.kind == "metadata": + metadata_inputs[name] = value + else: + raise TypeError(f"Unsupported input kind: {src.kind!r}") + + resolved_params: dict[str, t.Any] = {} + metadata_column_kwargs: dict[str, dict[str, str]] = {} + for name, param in task.parameters.items(): + if param.kind == "literal": + resolved_params[name] = param.value + elif param.kind == "promoted": + resolved_params[name] = params[param.id] + elif param.kind == "metadata": + column = param.column + if column.kind == "literal": + column_name = str(column.value) + elif column.kind == "promoted": + column_name = str(params[column.id]) + else: + raise TypeError(f"Unsupported metadata column kind: {column.kind!r}") + metadata_column_kwargs[name] = {"source": name, "column": column_name} + else: + raise TypeError(f"Unsupported parameter kind: {param.kind!r}") + + safe_id = task.id.replace("/", "_").replace(" ", "_") + outputs = { + name: str((work_path / f"{safe_id}_{name}").resolve()) + for name in task.outputs + } + request = TaskExecutionRequest( + task=task, + cwd=cwd, + work_path=work_path, + archive_inputs=archive_inputs, + metadata_inputs=metadata_inputs, + params=resolved_params, + metadata_column_kwargs=metadata_column_kwargs, + outputs=outputs, + ) + result = launcher.launch( + environment=environment, + 
request=request, + console=console, + ) + + for output_name, dest in task.outputs.items(): + actual_path = result.outputs.get(output_name) + if not isinstance(actual_path, str): + raise RuntimeError( + f"Task {task.id!r} did not produce output {output_name!r}." + ) + scope[dest.id] = actual_path + + +def _resolve_monitor(*, console: Console | None, monitor: Monitor | None) -> Monitor: + if monitor is not None: + return monitor + if console is not None: + return RichMonitor(console=console) + return LogMonitor() + + +def _save_outputs( + *, + sig, + arguments: AdagioArguments, + scope: dict[str, str], + monitor: Monitor | None, +) -> None: + if isinstance(arguments.outputs, str): + os.makedirs(arguments.outputs, exist_ok=True) + + for output in sig.outputs: + if output.id not in scope: + raise KeyError(f"Missing output value for {output.name!r} ({output.id}).") + + source_path = Path(scope[output.id]) + destination = _resolve_output_destination( + output_name=output.name, + outputs=arguments.outputs, + source_path=source_path, + sig=sig, + ) + + parent = os.path.dirname(destination) + if parent: + os.makedirs(parent, exist_ok=True) + + try: + shutil.copy2(source_path, destination) + except Exception as exc: # noqa: BLE001 + if monitor is not None: + monitor.finish_output( + output_id=output.id, + output_name=output.name, + destination=destination, + status="failed", + error=str(exc), + ) + raise + else: + if monitor is not None: + monitor.finish_output( + output_id=output.id, + output_name=output.name, + destination=destination, + status="succeeded", + ) + + +def _resolve_output_destination( + *, + output_name: str, + outputs: str | dict[str, str], + source_path: Path, + sig, +) -> str: + suffix = source_path.suffix + + if isinstance(outputs, str): + return _append_output_suffix(os.path.join(outputs, output_name), suffix) + + if isinstance(outputs, dict): + raw_dest = outputs.get(output_name) + if raw_dest is None: + expected_outputs = ", ".join(sorted(item.name for 
item in sig.outputs)) + provided_outputs = ", ".join(sorted(outputs.keys())) or "" + raise KeyError( + "Missing destination for output " + f"{output_name!r}. Expected output names: [{expected_outputs}]. " + f"Provided output names: [{provided_outputs}]." + ) + return _append_output_suffix(raw_dest, suffix) + + raise TypeError("Unsupported outputs configuration.") + + +def _append_output_suffix(destination: str, suffix: str) -> str: + if suffix and not destination.endswith(suffix): + return destination + suffix + return destination + + +def _resolve_host_path(*, source: str, cwd: Path) -> str: + if is_uri(source): + return source + path = Path(source) + if path.is_absolute(): + return str(path.resolve()) + return str((cwd / path).resolve()) diff --git a/src/adagio/monitor/tty.py b/src/adagio/monitor/tty.py index 5691dd8..5e7dc63 100644 --- a/src/adagio/monitor/tty.py +++ b/src/adagio/monitor/tty.py @@ -1,4 +1,5 @@ import re +import threading import time from dataclasses import dataclass @@ -44,99 +45,137 @@ def __init__(self, *, console: Console | None = None): "failed": 0, "skipped": 0, } + self._lock = threading.RLock() + self._stop_refresh = threading.Event() + self._refresh_thread: threading.Thread | None = None self._pipeline_started = False self._total_tasks = 0 def start_pipeline(self, *, total_tasks: int = 0) -> None: """Start rendering pipeline progress.""" - if self._pipeline_started: - return - self._pipeline_started = True - self._total_tasks = total_tasks - self._progress.start() - self._console.print("[bold]Task Progress[/bold]") + with self._lock: + if self._pipeline_started: + return + self._pipeline_started = True + self._total_tasks = total_tasks + self._stop_refresh.clear() + self._progress.start() + self._console.print("[bold]Task Progress[/bold]") + self._refresh_thread = threading.Thread( + target=self._refresh_loop, + name="adagio-rich-monitor", + daemon=True, + ) + self._refresh_thread.start() def queue_task( self, *, task_id: str, label: 
str, total_subtasks: int = 1 ) -> None: """Queue a task row in the progress view.""" - total = max(total_subtasks, 1) - state = _TaskState( - progress_task_id=-1, - label=label, - total_subtasks=total, - ) - row = self._render_row(state) - progress_task_id = self._progress.add_task( - description="", - total=total, - completed=0, - row=row, - ) - state.progress_task_id = progress_task_id - self._task_lookup[task_id] = state + with self._lock: + total = max(total_subtasks, 1) + state = _TaskState( + progress_task_id=-1, + label=label, + total_subtasks=total, + ) + row = self._render_row(state) + progress_task_id = self._progress.add_task( + description="", + total=total, + completed=0, + row=row, + ) + state.progress_task_id = progress_task_id + self._task_lookup[task_id] = state def start_task(self, *, task_id: str) -> None: """Mark a task as running.""" - task = self._task_lookup.get(task_id) - if task is None: - return - task.status = "running" - task.started_at = time.monotonic() - self._refresh_row(task) + with self._lock: + task = self._task_lookup.get(task_id) + if task is None: + return + task.status = "running" + task.started_at = time.monotonic() + self._refresh_row(task, refresh=False) + self._progress.refresh() def advance_task( self, *, task_id: str, advance: int = 1, message: str | None = None ) -> None: """Advance a task's subtask progress.""" del message - task = self._task_lookup.get(task_id) - if task is None: - return - task.completed_subtasks = min( - task.total_subtasks, task.completed_subtasks + max(advance, 0) - ) - self._refresh_row(task) + with self._lock: + task = self._task_lookup.get(task_id) + if task is None: + return + task.completed_subtasks = min( + task.total_subtasks, task.completed_subtasks + max(advance, 0) + ) + self._refresh_row(task) def finish_task( self, *, task_id: str, status: str = "completed", error: str | None = None ) -> None: """Mark a task as finished.""" - task = self._task_lookup.get(task_id) - if task is None: - 
return - - task.status = status - task.error = error - task.finished_at = time.monotonic() - if status in {"completed", "skipped"}: - task.completed_subtasks = task.total_subtasks - if status in self._status_counts: - self._status_counts[status] += 1 - self._refresh_row(task) + with self._lock: + task = self._task_lookup.get(task_id) + if task is None: + return + + task.status = status + task.error = error + task.finished_at = time.monotonic() + if status in {"completed", "skipped"}: + task.completed_subtasks = task.total_subtasks + if status in self._status_counts: + self._status_counts[status] += 1 + self._refresh_row(task) def finish_pipeline(self) -> None: """Stop rendering and print a summary.""" if not self._pipeline_started: return - self._progress.stop() - pending = self._total_tasks - sum(self._status_counts.values()) - self._console.print( - "Summary: " - f"{self._status_counts['completed']} completed, " - f"{self._status_counts['failed']} failed, " - f"{self._status_counts['skipped']} skipped, " - f"{max(pending, 0)} pending" - ) - - def _refresh_row(self, task: _TaskState) -> None: + self._stop_refresh.set() + if self._refresh_thread is not None: + self._refresh_thread.join(timeout=1.0) + self._refresh_thread = None + with self._lock: + self._progress.stop() + pending = self._total_tasks - sum(self._status_counts.values()) + self._console.print( + "Summary: " + f"{self._status_counts['completed']} completed, " + f"{self._status_counts['failed']} failed, " + f"{self._status_counts['skipped']} skipped, " + f"{max(pending, 0)} pending" + ) + self._pipeline_started = False + + def _refresh_row(self, task: _TaskState, *, refresh: bool = True) -> None: """Refresh a rendered task row.""" self._progress.update( task.progress_task_id, completed=task.completed_subtasks, row=self._render_row(task), ) - self._progress.refresh() + if refresh: + self._progress.refresh() + + def _refresh_loop(self) -> None: + """Refresh running task rows so elapsed time stays 
current.""" + while not self._stop_refresh.wait(0.5): + with self._lock: + running = [ + task + for task in self._task_lookup.values() + if task.status == "running" + ] + if not running: + continue + for task in running: + self._refresh_row(task, refresh=False) + self._progress.refresh() def _render_row(self, task: _TaskState) -> str: """Build a compact row for a task.""" diff --git a/src/adagio/serial_execute.py b/src/adagio/serial_execute.py deleted file mode 100644 index 8cd610e..0000000 --- a/src/adagio/serial_execute.py +++ /dev/null @@ -1,381 +0,0 @@ -from __future__ import annotations - -import os -import typing as t -import warnings -import zipfile -from collections.abc import Mapping - -from adagio.model.arguments import AdagioArguments -from adagio.model.ast import TypeAST, TypeASTExpression, TypeASTIntersection, TypeASTUnion -from adagio.model.pipeline import AdagioPipeline -from adagio.model.task import PluginActionTask, RootInputTask -from adagio.monitor.api import Monitor -from adagio.monitor.log import LogMonitor - -SERIAL_SUBTASK_COUNT = 1 - - -def execute_serial( - *, pipeline: AdagioPipeline, arguments: AdagioArguments, monitor: Monitor | None = None -) -> None: - """Execute a pipeline serially using the QIIME API (no Parsl).""" - from qiime2 import get_cache - from qiime2.sdk import PluginManager - - sig = pipeline.signature - tasks = list(pipeline.iter_tasks()) - monitor = monitor or LogMonitor() - - pipeline.validate_graph() - sig.validate_arguments(arguments) - - monitor.start_pipeline(total_tasks=len(tasks)) - try: - plugin_manager = PluginManager() - cache = get_cache() - with cache: - scope: dict[str, t.Any] = {} - completed_task_ids: set[str] = set() - - monitor.start_load_input() - _load_inputs(sig=sig, arguments=arguments, scope=scope) - monitor.finish_load_input() - - execution_plan = _plan_execution_order(tasks=tasks, scope=scope) - for task in execution_plan: - monitor.queue_task( - task_id=task.id, - label=_task_label(task), - # 
QIIME actions do not expose nested subtask progress. - total_subtasks=SERIAL_SUBTASK_COUNT, - ) - - params = sig.get_params(arguments) - - for task in execution_plan: - monitor.start_task(task_id=task.id) - try: - _execute_task(task=task, plugin_manager=plugin_manager, params=params, scope=scope) - monitor.advance_task(task_id=task.id, advance=1) - monitor.finish_task(task_id=task.id, status="completed") - completed_task_ids.add(task.id) - except Exception as exc: # noqa: BLE001 - monitor.finish_task(task_id=task.id, status="failed", error=str(exc)) - for skipped_task in tasks: - if skipped_task.id == task.id or skipped_task.id in completed_task_ids: - continue - monitor.finish_task( - task_id=skipped_task.id, - status="skipped", - error=f"Skipped because task {task.id!r} failed.", - ) - raise - - monitor.start_save_output() - _save_outputs(sig=sig, arguments=arguments, scope=scope, monitor=monitor) - monitor.finish_save_output() - finally: - monitor.finish_pipeline() - - -def _load_inputs(*, sig, arguments: AdagioArguments, scope: dict[str, t.Any]) -> None: - from qiime2 import Artifact - - for input_def in sig.inputs: - source = arguments.inputs[input_def.name] - if _is_metadata_ast(input_def.ast): - scope[input_def.id] = _load_metadata(source) - else: - scope[input_def.id] = Artifact.load(source) - - -def _load_metadata(source: str) -> t.Any: - from qiime2 import Artifact, Metadata - - if zipfile.is_zipfile(source): - return Artifact.load(source).view(Metadata) - return Metadata.load(source) - - -def _execute_task(*, task: t.Any, plugin_manager, params: dict[str, t.Any], scope: dict[str, t.Any]) -> None: - if isinstance(task, RootInputTask): - for name, src in task.inputs.items(): - dst = task.outputs[name] - scope[dst.id] = scope[src.id] - return None - - if isinstance(task, PluginActionTask): - _execute_plugin_action(task=task, plugin_manager=plugin_manager, params=params, scope=scope) - return None - - raise TypeError(f"Unsupported task type: {type(task)}") - 
- -def _execute_plugin_action( - *, task: PluginActionTask, plugin_manager, params: dict[str, t.Any], scope: dict[str, t.Any] -) -> None: - plugins = plugin_manager.plugins - resolved_plugin_name, plugin = _resolve_key(plugins, task.plugin) - if plugin is None: - available_plugins = ", ".join(sorted(plugins.keys())[:20]) - raise KeyError( - "Unable to find QIIME plugin " - f"{task.plugin!r} for task {task.id!r}. " - "This usually means the runtime image is missing required plugins. " - f"Available plugins (first 20): [{available_plugins}]" - ) - - actions = plugin.actions - resolved_action_name, action = _resolve_key(actions, task.action) - if action is None: - available_actions = ", ".join(sorted(actions.keys())[:30]) - raise KeyError( - "Unable to find QIIME action " - f"{task.plugin!r}.{task.action!r} for task {task.id!r}. " - "This usually means the runtime image is not the expected QIIME distribution/version. " - f"Available actions in plugin {task.plugin!r} (first 30): [{available_actions}]" - ) - kwargs: dict[str, t.Any] = {} - metadata_inputs: dict[str, t.Any] = {} - - for name, src in task.inputs.items(): - if src.id not in scope: - raise KeyError(f"Missing input dependency {src.id!r} for task {task.id!r}.") - value = scope[src.id] - if src.kind == "archive": - kwargs[name] = value - elif src.kind == "metadata": - metadata_inputs[name] = _as_metadata(value) - else: - raise TypeError(f"Unsupported input kind: {src.kind!r}") - - for name, param in task.parameters.items(): - if param.kind == "literal": - kwargs[name] = _coerce_action_parameter(action=action, parameter_name=name, value=param.value) - elif param.kind == "promoted": - if param.id not in params: - raise KeyError(f"Missing promoted parameter {param.id!r} for task {task.id!r}.") - kwargs[name] = _coerce_action_parameter( - action=action, - parameter_name=name, - value=params[param.id], - ) - elif param.kind == "metadata": - if name not in metadata_inputs: - raise KeyError(f"Missing metadata input 
{name!r} for task {task.id!r}.") - metadata = metadata_inputs.pop(name) - column = _resolve_metadata_column_name(param=param, params=params) - kwargs[name] = metadata.get_column(column) - else: - raise TypeError(f"Unsupported parameter kind: {param.kind!r}") - - for name, value in metadata_inputs.items(): - kwargs[name] = value - - with _action_output_context(): - results = action(**kwargs) - for name, dest in task.outputs.items(): - scope[dest.id] = getattr(results, name) - - -def _coerce_action_parameter(*, action: t.Any, parameter_name: str, value: t.Any) -> t.Any: - if value is None: - return None - - signature = getattr(action, "signature", None) - parameters = getattr(signature, "parameters", None) - if not isinstance(parameters, Mapping): - return value - if parameter_name not in parameters: - return value - - qiime_type = getattr(parameters[parameter_name], "qiime_type", None) - if qiime_type is None: - return value - - from qiime2.sdk.util import parse_primitive - - return parse_primitive(qiime_type, value) - - -def _resolve_key(mapping: t.Mapping[str, t.Any], requested: str) -> tuple[str | None, t.Any]: - if requested in mapping: - return requested, mapping[requested] - - canonical_requested = _canonical_name(requested) - for key in mapping.keys(): - if _canonical_name(key) == canonical_requested: - return key, mapping[key] - - return None, None - - -def _canonical_name(value: str) -> str: - return value.strip().replace("-", "_").replace(" ", "_").lower() - - -def _resolve_metadata_column_name(*, param, params: dict[str, t.Any]) -> str: - column = param.column - if column.kind == "literal": - return str(column.value) - if column.kind == "promoted": - if column.id not in params: - raise KeyError(f"Missing promoted metadata column parameter {column.id!r}.") - return str(params[column.id]) - raise TypeError(f"Unsupported metadata column selector kind: {column.kind!r}") - - -def _as_metadata(value: t.Any) -> t.Any: - from qiime2 import Artifact, Metadata - - 
if isinstance(value, Metadata): - return value - if isinstance(value, Artifact): - return value.view(Metadata) - return value - - -def _plan_execution_order(*, tasks: list[t.Any], scope: dict[str, t.Any]) -> list[t.Any]: - """Return a dependency-respecting serial execution plan.""" - available_ids = set(scope.keys()) - remaining = list(tasks) - planned: list[t.Any] = [] - - while remaining: - progressed = False - for task in list(remaining): - missing = [src.id for src in task.inputs.values() if src.id not in available_ids] - if missing: - continue - - planned.append(task) - remaining.remove(task) - progressed = True - for output in task.outputs.values(): - available_ids.add(output.id) - - if not progressed: - details = [] - for task in remaining: - missing = ", ".join(src.id for src in task.inputs.values() if src.id not in available_ids) - details.append(f"{task.id}: missing [{missing}]") - raise RuntimeError( - "Unable to resolve task dependencies for serial execution. " - + "; ".join(details) - ) - - return planned - - -def _save_outputs( - *, sig, arguments: AdagioArguments, scope: dict[str, t.Any], monitor: Monitor | None = None -) -> None: - if isinstance(arguments.outputs, str): - os.makedirs(arguments.outputs, exist_ok=True) - - for output in sig.outputs: - if output.id not in scope: - raise KeyError(f"Missing output value for {output.name!r} ({output.id}).") - - if isinstance(arguments.outputs, str): - destination = os.path.join(arguments.outputs, output.name) - elif isinstance(arguments.outputs, dict): - destination = arguments.outputs.get(output.name) - if destination is None: - expected_outputs = ", ".join(sorted(item.name for item in sig.outputs)) - provided_outputs = ", ".join(sorted(arguments.outputs.keys())) or "" - raise KeyError( - "Missing destination for output " - f"{output.name!r}. Expected output names: [{expected_outputs}]. " - f"Provided output names: [{provided_outputs}]." 
- ) - else: - raise TypeError("Unsupported outputs configuration.") - - parent = os.path.dirname(destination) - if parent: - os.makedirs(parent, exist_ok=True) - - value = scope[output.id] - save_fn = getattr(value, "save", None) - if not callable(save_fn): - raise TypeError(f"Output {output.name!r} does not support save().") - try: - save_fn(destination) - except Exception as exc: # noqa: BLE001 - if monitor is not None: - monitor.finish_output( - output_id=output.id, - output_name=output.name, - destination=destination, - status="failed", - error=str(exc), - ) - raise - else: - if monitor is not None: - monitor.finish_output( - output_id=output.id, - output_name=output.name, - destination=destination, - status="succeeded", - ) - - -def _is_metadata_ast(ast: TypeAST) -> bool: - if isinstance(ast, TypeASTExpression): - return bool(ast.builtin and ast.name.startswith("Metadata")) - if isinstance(ast, (TypeASTUnion, TypeASTIntersection)): - return any(_is_metadata_ast(member) for member in ast.members) - return False - - -def _task_label(task: t.Any) -> str: - kind = getattr(task, "kind", "unknown") - task_id = getattr(task, "id", "") - if kind == "plugin-action": - plugin = getattr(task, "plugin", "") - action = getattr(task, "action", "") - return f"{task_id} ({plugin}.{action})" - if kind == "built-in": - name = getattr(task, "name", "built-in") - return f"{task_id} ({name})" - return task_id - - -class _action_output_context: - """Suppress plugin stdout/stderr noise unless explicitly enabled.""" - - def __enter__(self): - mode = os.getenv("ADAGIO_ACTION_STDIO", "").strip().lower() - self._suppress = mode not in {"inherit", "show", "verbose", "1", "true", "yes"} - if not self._suppress: - return self - - self._saved_fds: list[tuple[int, int]] = [] - self._sink = open(os.devnull, "w", encoding="utf-8") - self._warnings = warnings.catch_warnings() - self._warnings.__enter__() - warnings.filterwarnings( - "ignore", - message="pkg_resources is deprecated as an API.*", 
- category=UserWarning, - ) - for fd in (1, 2): - saved = os.dup(fd) - self._saved_fds.append((fd, saved)) - os.dup2(self._sink.fileno(), fd) - return self - - def __exit__(self, exc_type, exc, tb): - if not getattr(self, "_suppress", False): - return False - for fd, saved in reversed(self._saved_fds): - try: - os.dup2(saved, fd) - finally: - os.close(saved) - self._warnings.__exit__(exc_type, exc, tb) - self._sink.close() - return False From 0c9d6dc085c99b31127af76be8cb80a79a258735 Mon Sep 17 00:00:00 2001 From: John Chase Date: Thu, 12 Mar 2026 23:41:44 -0700 Subject: [PATCH 23/44] Refactor --- README.md | 6 +- src/adagio/cli/runtime.py | 2 - src/adagio/cli/task_exec.py | 12 +- src/adagio/executors/__init__.py | 11 +- src/adagio/executors/base.py | 2 - src/adagio/executors/common.py | 2 - src/adagio/executors/container_support.py | 2 - src/adagio/executors/defaults.py | 2 - src/adagio/executors/docker.py | 46 ++--- src/adagio/executors/path_utils.py | 47 +++++ src/adagio/executors/serial_runner.py | 107 ++++++++++++ src/adagio/executors/task_contract.py | 59 +++++++ src/adagio/executors/task_environments.py | 198 ++++------------------ src/adagio/monitor/composite.py | 2 - src/adagio/monitor/connected.py | 2 - 15 files changed, 287 insertions(+), 213 deletions(-) create mode 100644 src/adagio/executors/path_utils.py create mode 100644 src/adagio/executors/serial_runner.py create mode 100644 src/adagio/executors/task_contract.py diff --git a/README.md b/README.md index 47764ed..5a502fa 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ Command-line runner for Adagio pipeline files - Python 3.10+ - `uv` (recommended for development) -- Optional: Docker (used automatically when local QIIME imports are unavailable) +- Docker (currently required for pipeline execution) ## Installation @@ -45,6 +45,10 @@ Run with a pipeline file: adagio run --pipeline path/to/pipeline.json ``` +`adagio run` executes each plugin task in its own task environment. 
+Today the default task environment is a Docker image in GHCR derived from the plugin +name in the pipeline spec, for example `dada2` -> `ghcr.io/cymis/qiime2-plugin-dada2:2026.1`. + Equivalent positional form: ```bash diff --git a/src/adagio/cli/runtime.py b/src/adagio/cli/runtime.py index d25b70d..4ea13c9 100644 --- a/src/adagio/cli/runtime.py +++ b/src/adagio/cli/runtime.py @@ -1,5 +1,3 @@ -from __future__ import annotations - import argparse import json import os diff --git a/src/adagio/cli/task_exec.py b/src/adagio/cli/task_exec.py index 5f8b303..4e26a83 100644 --- a/src/adagio/cli/task_exec.py +++ b/src/adagio/cli/task_exec.py @@ -1,9 +1,6 @@ """Internal exec-task subcommand: runs a single QIIME action inside a plugin container.""" -from __future__ import annotations - import argparse -import json import os import sys import warnings @@ -11,6 +8,8 @@ from pathlib import Path from typing import Any +from adagio.executors.task_contract import read_json_file, write_json_file + def run_task_exec(argv: list[str]) -> None: """Entrypoint for the internal ``adagio exec-task`` subcommand.""" @@ -21,7 +20,7 @@ def run_task_exec(argv: list[str]) -> None: parser.add_argument("--task", required=True, help="Path to the task spec JSON file.") opts = parser.parse_args(argv) - task_spec = json.loads(Path(opts.task).read_text(encoding="utf-8")) + task_spec = read_json_file(Path(opts.task)) _run_task(task_spec) @@ -89,10 +88,7 @@ def _run_task(spec: dict[str, Any]) -> None: saved_outputs[name] = artifact.save(dest_path) if result_manifest: - Path(result_manifest).write_text( - json.dumps(saved_outputs, ensure_ascii=True), - encoding="utf-8", - ) + write_json_file(Path(result_manifest), saved_outputs) def _resolve_key(mapping: Any, requested: str) -> Any: diff --git a/src/adagio/executors/__init__.py b/src/adagio/executors/__init__.py index 73662d5..d1b7849 100644 --- a/src/adagio/executors/__init__.py +++ b/src/adagio/executors/__init__.py @@ -1,10 +1,11 @@ -from .base import 
PipelineExecutor -from .defaults import DefaultTaskEnvironmentResolver -from .docker import DockerTaskEnvironmentLauncher -from .task_environments import TaskEnvironmentExecutor +__all__ = ["select_default_executor"] -def select_default_executor() -> PipelineExecutor: +def select_default_executor(): + from .defaults import DefaultTaskEnvironmentResolver + from .docker import DockerTaskEnvironmentLauncher + from .task_environments import TaskEnvironmentExecutor + return TaskEnvironmentExecutor( environment_resolver=DefaultTaskEnvironmentResolver(), launchers={ diff --git a/src/adagio/executors/base.py b/src/adagio/executors/base.py index d286622..d023074 100644 --- a/src/adagio/executors/base.py +++ b/src/adagio/executors/base.py @@ -1,5 +1,3 @@ -from __future__ import annotations - from dataclasses import dataclass from pathlib import Path from typing import Any, Mapping, Protocol diff --git a/src/adagio/executors/common.py b/src/adagio/executors/common.py index 9821990..6e4e884 100644 --- a/src/adagio/executors/common.py +++ b/src/adagio/executors/common.py @@ -1,5 +1,3 @@ -from __future__ import annotations - import typing as t diff --git a/src/adagio/executors/container_support.py b/src/adagio/executors/container_support.py index 21081c1..8c4b97c 100644 --- a/src/adagio/executors/container_support.py +++ b/src/adagio/executors/container_support.py @@ -1,5 +1,3 @@ -from __future__ import annotations - import os import sys from pathlib import Path diff --git a/src/adagio/executors/defaults.py b/src/adagio/executors/defaults.py index baa8cef..72ef0ab 100644 --- a/src/adagio/executors/defaults.py +++ b/src/adagio/executors/defaults.py @@ -1,5 +1,3 @@ -from __future__ import annotations - from adagio.model.task import PluginActionTask from .base import TaskEnvironmentResolver, TaskEnvironmentSpec diff --git a/src/adagio/executors/docker.py b/src/adagio/executors/docker.py index 37fdf2f..b97ac16 100644 --- a/src/adagio/executors/docker.py +++ 
b/src/adagio/executors/docker.py @@ -1,6 +1,3 @@ -from __future__ import annotations - -import json import subprocess from pathlib import Path @@ -15,14 +12,21 @@ from .container_support import ( containerize_host_value, containerize_path, - docker_tty_flags, host_path_from_container, is_uri, local_source_root, print_filtered_container_stderr, + docker_tty_flags, python_warning_env_flags, with_mounts, ) +from .task_contract import ( + build_task_spec, + read_json_file, + result_manifest_path, + task_spec_path, + write_json_file, +) class DockerTaskEnvironmentLauncher(TaskEnvironmentLauncher): @@ -49,21 +53,19 @@ def launch( for name, path in request.outputs.items() } - safe_id = task.id.replace("/", "_").replace(" ", "_") - result_manifest_path = (request.work_path / f"{safe_id}_results.json").resolve() - task_spec = { - "plugin": task.plugin, - "action": task.action, - "archive_inputs": archive_inputs, - "metadata_inputs": metadata_inputs, - "params": dict(request.params), - "metadata_column_kwargs": dict(request.metadata_column_kwargs), - "outputs": outputs, - "result_manifest": containerize_path(result_manifest_path), - } - - task_spec_path = (request.work_path / f"{safe_id}_spec.json").resolve() - task_spec_path.write_text(json.dumps(task_spec, ensure_ascii=True), encoding="utf-8") + manifest_path = result_manifest_path(task_id=task.id, work_path=request.work_path) + spec_path = task_spec_path(task_id=task.id, work_path=request.work_path) + task_spec = build_task_spec( + plugin=task.plugin, + action=task.action, + archive_inputs=archive_inputs, + metadata_inputs=metadata_inputs, + params=dict(request.params), + metadata_column_kwargs=dict(request.metadata_column_kwargs), + outputs=outputs, + result_manifest=containerize_path(manifest_path), + ) + write_json_file(spec_path, task_spec) src_root = local_source_root() command = [ @@ -81,7 +83,7 @@ def launch( "-m", "adagio.cli.task_exec", "--task", - containerize_path(task_spec_path), + 
containerize_path(spec_path), ] host_paths = [request.cwd, request.work_path, src_root] @@ -128,12 +130,12 @@ def launch( f"with exit code {result.returncode}.{detail}" ) - if not result_manifest_path.exists(): + if not manifest_path.exists(): raise RuntimeError( f"Task {task.id!r} completed but did not write an output manifest." ) - output_manifest = json.loads(result_manifest_path.read_text(encoding="utf-8")) + output_manifest = read_json_file(manifest_path) outputs = {} for output_name in request.outputs: actual_path = output_manifest.get(output_name) diff --git a/src/adagio/executors/path_utils.py b/src/adagio/executors/path_utils.py new file mode 100644 index 0000000..245b18e --- /dev/null +++ b/src/adagio/executors/path_utils.py @@ -0,0 +1,47 @@ +import os +from pathlib import Path +from typing import Iterable + +from .container_support import is_uri + + +def resolve_host_path(*, source: str, cwd: Path) -> str: + if is_uri(source): + return source + path = Path(source) + if path.is_absolute(): + return str(path.resolve()) + return str((cwd / path).resolve()) + + +def resolve_output_destination( + *, + output_name: str, + output_names: Iterable[str], + outputs: str | dict[str, str], + source_path: Path, +) -> str: + suffix = source_path.suffix + + if isinstance(outputs, str): + return append_output_suffix(os.path.join(outputs, output_name), suffix) + + if isinstance(outputs, dict): + raw_dest = outputs.get(output_name) + if raw_dest is None: + expected_outputs = ", ".join(sorted(output_names)) + provided_outputs = ", ".join(sorted(outputs.keys())) or "" + raise KeyError( + "Missing destination for output " + f"{output_name!r}. Expected output names: [{expected_outputs}]. " + f"Provided output names: [{provided_outputs}]." 
+ ) + return append_output_suffix(raw_dest, suffix) + + raise TypeError("Unsupported outputs configuration.") + + +def append_output_suffix(destination: str, suffix: str) -> str: + if suffix and not destination.endswith(suffix): + return destination + suffix + return destination diff --git a/src/adagio/executors/serial_runner.py b/src/adagio/executors/serial_runner.py new file mode 100644 index 0000000..9ea78ef --- /dev/null +++ b/src/adagio/executors/serial_runner.py @@ -0,0 +1,107 @@ +import tempfile +import typing as t +from dataclasses import dataclass +from pathlib import Path + +from rich.console import Console + +from adagio.model.arguments import AdagioArguments +from adagio.model.pipeline import AdagioPipeline +from adagio.monitor.api import Monitor +from adagio.monitor.log import LogMonitor +from adagio.monitor.tty import RichMonitor + +from .common import plan_execution_order, task_label +from .path_utils import resolve_host_path + +CONTAINER_SUBTASK_COUNT = 1 + + +@dataclass +class SerialExecutionState: + cwd: Path + work_path: Path + params: dict[str, t.Any] + scope: dict[str, str] + + +def run_serial_pipeline( + *, + pipeline: AdagioPipeline, + arguments: AdagioArguments, + resolve_task: t.Callable[[t.Any, SerialExecutionState, Console | None], None], + finish_outputs: t.Callable[[t.Any, AdagioArguments, SerialExecutionState, Monitor | None], None], + console: Console | None = None, + monitor: Monitor | None = None, + total_subtasks: int = CONTAINER_SUBTASK_COUNT, +) -> None: + sig = pipeline.signature + tasks = list(pipeline.iter_tasks()) + active_monitor = resolve_monitor(console=console, monitor=monitor) + + pipeline.validate_graph() + sig.validate_arguments(arguments) + + active_monitor.start_pipeline(total_tasks=len(tasks)) + + with tempfile.TemporaryDirectory(prefix="adagio-work-") as work_dir: + state = SerialExecutionState( + cwd=Path.cwd().resolve(), + work_path=Path(work_dir), + params=sig.get_params(arguments), + scope={}, + ) + 
completed_task_ids: set[str] = set() + + active_monitor.start_load_input() + for input_def in sig.inputs: + source = arguments.inputs[input_def.name] + state.scope[input_def.id] = resolve_host_path(source=source, cwd=state.cwd) + active_monitor.finish_load_input() + + execution_plan = plan_execution_order(tasks=tasks, scope=state.scope) + for task in execution_plan: + active_monitor.queue_task( + task_id=task.id, + label=task_label(task), + total_subtasks=total_subtasks, + ) + + try: + for task in execution_plan: + active_monitor.start_task(task_id=task.id) + try: + resolve_task(task, state, console) + active_monitor.advance_task(task_id=task.id, advance=1) + active_monitor.finish_task(task_id=task.id, status="completed") + completed_task_ids.add(task.id) + except Exception as exc: # noqa: BLE001 + active_monitor.finish_task(task_id=task.id, status="failed", error=str(exc)) + for skipped_task in tasks: + if skipped_task.id == task.id or skipped_task.id in completed_task_ids: + continue + active_monitor.finish_task( + task_id=skipped_task.id, + status="skipped", + error=f"Skipped because task {task.id!r} failed.", + ) + raise + + active_monitor.start_save_output() + finish_outputs( + sig=sig, + arguments=arguments, + state=state, + monitor=active_monitor, + ) + active_monitor.finish_save_output() + finally: + active_monitor.finish_pipeline() + + +def resolve_monitor(*, console: Console | None, monitor: Monitor | None) -> Monitor: + if monitor is not None: + return monitor + if console is not None: + return RichMonitor(console=console) + return LogMonitor() diff --git a/src/adagio/executors/task_contract.py b/src/adagio/executors/task_contract.py new file mode 100644 index 0000000..ad67b43 --- /dev/null +++ b/src/adagio/executors/task_contract.py @@ -0,0 +1,59 @@ +import json +from pathlib import Path +from typing import Any, Iterable + + +def task_file_stem(task_id: str) -> str: + return task_id.replace("/", "_").replace(" ", "_") + + +def build_task_outputs( + *, + 
task_id: str, + output_names: Iterable[str], + work_path: Path, +) -> dict[str, str]: + stem = task_file_stem(task_id) + return { + name: str((work_path / f"{stem}_{name}").resolve()) + for name in output_names + } + + +def task_spec_path(*, task_id: str, work_path: Path) -> Path: + return (work_path / f"{task_file_stem(task_id)}_spec.json").resolve() + + +def result_manifest_path(*, task_id: str, work_path: Path) -> Path: + return (work_path / f"{task_file_stem(task_id)}_results.json").resolve() + + +def build_task_spec( + *, + plugin: str, + action: str, + archive_inputs: dict[str, str], + metadata_inputs: dict[str, str], + params: dict[str, Any], + metadata_column_kwargs: dict[str, dict[str, str]], + outputs: dict[str, str], + result_manifest: str | None, +) -> dict[str, Any]: + return { + "plugin": plugin, + "action": action, + "archive_inputs": archive_inputs, + "metadata_inputs": metadata_inputs, + "params": params, + "metadata_column_kwargs": metadata_column_kwargs, + "outputs": outputs, + "result_manifest": result_manifest, + } + + +def read_json_file(path: Path) -> dict[str, Any]: + return json.loads(path.read_text(encoding="utf-8")) + + +def write_json_file(path: Path, payload: dict[str, Any]) -> None: + path.write_text(json.dumps(payload, ensure_ascii=True), encoding="utf-8") diff --git a/src/adagio/executors/task_environments.py b/src/adagio/executors/task_environments.py index ff61a6a..f04d90b 100644 --- a/src/adagio/executors/task_environments.py +++ b/src/adagio/executors/task_environments.py @@ -1,19 +1,12 @@ -from __future__ import annotations - import os import shutil -import tempfile -import typing as t from pathlib import Path from rich.console import Console from adagio.model.arguments import AdagioArguments -from adagio.model.pipeline import AdagioPipeline from adagio.model.task import PluginActionTask, RootInputTask from adagio.monitor.api import Monitor -from adagio.monitor.log import LogMonitor -from adagio.monitor.tty import RichMonitor 
from .base import ( PipelineExecutor, @@ -21,10 +14,9 @@ TaskEnvironmentResolver, TaskExecutionRequest, ) -from .common import plan_execution_order, task_label -from .container_support import is_uri - -CONTAINER_SUBTASK_COUNT = 1 +from .path_utils import resolve_output_destination +from .serial_runner import SerialExecutionState, run_serial_pipeline +from .task_contract import build_task_outputs class TaskEnvironmentExecutor(PipelineExecutor): @@ -42,103 +34,36 @@ def __init__( def execute( self, *, - pipeline: AdagioPipeline, + pipeline, arguments: AdagioArguments, console: Console | None = None, monitor: Monitor | None = None, ) -> None: - sig = pipeline.signature - tasks = list(pipeline.iter_tasks()) - active_monitor = _resolve_monitor(console=console, monitor=monitor) - - pipeline.validate_graph() - sig.validate_arguments(arguments) - - active_monitor.start_pipeline(total_tasks=len(tasks)) - - with tempfile.TemporaryDirectory(prefix="adagio-work-") as work_dir: - work_path = Path(work_dir) - scope: dict[str, str] = {} - completed_task_ids: set[str] = set() - cwd = Path.cwd().resolve() - - active_monitor.start_load_input() - for input_def in sig.inputs: - source = arguments.inputs[input_def.name] - scope[input_def.id] = _resolve_host_path(source=source, cwd=cwd) - active_monitor.finish_load_input() - - params = sig.get_params(arguments) - execution_plan = plan_execution_order(tasks=tasks, scope=scope) - - for task in execution_plan: - active_monitor.queue_task( - task_id=task.id, - label=task_label(task), - total_subtasks=CONTAINER_SUBTASK_COUNT, - ) - - try: - for task in execution_plan: - active_monitor.start_task(task_id=task.id) - try: - self._execute_task( - task=task, - params=params, - scope=scope, - work_path=work_path, - cwd=cwd, - console=console, - ) - active_monitor.advance_task(task_id=task.id, advance=1) - active_monitor.finish_task(task_id=task.id, status="completed") - completed_task_ids.add(task.id) - except Exception as exc: # noqa: BLE001 - 
active_monitor.finish_task(task_id=task.id, status="failed", error=str(exc)) - for skipped_task in tasks: - if skipped_task.id == task.id or skipped_task.id in completed_task_ids: - continue - active_monitor.finish_task( - task_id=skipped_task.id, - status="skipped", - error=f"Skipped because task {task.id!r} failed.", - ) - raise - - active_monitor.start_save_output() - _save_outputs( - sig=sig, - arguments=arguments, - scope=scope, - monitor=active_monitor, - ) - active_monitor.finish_save_output() - finally: - active_monitor.finish_pipeline() + run_serial_pipeline( + pipeline=pipeline, + arguments=arguments, + resolve_task=self._resolve_task, + finish_outputs=_save_outputs, + console=console, + monitor=monitor, + ) - def _execute_task( + def _resolve_task( self, - *, - task: t.Any, - params: dict[str, t.Any], - scope: dict[str, str], - work_path: Path, - cwd: Path, + task, + state: SerialExecutionState, console: Console | None, ) -> None: if isinstance(task, RootInputTask): for name, src in task.inputs.items(): dst = task.outputs[name] - scope[dst.id] = scope[src.id] + state.scope[dst.id] = state.scope[src.id] return if isinstance(task, PluginActionTask): self._execute_plugin_action( task=task, - params=params, - scope=scope, - work_path=work_path, - cwd=cwd, + state=state, console=console, ) return @@ -149,10 +74,7 @@ def _execute_plugin_action( self, *, task: PluginActionTask, - params: dict[str, t.Any], - scope: dict[str, str], - work_path: Path, - cwd: Path, + state: SerialExecutionState, console: Console | None, ) -> None: environment = self._environment_resolver.resolve(task=task) @@ -165,7 +87,7 @@ def _execute_plugin_action( archive_inputs: dict[str, str] = {} metadata_inputs: dict[str, str] = {} for name, src in task.inputs.items(): - value = scope[src.id] + value = state.scope[src.id] if src.kind == "archive": archive_inputs[name] = value elif src.kind == "metadata": @@ -173,34 +95,34 @@ def _execute_plugin_action( else: raise TypeError(f"Unsupported 
input kind: {src.kind!r}") - resolved_params: dict[str, t.Any] = {} + resolved_params: dict[str, object] = {} metadata_column_kwargs: dict[str, dict[str, str]] = {} for name, param in task.parameters.items(): if param.kind == "literal": resolved_params[name] = param.value elif param.kind == "promoted": - resolved_params[name] = params[param.id] + resolved_params[name] = state.params[param.id] elif param.kind == "metadata": column = param.column if column.kind == "literal": column_name = str(column.value) elif column.kind == "promoted": - column_name = str(params[column.id]) + column_name = str(state.params[column.id]) else: raise TypeError(f"Unsupported metadata column kind: {column.kind!r}") metadata_column_kwargs[name] = {"source": name, "column": column_name} else: raise TypeError(f"Unsupported parameter kind: {param.kind!r}") - safe_id = task.id.replace("/", "_").replace(" ", "_") - outputs = { - name: str((work_path / f"{safe_id}_{name}").resolve()) - for name in task.outputs - } + outputs = build_task_outputs( + task_id=task.id, + output_names=task.outputs.keys(), + work_path=state.work_path, + ) request = TaskExecutionRequest( task=task, - cwd=cwd, - work_path=work_path, + cwd=state.cwd, + work_path=state.work_path, archive_inputs=archive_inputs, metadata_inputs=metadata_inputs, params=resolved_params, @@ -219,37 +141,29 @@ def _execute_plugin_action( raise RuntimeError( f"Task {task.id!r} did not produce output {output_name!r}." 
) - scope[dest.id] = actual_path - - -def _resolve_monitor(*, console: Console | None, monitor: Monitor | None) -> Monitor: - if monitor is not None: - return monitor - if console is not None: - return RichMonitor(console=console) - return LogMonitor() + state.scope[dest.id] = actual_path def _save_outputs( *, sig, arguments: AdagioArguments, - scope: dict[str, str], + state: SerialExecutionState, monitor: Monitor | None, ) -> None: if isinstance(arguments.outputs, str): os.makedirs(arguments.outputs, exist_ok=True) for output in sig.outputs: - if output.id not in scope: + if output.id not in state.scope: raise KeyError(f"Missing output value for {output.name!r} ({output.id}).") - source_path = Path(scope[output.id]) - destination = _resolve_output_destination( + source_path = Path(state.scope[output.id]) + destination = resolve_output_destination( output_name=output.name, + output_names=[item.name for item in sig.outputs], outputs=arguments.outputs, source_path=source_path, - sig=sig, ) parent = os.path.dirname(destination) @@ -276,45 +190,3 @@ def _save_outputs( destination=destination, status="succeeded", ) - - -def _resolve_output_destination( - *, - output_name: str, - outputs: str | dict[str, str], - source_path: Path, - sig, -) -> str: - suffix = source_path.suffix - - if isinstance(outputs, str): - return _append_output_suffix(os.path.join(outputs, output_name), suffix) - - if isinstance(outputs, dict): - raw_dest = outputs.get(output_name) - if raw_dest is None: - expected_outputs = ", ".join(sorted(item.name for item in sig.outputs)) - provided_outputs = ", ".join(sorted(outputs.keys())) or "" - raise KeyError( - "Missing destination for output " - f"{output_name!r}. Expected output names: [{expected_outputs}]. " - f"Provided output names: [{provided_outputs}]." 
- ) - return _append_output_suffix(raw_dest, suffix) - - raise TypeError("Unsupported outputs configuration.") - - -def _append_output_suffix(destination: str, suffix: str) -> str: - if suffix and not destination.endswith(suffix): - return destination + suffix - return destination - - -def _resolve_host_path(*, source: str, cwd: Path) -> str: - if is_uri(source): - return source - path = Path(source) - if path.is_absolute(): - return str(path.resolve()) - return str((cwd / path).resolve()) diff --git a/src/adagio/monitor/composite.py b/src/adagio/monitor/composite.py index a465ed6..5c208c9 100644 --- a/src/adagio/monitor/composite.py +++ b/src/adagio/monitor/composite.py @@ -1,5 +1,3 @@ -from __future__ import annotations - from .api import Monitor diff --git a/src/adagio/monitor/connected.py b/src/adagio/monitor/connected.py index 913b657..520ea8e 100644 --- a/src/adagio/monitor/connected.py +++ b/src/adagio/monitor/connected.py @@ -1,5 +1,3 @@ -from __future__ import annotations - import json import urllib.error import urllib.request From 9c1911f372d95fdf273816ee735b43a25aa09898 Mon Sep 17 00:00:00 2001 From: Codex Date: Thu, 19 Mar 2026 20:32:23 -0700 Subject: [PATCH 24/44] Add cache-backed task result reuse --- src/adagio/cli/dynamic.py | 58 +++++++++++- src/adagio/cli/main.py | 27 +++++- src/adagio/cli/runner.py | 22 +++++ src/adagio/cli/runtime.py | 35 ++++++++ src/adagio/cli/task_exec.py | 65 +++++++++----- src/adagio/executors/base.py | 5 ++ src/adagio/executors/cache_support.py | 102 ++++++++++++++++++++++ src/adagio/executors/docker.py | 9 ++ src/adagio/executors/serial_runner.py | 4 + src/adagio/executors/task_contract.py | 4 + src/adagio/executors/task_environments.py | 13 +++ 11 files changed, 321 insertions(+), 23 deletions(-) create mode 100644 src/adagio/executors/cache_support.py diff --git a/src/adagio/cli/dynamic.py b/src/adagio/cli/dynamic.py index 9b3e773..74f10dd 100644 --- a/src/adagio/cli/dynamic.py +++ b/src/adagio/cli/dynamic.py @@ -8,6 
+8,11 @@ from ..app.parsers.pipeline import Input as InputSpec from ..app.parsers.pipeline import Parameter as ParamSpec +from ..executors.cache_support import ( + CACHE_DIR_HELP, + NO_RECYCLE_HELP, + RECYCLE_POOL_HELP, +) from .args import ParamType, ShowParamsMode, dynamic_opt, to_identifier @@ -125,7 +130,16 @@ def build_dynamic_run( required_inputs: list[str] = [] required_params: list[str] = [] seen_idents: set[str] = set() - seen_opts: set[str] = {"--pipeline", "-p", "--arguments", "--show-params"} + seen_opts: set[str] = { + "--pipeline", + "-p", + "--arguments", + "--show-params", + "--cache-dir", + "--use-cache", + "--recycle-pool", + "--no-recycle", + } argument_inputs = argument_inputs or {} argument_params = argument_params or {} command_group = Group("Command Options", sort_key=0) @@ -162,6 +176,30 @@ def build_dynamic_run( help="Parameter display mode: all, missing, or required.", ), ] + annotations["cache_dir"] = Annotated[ + Path | None, + CliParameter( + name=("--cache-dir", "--use-cache"), + group=command_group, + help=CACHE_DIR_HELP, + ), + ] + annotations["recycle_pool"] = Annotated[ + str | None, + CliParameter( + name=("--recycle-pool",), + group=command_group, + help=RECYCLE_POOL_HELP, + ), + ] + annotations["no_recycle"] = Annotated[ + bool, + CliParameter( + name=("--no-recycle",), + group=command_group, + help=NO_RECYCLE_HELP, + ), + ] parameters: list[inspect.Parameter] = [ inspect.Parameter( @@ -181,6 +219,24 @@ def build_dynamic_run( default=ShowParamsMode.REQUIRED, annotation=annotations["show_params"], ), + inspect.Parameter( + name="cache_dir", + kind=inspect.Parameter.KEYWORD_ONLY, + default=None, + annotation=annotations["cache_dir"], + ), + inspect.Parameter( + name="recycle_pool", + kind=inspect.Parameter.KEYWORD_ONLY, + default=None, + annotation=annotations["recycle_pool"], + ), + inspect.Parameter( + name="no_recycle", + kind=inspect.Parameter.KEYWORD_ONLY, + default=False, + annotation=annotations["no_recycle"], + ), ] def 
add_dynamic_option( diff --git a/src/adagio/cli/main.py b/src/adagio/cli/main.py index dbfb133..ca9586d 100644 --- a/src/adagio/cli/main.py +++ b/src/adagio/cli/main.py @@ -9,6 +9,7 @@ from ..app.parsers.pipeline import Input as InputSpec from ..app.parsers.pipeline import Parameter as ParamSpec +from ..executors.cache_support import CACHE_DIR_HELP, NO_RECYCLE_HELP, RECYCLE_POOL_HELP from ..app.parsers.pipeline import parse_inputs, parse_parameters from .args import ShowParamsMode, extract_flag_value, promote_positional_pipeline from .dynamic import build_dynamic_run @@ -84,9 +85,33 @@ def run( help="Parameter display mode: all, missing, or required.", ), ] = ShowParamsMode.REQUIRED, + cache_dir: Annotated[ + Path | None, + Parameter( + name=("--cache-dir", "--use-cache"), + group=command_group, + help=CACHE_DIR_HELP, + ), + ] = None, + recycle_pool: Annotated[ + str | None, + Parameter( + name=("--recycle-pool",), + group=command_group, + help=RECYCLE_POOL_HELP, + ), + ] = None, + no_recycle: Annotated[ + bool, + Parameter( + name=("--no-recycle",), + group=command_group, + help=NO_RECYCLE_HELP, + ), + ] = False, ): """Run a pipeline (requires --pipeline; dynamic options come from that file).""" - _ = show_params + _ = (show_params, cache_dir, recycle_pool, no_recycle) raise SystemExit( "Missing --pipeline. 
Try:\n adagio run --pipeline pipeline.json --help" ) diff --git a/src/adagio/cli/runner.py b/src/adagio/cli/runner.py index ad04765..3ba82bb 100644 --- a/src/adagio/cli/runner.py +++ b/src/adagio/cli/runner.py @@ -5,6 +5,12 @@ from rich.console import Console +from ..executors.cache_support import ( + describe_cache_config, + resolve_cache_config, + validate_cache_settings, +) + DEFAULT_OUTPUT_DIRNAME = "adagio-outputs" @@ -23,6 +29,11 @@ def run_pipeline_from_kwargs( from ..model.arguments import AdagioArgumentsFile from ..model.pipeline import AdagioPipeline + cache_dir = kwargs.pop("cache_dir", None) + recycle_pool = kwargs.pop("recycle_pool", None) + no_recycle = bool(kwargs.pop("no_recycle", False)) + validate_cache_settings(recycle_pool=recycle_pool, no_recycle=no_recycle) + data = json.loads(pipeline.read_text(encoding="utf-8")) pipeline_data = data.get("spec", data) if isinstance(data, dict) else data parsed_pipeline = AdagioPipeline.model_validate(pipeline_data) @@ -94,6 +105,16 @@ def run_pipeline_from_kwargs( if not suppress_header: console.print(f"[bold]Pipeline:[/bold] {pipeline}") + cache_config = resolve_cache_config( + cwd=Path.cwd().resolve(), + cache_dir=cache_dir, + recycle_pool=recycle_pool, + no_recycle=no_recycle, + ) + + if not suppress_header: + console.print(f"[bold]Cache:[/bold] {describe_cache_config(cache_config)}") + from ..executors import select_default_executor executor = select_default_executor() @@ -105,6 +126,7 @@ def run_pipeline_from_kwargs( pipeline=parsed_pipeline, arguments=arguments, console=console, + cache_config=cache_config, ) diff --git a/src/adagio/cli/runtime.py b/src/adagio/cli/runtime.py index 4ea13c9..5a76911 100644 --- a/src/adagio/cli/runtime.py +++ b/src/adagio/cli/runtime.py @@ -8,6 +8,13 @@ from rich.console import Console +from ..executors.cache_support import ( + CACHE_DIR_HELP, + NO_RECYCLE_HELP, + RECYCLE_POOL_HELP, + resolve_cache_config, + validate_cache_settings, +) from ..model.arguments import 
AdagioArguments from ..model.pipeline import AdagioPipeline from ..monitor.composite import CompositeMonitor @@ -34,6 +41,23 @@ def run_runtime(argv: list[str], *, console: Console) -> None: parser.add_argument("--job-id", required=False, help="Runtime job ID.") parser.add_argument("--output-dir", required=False, help="Directory for output artifacts.") parser.add_argument("--runtime-url", required=False, help="Runtime adapter API base URL.") + parser.add_argument( + "--cache-dir", + "--use-cache", + dest="cache_dir", + required=False, + help=CACHE_DIR_HELP, + ) + parser.add_argument( + "--recycle-pool", + required=False, + help=RECYCLE_POOL_HELP, + ) + parser.add_argument( + "--no-recycle", + action="store_true", + help=NO_RECYCLE_HELP, + ) parser.add_argument( "--connected", action="store_true", @@ -44,6 +68,10 @@ def run_runtime(argv: list[str], *, console: Console) -> None: spec_data = _load_json(Path(opts.spec)) _load_runtime_config(Path(opts.config)) + validate_cache_settings( + recycle_pool=opts.recycle_pool, + no_recycle=opts.no_recycle, + ) runtime_arguments: Any = {} if opts.arguments: runtime_arguments = _load_json(Path(opts.arguments)) @@ -58,6 +86,12 @@ def run_runtime(argv: list[str], *, console: Console) -> None: output_dir=output_dir, ) _validate_required_arguments(pipeline, arguments) + cache_config = resolve_cache_config( + cwd=Path.cwd().resolve(), + cache_dir=opts.cache_dir, + recycle_pool=opts.recycle_pool, + no_recycle=opts.no_recycle, + ) connected = bool(opts.connected and opts.job_id and (opts.runtime_url or os.getenv("RUNTIME_URL"))) runtime_url = opts.runtime_url or os.getenv("RUNTIME_URL") @@ -87,6 +121,7 @@ def run_runtime(argv: list[str], *, console: Console) -> None: arguments=arguments, console=console, monitor=monitor, + cache_config=cache_config, ) except Exception as exc: # noqa: BLE001 if connected and runtime_url and opts.job_id: diff --git a/src/adagio/cli/task_exec.py b/src/adagio/cli/task_exec.py index 4e26a83..573cf59 100644 
--- a/src/adagio/cli/task_exec.py +++ b/src/adagio/cli/task_exec.py @@ -1,6 +1,7 @@ """Internal exec-task subcommand: runs a single QIIME action inside a plugin container.""" import argparse +from contextlib import nullcontext import os import sys import warnings @@ -25,7 +26,7 @@ def run_task_exec(argv: list[str]) -> None: def _run_task(spec: dict[str, Any]) -> None: - from qiime2 import Artifact, Metadata + from qiime2 import Artifact, Cache, Metadata from qiime2.sdk import PluginManager plugin_name: str = spec["plugin"] @@ -36,6 +37,8 @@ def _run_task(spec: dict[str, Any]) -> None: metadata_column_kwargs: dict[str, dict[str, str]] = spec.get("metadata_column_kwargs", {}) outputs: dict[str, str] = spec["outputs"] result_manifest: str | None = spec.get("result_manifest") + cache_path: str | None = spec.get("cache_path") + recycle_pool: str | None = spec.get("recycle_pool") plugin_manager = PluginManager() @@ -55,32 +58,46 @@ def _run_task(spec: dict[str, Any]) -> None: f"Available actions (first 30): [{available}]" ) - kwargs: dict[str, Any] = {} + cache = Cache(cache_path) if cache_path else None + cache_context = cache if cache is not None else nullcontext() - for name, path in archive_inputs.items(): - kwargs[name] = Artifact.load(path) + with cache_context: + kwargs: dict[str, Any] = {} - loaded_metadata: dict[str, Metadata] = {} - for name, path in metadata_inputs.items(): - if zipfile.is_zipfile(path): - loaded_metadata[name] = Artifact.load(path).view(Metadata) - else: - loaded_metadata[name] = Metadata.load(path) + for name, path in archive_inputs.items(): + loaded = Artifact.load(path) + kwargs[name] = _cache_loaded_input(cache=cache, value=loaded) - for param_name, col_spec in metadata_column_kwargs.items(): - source_name: str = col_spec["source"] - column_name: str = col_spec["column"] - metadata = loaded_metadata.pop(source_name) - kwargs[param_name] = metadata.get_column(column_name) + loaded_metadata: dict[str, Metadata] = {} + for name, path in 
metadata_inputs.items(): + if zipfile.is_zipfile(path): + loaded_metadata[name] = Artifact.load(path).view(Metadata) + else: + loaded_metadata[name] = Metadata.load(path) - for name, metadata in loaded_metadata.items(): - kwargs[name] = metadata + for param_name, col_spec in metadata_column_kwargs.items(): + source_name: str = col_spec["source"] + column_name: str = col_spec["column"] + metadata = loaded_metadata.pop(source_name) + kwargs[param_name] = metadata.get_column(column_name) - for name, value in params.items(): - kwargs[name] = _coerce_param(action=action, name=name, value=value) + for name, metadata in loaded_metadata.items(): + kwargs[name] = metadata - with action_output_context(): - results = action(**kwargs) + for name, value in params.items(): + kwargs[name] = _coerce_param(action=action, name=name, value=value) + + if recycle_pool is not None and cache is None: + raise ValueError("A recycle pool requires a configured cache path.") + + recycle_context = ( + cache.create_pool(key=recycle_pool, reuse=True) + if recycle_pool is not None and cache is not None + else nullcontext() + ) + with recycle_context: + with action_output_context(): + results = action(**kwargs) saved_outputs: dict[str, str] = {} for name, dest_path in outputs.items(): @@ -91,6 +108,12 @@ def _run_task(spec: dict[str, Any]) -> None: write_json_file(Path(result_manifest), saved_outputs) +def _cache_loaded_input(*, cache: Any, value: Any) -> Any: + if cache is None: + return value + return cache.process_pool.save(value) + + def _resolve_key(mapping: Any, requested: str) -> Any: if requested in mapping: return mapping[requested] diff --git a/src/adagio/executors/base.py b/src/adagio/executors/base.py index d023074..d66c00c 100644 --- a/src/adagio/executors/base.py +++ b/src/adagio/executors/base.py @@ -9,6 +9,8 @@ from adagio.model.task import PluginActionTask from adagio.monitor.api import Monitor +from .cache_support import ExecutionCacheConfig + class PipelineExecutor(Protocol): 
mode_label: str @@ -20,6 +22,7 @@ def execute( arguments: AdagioArguments, console: Console | None = None, monitor: Monitor | None = None, + cache_config: ExecutionCacheConfig | None = None, ) -> None: ... @@ -41,6 +44,8 @@ class TaskExecutionRequest: params: Mapping[str, Any] metadata_column_kwargs: Mapping[str, Mapping[str, str]] outputs: Mapping[str, str] + cache_path: str | None = None + recycle_pool: str | None = None @dataclass(frozen=True) diff --git a/src/adagio/executors/cache_support.py b/src/adagio/executors/cache_support.py new file mode 100644 index 0000000..5fbb2c2 --- /dev/null +++ b/src/adagio/executors/cache_support.py @@ -0,0 +1,102 @@ +from __future__ import annotations + +import os +from dataclasses import dataclass +from pathlib import Path + +DEFAULT_CACHE_DIRNAME = ".adagio/cache" +DEFAULT_RECYCLE_POOL = "adagio-recycle" + +CACHE_DIR_ENV_VAR = "ADAGIO_CACHE_DIR" +RECYCLE_POOL_ENV_VAR = "ADAGIO_RECYCLE_POOL" +NO_RECYCLE_ENV_VAR = "ADAGIO_NO_RECYCLE" + +CACHE_DIR_HELP = ( + "Path to the shared QIIME cache Adagio should use for reusable task results. " + "Defaults to /storage/adagio-cache when /storage exists, otherwise ./.adagio/cache." +) +RECYCLE_POOL_HELP = ( + "Named recycle pool used for task result reuse. Defaults to a persistent " + f"pool named {DEFAULT_RECYCLE_POOL!r}." +) +NO_RECYCLE_HELP = ( + "Disable reuse of cached task results for this run while still using the selected cache." +) + + +@dataclass(frozen=True) +class ExecutionCacheConfig: + cache_dir: Path + recycle_pool: str | None = None + + +def validate_cache_settings(*, recycle_pool: str | None, no_recycle: bool) -> None: + if recycle_pool is not None and no_recycle: + raise SystemExit( + "Cannot set --recycle-pool and --no-recycle at the same time." 
+ ) + + +def resolve_cache_config( + *, + cwd: Path, + cache_dir: str | Path | None, + recycle_pool: str | None, + no_recycle: bool, +) -> ExecutionCacheConfig: + env_cache_dir = os.getenv(CACHE_DIR_ENV_VAR) if cache_dir is None else None + env_recycle_pool = ( + os.getenv(RECYCLE_POOL_ENV_VAR) if recycle_pool is None else None + ) + env_no_recycle = _is_truthy(os.getenv(NO_RECYCLE_ENV_VAR)) + + resolved_cache_dir = _resolve_cache_dir( + cwd=cwd, + raw_value=cache_dir if cache_dir is not None else env_cache_dir, + ) + resolved_no_recycle = no_recycle or env_no_recycle + resolved_recycle_pool = ( + None + if resolved_no_recycle + else (recycle_pool or env_recycle_pool or DEFAULT_RECYCLE_POOL) + ) + + return ExecutionCacheConfig( + cache_dir=resolved_cache_dir, + recycle_pool=resolved_recycle_pool, + ) + + +def mount_path_for_cache(cache_dir: Path) -> Path: + return cache_dir if cache_dir.exists() else cache_dir.parent + + +def describe_cache_config(config: ExecutionCacheConfig) -> str: + if config.recycle_pool is None: + return f"{config.cache_dir} (recycle disabled)" + return f"{config.cache_dir} (pool: {config.recycle_pool})" + + +def _resolve_cache_dir(*, cwd: Path, raw_value: str | Path | None) -> Path: + candidate = _default_cache_dir(cwd=cwd) if raw_value is None else Path(raw_value) + candidate = candidate.expanduser() + if not candidate.is_absolute(): + candidate = (cwd / candidate).resolve() + else: + candidate = candidate.resolve() + + candidate.parent.mkdir(parents=True, exist_ok=True) + return candidate + + +def _default_cache_dir(*, cwd: Path) -> Path: + storage_root = Path("/storage") + if storage_root.exists(): + return (storage_root / "adagio-cache").resolve() + return (cwd / DEFAULT_CACHE_DIRNAME).resolve() + + +def _is_truthy(value: str | None) -> bool: + if value is None: + return False + return value.strip().lower() in {"1", "true", "yes", "on"} diff --git a/src/adagio/executors/docker.py b/src/adagio/executors/docker.py index b97ac16..dc9a501 
100644 --- a/src/adagio/executors/docker.py +++ b/src/adagio/executors/docker.py @@ -9,6 +9,7 @@ TaskExecutionRequest, TaskExecutionResult, ) +from .cache_support import mount_path_for_cache from .container_support import ( containerize_host_value, containerize_path, @@ -64,6 +65,12 @@ def launch( metadata_column_kwargs=dict(request.metadata_column_kwargs), outputs=outputs, result_manifest=containerize_path(manifest_path), + cache_path=( + containerize_path(Path(request.cache_path)) + if request.cache_path is not None + else None + ), + recycle_pool=request.recycle_pool, ) write_json_file(spec_path, task_spec) @@ -93,6 +100,8 @@ def launch( path = Path(value) if path.is_absolute(): host_paths.append(path) + if request.cache_path is not None: + host_paths.append(mount_path_for_cache(Path(request.cache_path))) command = with_mounts(command=command, host_paths=host_paths) diff --git a/src/adagio/executors/serial_runner.py b/src/adagio/executors/serial_runner.py index 9ea78ef..19cff49 100644 --- a/src/adagio/executors/serial_runner.py +++ b/src/adagio/executors/serial_runner.py @@ -11,6 +11,7 @@ from adagio.monitor.log import LogMonitor from adagio.monitor.tty import RichMonitor +from .cache_support import ExecutionCacheConfig from .common import plan_execution_order, task_label from .path_utils import resolve_host_path @@ -23,6 +24,7 @@ class SerialExecutionState: work_path: Path params: dict[str, t.Any] scope: dict[str, str] + cache_config: ExecutionCacheConfig | None def run_serial_pipeline( @@ -34,6 +36,7 @@ def run_serial_pipeline( console: Console | None = None, monitor: Monitor | None = None, total_subtasks: int = CONTAINER_SUBTASK_COUNT, + cache_config: ExecutionCacheConfig | None = None, ) -> None: sig = pipeline.signature tasks = list(pipeline.iter_tasks()) @@ -50,6 +53,7 @@ def run_serial_pipeline( work_path=Path(work_dir), params=sig.get_params(arguments), scope={}, + cache_config=cache_config, ) completed_task_ids: set[str] = set() diff --git 
a/src/adagio/executors/task_contract.py b/src/adagio/executors/task_contract.py index ad67b43..f93542a 100644 --- a/src/adagio/executors/task_contract.py +++ b/src/adagio/executors/task_contract.py @@ -38,6 +38,8 @@ def build_task_spec( metadata_column_kwargs: dict[str, dict[str, str]], outputs: dict[str, str], result_manifest: str | None, + cache_path: str | None, + recycle_pool: str | None, ) -> dict[str, Any]: return { "plugin": plugin, @@ -48,6 +50,8 @@ def build_task_spec( "metadata_column_kwargs": metadata_column_kwargs, "outputs": outputs, "result_manifest": result_manifest, + "cache_path": cache_path, + "recycle_pool": recycle_pool, } diff --git a/src/adagio/executors/task_environments.py b/src/adagio/executors/task_environments.py index f04d90b..049d787 100644 --- a/src/adagio/executors/task_environments.py +++ b/src/adagio/executors/task_environments.py @@ -14,6 +14,7 @@ TaskEnvironmentResolver, TaskExecutionRequest, ) +from .cache_support import ExecutionCacheConfig from .path_utils import resolve_output_destination from .serial_runner import SerialExecutionState, run_serial_pipeline from .task_contract import build_task_outputs @@ -38,6 +39,7 @@ def execute( arguments: AdagioArguments, console: Console | None = None, monitor: Monitor | None = None, + cache_config: ExecutionCacheConfig | None = None, ) -> None: run_serial_pipeline( pipeline=pipeline, @@ -46,6 +48,7 @@ def execute( finish_outputs=_save_outputs, console=console, monitor=monitor, + cache_config=cache_config, ) def _resolve_task( @@ -128,6 +131,16 @@ def _execute_plugin_action( params=resolved_params, metadata_column_kwargs=metadata_column_kwargs, outputs=outputs, + cache_path=( + str(state.cache_config.cache_dir) + if state.cache_config is not None + else None + ), + recycle_pool=( + state.cache_config.recycle_pool + if state.cache_config is not None + else None + ), ) result = launcher.launch( environment=environment, From afdec7e1f0585c6aa107c709be5f8b906564d9dc Mon Sep 17 00:00:00 2001 
From: John Chase Date: Thu, 19 Mar 2026 22:02:28 -0700 Subject: [PATCH 25/44] Adds caching --- README.md | 28 ++++-- src/adagio/cli/cache.py | 71 +++++++++++++++ src/adagio/cli/dynamic.py | 40 +++------ src/adagio/cli/main.py | 33 ++++--- src/adagio/cli/runner.py | 8 +- src/adagio/cli/runtime.py | 27 ++---- src/adagio/cli/task_exec.py | 104 +++++++++++++++++++++- src/adagio/executors/base.py | 1 + src/adagio/executors/cache_support.py | 73 +++------------ src/adagio/executors/docker.py | 6 +- src/adagio/executors/serial_runner.py | 9 +- src/adagio/executors/task_contract.py | 23 +++++ src/adagio/executors/task_environments.py | 11 +-- src/adagio/monitor/tty.py | 5 +- 14 files changed, 288 insertions(+), 151 deletions(-) create mode 100644 src/adagio/cli/cache.py diff --git a/README.md b/README.md index 5a502fa..1776481 100644 --- a/README.md +++ b/README.md @@ -42,32 +42,48 @@ adagio run --help Run with a pipeline file: ```bash -adagio run --pipeline path/to/pipeline.json +adagio run --pipeline path/to/pipeline.json --cache-dir /path/to/cache ``` `adagio run` executes each plugin task in its own task environment. Today the default task environment is a Docker image in GHCR derived from the plugin name in the pipeline spec, for example `dada2` -> `ghcr.io/cymis/qiime2-plugin-dada2:2026.1`. +The cache directory is required and is reused across reruns by default so unchanged +successful tasks can be replayed. 
Equivalent positional form: ```bash -adagio run path/to/pipeline.json +adagio run path/to/pipeline.json --cache-dir /path/to/cache ``` Use an arguments file: ```bash -adagio run --pipeline path/to/pipeline.json --arguments path/to/arguments.json +adagio run --pipeline path/to/pipeline.json --cache-dir /path/to/cache --arguments path/to/arguments.json ``` Control which dynamic flags are shown in help: ```bash -adagio run --pipeline path/to/pipeline.json --show-params required +adagio run --pipeline path/to/pipeline.json --cache-dir /path/to/cache --show-params required # choices: all | missing | required ``` +Disable reuse for a run while still writing outputs into the selected cache directory: + +```bash +adagio run --pipeline path/to/pipeline.json --cache-dir /path/to/cache --no-reuse +``` + +The same boolean pair is available as `--reuse` / `--no-reuse`. `--reuse` is the default. + +Clear an existing cache directory: + +```bash +adagio cache clear --cache-dir /path/to/cache +``` + ### Arguments file format `--arguments` can be downloaded from Adagio directly in the "Run" workflow : @@ -144,7 +160,7 @@ uv run ruff format . 
### Running locally during development ```bash -uv run adagio run --pipeline path/to/pipeline.json +uv run adagio run --pipeline path/to/pipeline.json --cache-dir /path/to/cache ``` ### Runtime entrypoint (container/integration use) @@ -152,5 +168,5 @@ uv run adagio run --pipeline path/to/pipeline.json The `runtime` subcommand is intended for runtime-adapter jobs: ```bash -uv run adagio runtime --spec spec.json --config config.json --arguments arguments.json +uv run adagio runtime --spec spec.json --config config.json --arguments arguments.json --cache-dir /path/to/cache ``` diff --git a/src/adagio/cli/cache.py b/src/adagio/cli/cache.py new file mode 100644 index 0000000..3566d71 --- /dev/null +++ b/src/adagio/cli/cache.py @@ -0,0 +1,71 @@ +import argparse +import re +import shutil +from pathlib import Path + +from rich.console import Console + +from ..executors.cache_support import CACHE_DIR_HELP, resolve_cache_dir_path + +QIIME_CACHE_CONTENTS = {"VERSION", "data", "keys", "pools", "processes"} +QIIME_CACHE_VERSION_RE = re.compile(r"^QIIME 2\ncache: v?\d+\nframework: 20\d\d\.\d+\Z") + + +def run_cache(argv: list[str], *, console: Console) -> None: + parser = argparse.ArgumentParser( + prog="adagio cache", + description="Manage Adagio's shared QIIME cache directory.", + ) + subparsers = parser.add_subparsers(dest="command", required=True) + + clear_parser = subparsers.add_parser( + "clear", + help="Delete an existing cache directory.", + description=( + "Delete an existing QIIME cache directory. " + "Only run this when no jobs are actively using the cache." 
+ ), + ) + clear_parser.add_argument( + "--cache-dir", + required=True, + help=CACHE_DIR_HELP, + ) + + opts = parser.parse_args(argv) + + if opts.command == "clear": + cache_dir = resolve_cache_dir_path( + cwd=Path.cwd().resolve(), + raw_value=opts.cache_dir, + ) + _clear_cache(cache_dir=cache_dir, console=console) + return + + raise SystemExit(f"Unknown cache command: {opts.command}") + + +def _clear_cache(*, cache_dir: Path, console: Console) -> None: + _require_qiime_cache(cache_dir) + shutil.rmtree(cache_dir) + console.print(f"Cleared cache directory: {cache_dir}") + + +def _require_qiime_cache(cache_dir: Path) -> None: + if not cache_dir.exists(): + raise SystemExit(f"Cache directory does not exist: {cache_dir}") + if not cache_dir.is_dir(): + raise SystemExit(f"Cache path is not a directory: {cache_dir}") + + contents = set(item.name for item in cache_dir.iterdir()) + if not contents.issuperset(QIIME_CACHE_CONTENTS): + raise SystemExit(f"Path is not a QIIME cache: {cache_dir}") + + version_file = cache_dir / "VERSION" + try: + version_text = version_file.read_text(encoding="utf-8").strip() + except OSError as exc: + raise SystemExit(f"Could not read cache version file: {version_file}") from exc + + if not QIIME_CACHE_VERSION_RE.fullmatch(version_text): + raise SystemExit(f"Path is not a QIIME cache: {cache_dir}") diff --git a/src/adagio/cli/dynamic.py b/src/adagio/cli/dynamic.py index 74f10dd..48d62d1 100644 --- a/src/adagio/cli/dynamic.py +++ b/src/adagio/cli/dynamic.py @@ -10,8 +10,7 @@ from ..app.parsers.pipeline import Parameter as ParamSpec from ..executors.cache_support import ( CACHE_DIR_HELP, - NO_RECYCLE_HELP, - RECYCLE_POOL_HELP, + REUSE_HELP, ) from .args import ParamType, ShowParamsMode, dynamic_opt, to_identifier @@ -136,9 +135,8 @@ def build_dynamic_run( "--arguments", "--show-params", "--cache-dir", - "--use-cache", - "--recycle-pool", - "--no-recycle", + "--reuse", + "--no-reuse", } argument_inputs = argument_inputs or {} argument_params = 
argument_params or {} @@ -177,27 +175,20 @@ def build_dynamic_run( ), ] annotations["cache_dir"] = Annotated[ - Path | None, + Path, CliParameter( - name=("--cache-dir", "--use-cache"), + name=("--cache-dir",), group=command_group, help=CACHE_DIR_HELP, ), ] - annotations["recycle_pool"] = Annotated[ - str | None, - CliParameter( - name=("--recycle-pool",), - group=command_group, - help=RECYCLE_POOL_HELP, - ), - ] - annotations["no_recycle"] = Annotated[ + annotations["reuse"] = Annotated[ bool, CliParameter( - name=("--no-recycle",), + name=("--reuse",), + negative=("--no-reuse",), group=command_group, - help=NO_RECYCLE_HELP, + help=REUSE_HELP, ), ] @@ -222,20 +213,13 @@ def build_dynamic_run( inspect.Parameter( name="cache_dir", kind=inspect.Parameter.KEYWORD_ONLY, - default=None, annotation=annotations["cache_dir"], ), inspect.Parameter( - name="recycle_pool", - kind=inspect.Parameter.KEYWORD_ONLY, - default=None, - annotation=annotations["recycle_pool"], - ), - inspect.Parameter( - name="no_recycle", + name="reuse", kind=inspect.Parameter.KEYWORD_ONLY, - default=False, - annotation=annotations["no_recycle"], + default=True, + annotation=annotations["reuse"], ), ] diff --git a/src/adagio/cli/main.py b/src/adagio/cli/main.py index ca9586d..6f4f136 100644 --- a/src/adagio/cli/main.py +++ b/src/adagio/cli/main.py @@ -9,8 +9,8 @@ from ..app.parsers.pipeline import Input as InputSpec from ..app.parsers.pipeline import Parameter as ParamSpec -from ..executors.cache_support import CACHE_DIR_HELP, NO_RECYCLE_HELP, RECYCLE_POOL_HELP from ..app.parsers.pipeline import parse_inputs, parse_parameters +from ..executors.cache_support import CACHE_DIR_HELP, REUSE_HELP from .args import ShowParamsMode, extract_flag_value, promote_positional_pipeline from .dynamic import build_dynamic_run from .qapi import build_qapi @@ -29,6 +29,12 @@ def main(argv: list[str] | None = None) -> None: run_task_exec(argv[1:]) return + if argv and argv[0] == "cache": + from .cache import run_cache + 
+ run_cache(argv[1:], console=console) + return + if argv and argv[0] == "runtime": from .runtime import run_runtime @@ -86,32 +92,25 @@ def run( ), ] = ShowParamsMode.REQUIRED, cache_dir: Annotated[ - Path | None, + Path, Parameter( - name=("--cache-dir", "--use-cache"), + name=("--cache-dir",), group=command_group, help=CACHE_DIR_HELP, ), - ] = None, - recycle_pool: Annotated[ - str | None, - Parameter( - name=("--recycle-pool",), - group=command_group, - help=RECYCLE_POOL_HELP, - ), - ] = None, - no_recycle: Annotated[ + ], + reuse: Annotated[ bool, Parameter( - name=("--no-recycle",), + name=("--reuse",), + negative=("--no-reuse",), group=command_group, - help=NO_RECYCLE_HELP, + help=REUSE_HELP, ), - ] = False, + ] = True, ): """Run a pipeline (requires --pipeline; dynamic options come from that file).""" - _ = (show_params, cache_dir, recycle_pool, no_recycle) + _ = (show_params, cache_dir, reuse) raise SystemExit( "Missing --pipeline. Try:\n adagio run --pipeline pipeline.json --help" ) diff --git a/src/adagio/cli/runner.py b/src/adagio/cli/runner.py index 3ba82bb..e7f6152 100644 --- a/src/adagio/cli/runner.py +++ b/src/adagio/cli/runner.py @@ -8,7 +8,6 @@ from ..executors.cache_support import ( describe_cache_config, resolve_cache_config, - validate_cache_settings, ) DEFAULT_OUTPUT_DIRNAME = "adagio-outputs" @@ -30,9 +29,7 @@ def run_pipeline_from_kwargs( from ..model.pipeline import AdagioPipeline cache_dir = kwargs.pop("cache_dir", None) - recycle_pool = kwargs.pop("recycle_pool", None) - no_recycle = bool(kwargs.pop("no_recycle", False)) - validate_cache_settings(recycle_pool=recycle_pool, no_recycle=no_recycle) + reuse = bool(kwargs.pop("reuse", True)) data = json.loads(pipeline.read_text(encoding="utf-8")) pipeline_data = data.get("spec", data) if isinstance(data, dict) else data @@ -108,8 +105,7 @@ def run_pipeline_from_kwargs( cache_config = resolve_cache_config( cwd=Path.cwd().resolve(), cache_dir=cache_dir, - recycle_pool=recycle_pool, - 
no_recycle=no_recycle, + reuse=reuse, ) if not suppress_header: diff --git a/src/adagio/cli/runtime.py b/src/adagio/cli/runtime.py index 5a76911..19df9a3 100644 --- a/src/adagio/cli/runtime.py +++ b/src/adagio/cli/runtime.py @@ -10,10 +10,8 @@ from ..executors.cache_support import ( CACHE_DIR_HELP, - NO_RECYCLE_HELP, - RECYCLE_POOL_HELP, + REUSE_HELP, resolve_cache_config, - validate_cache_settings, ) from ..model.arguments import AdagioArguments from ..model.pipeline import AdagioPipeline @@ -43,20 +41,14 @@ def run_runtime(argv: list[str], *, console: Console) -> None: parser.add_argument("--runtime-url", required=False, help="Runtime adapter API base URL.") parser.add_argument( "--cache-dir", - "--use-cache", - dest="cache_dir", - required=False, + required=True, help=CACHE_DIR_HELP, ) parser.add_argument( - "--recycle-pool", - required=False, - help=RECYCLE_POOL_HELP, - ) - parser.add_argument( - "--no-recycle", - action="store_true", - help=NO_RECYCLE_HELP, + "--reuse", + action=argparse.BooleanOptionalAction, + default=True, + help=REUSE_HELP, ) parser.add_argument( "--connected", @@ -68,10 +60,6 @@ def run_runtime(argv: list[str], *, console: Console) -> None: spec_data = _load_json(Path(opts.spec)) _load_runtime_config(Path(opts.config)) - validate_cache_settings( - recycle_pool=opts.recycle_pool, - no_recycle=opts.no_recycle, - ) runtime_arguments: Any = {} if opts.arguments: runtime_arguments = _load_json(Path(opts.arguments)) @@ -89,8 +77,7 @@ def run_runtime(argv: list[str], *, console: Console) -> None: cache_config = resolve_cache_config( cwd=Path.cwd().resolve(), cache_dir=opts.cache_dir, - recycle_pool=opts.recycle_pool, - no_recycle=opts.no_recycle, + reuse=opts.reuse, ) connected = bool(opts.connected and opts.job_id and (opts.runtime_url or os.getenv("RUNTIME_URL"))) diff --git a/src/adagio/cli/task_exec.py b/src/adagio/cli/task_exec.py index 573cf59..375af4a 100644 --- a/src/adagio/cli/task_exec.py +++ b/src/adagio/cli/task_exec.py @@ -1,6 +1,7 
@@ """Internal exec-task subcommand: runs a single QIIME action inside a plugin container.""" import argparse +from collections.abc import Mapping from contextlib import nullcontext import os import sys @@ -9,7 +10,11 @@ from pathlib import Path from typing import Any -from adagio.executors.task_contract import read_json_file, write_json_file +from adagio.executors.task_contract import ( + build_result_manifest, + read_json_file, + write_json_file, +) def run_task_exec(argv: list[str]) -> None: @@ -60,6 +65,7 @@ def _run_task(spec: dict[str, Any]) -> None: cache = Cache(cache_path) if cache_path else None cache_context = cache if cache is not None else nullcontext() + reused = False with cache_context: kwargs: dict[str, Any] = {} @@ -87,6 +93,8 @@ def _run_task(spec: dict[str, Any]) -> None: for name, value in params.items(): kwargs[name] = _coerce_param(action=action, name=name, value=value) + _materialize_default_parameters(action=action, kwargs=kwargs) + if recycle_pool is not None and cache is None: raise ValueError("A recycle pool requires a configured cache path.") @@ -96,8 +104,13 @@ def _run_task(spec: dict[str, Any]) -> None: else nullcontext() ) with recycle_context: - with action_output_context(): - results = action(**kwargs) + cached_results = _load_cached_results(cache=cache, action=action, kwargs=kwargs) + if cached_results is not None: + reused = True + results = cached_results + else: + with action_output_context(): + results = action(**kwargs) saved_outputs: dict[str, str] = {} for name, dest_path in outputs.items(): @@ -105,7 +118,10 @@ def _run_task(spec: dict[str, Any]) -> None: saved_outputs[name] = artifact.save(dest_path) if result_manifest: - write_json_file(Path(result_manifest), saved_outputs) + write_json_file( + Path(result_manifest), + build_result_manifest(outputs=saved_outputs, reused=reused), + ) def _cache_loaded_input(*, cache: Any, value: Any) -> Any: @@ -114,6 +130,86 @@ def _cache_loaded_input(*, cache: Any, value: Any) -> Any: 
return cache.process_pool.save(value) +def _materialize_default_parameters(*, action: Any, kwargs: dict[str, Any]) -> None: + signature = getattr(action, "signature", None) + parameters = getattr(signature, "parameters", None) + if not isinstance(parameters, Mapping): + return + + for name, spec in parameters.items(): + has_default = getattr(spec, "has_default", None) + if name in kwargs or not callable(has_default) or not has_default(): + continue + kwargs[name] = spec.default + + +def _load_cached_results(*, cache: Any, action: Any, kwargs: dict[str, Any]) -> Any: + if cache is None: + return None + + named_pool = getattr(cache, "named_pool", None) + if named_pool is None: + return None + + named_pool.create_index() + invocation = _build_invocation(action=action, kwargs=kwargs) + if invocation not in named_pool.index: + return None + + from qiime2.core.type.util import is_collection_type + from qiime2.sdk import ResultCollection, Results + + try: + cached_outputs = named_pool.index[invocation] + loaded_outputs: dict[str, Any] = {} + for name, output_spec in action.signature.outputs.items(): + if is_collection_type(output_spec.qiime_type): + cached_collection = cached_outputs[name] + collection_order = list(cached_collection.keys()) + if not _validate_collection_order(collection_order): + return None + + collection_order.sort(key=lambda x: x.idx) + loaded_collection = ResultCollection() + for elem_info in collection_order: + loaded_collection[elem_info.item_name] = named_pool.load( + cached_collection[elem_info] + ) + loaded_outputs[name] = loaded_collection + else: + loaded_outputs[name] = named_pool.load(cached_outputs[name]) + except KeyError: + return None + + return Results(loaded_outputs.keys(), loaded_outputs.values()) + + +def _build_invocation(*, action: Any, kwargs: dict[str, Any]) -> Any: + from rachis.core.type.signature import HashableInvocation + + plugin = action.plugin_id.replace("_", "-") + plugin_action = f"{plugin}:{action.id}" + collated_inputs 
= action.signature.collate_inputs(**kwargs) + callable_args = action.signature.coerce_user_input(**collated_inputs) + arguments = [] + for name, value in callable_args.items(): + arguments.append({name: value}) + return HashableInvocation(plugin_action, arguments) + + +def _validate_collection_order(collection_order: list[Any]) -> bool: + if not collection_order: + return True + if not all( + elem.total == collection_order[0].total for elem in collection_order + ) or len(collection_order) != collection_order[0].total: + warnings.warn( + "Incomplete collection found when recycling, collection will be remade" + ) + return False + return True + + def _resolve_key(mapping: Any, requested: str) -> Any: if requested in mapping: return mapping[requested] diff --git a/src/adagio/executors/base.py b/src/adagio/executors/base.py index d66c00c..697606d 100644 --- a/src/adagio/executors/base.py +++ b/src/adagio/executors/base.py @@ -51,6 +51,7 @@ class TaskExecutionRequest: @dataclass(frozen=True) class TaskExecutionResult: outputs: Mapping[str, str] + reused: bool = False class TaskEnvironmentResolver(Protocol): diff --git a/src/adagio/executors/cache_support.py b/src/adagio/executors/cache_support.py index 5fbb2c2..4d84c97 100644 --- a/src/adagio/executors/cache_support.py +++ b/src/adagio/executors/cache_support.py @@ -1,26 +1,13 @@ from __future__ import annotations -import os from dataclasses import dataclass from pathlib import Path -DEFAULT_CACHE_DIRNAME = ".adagio/cache" DEFAULT_RECYCLE_POOL = "adagio-recycle" -CACHE_DIR_ENV_VAR = "ADAGIO_CACHE_DIR" -RECYCLE_POOL_ENV_VAR = "ADAGIO_RECYCLE_POOL" -NO_RECYCLE_ENV_VAR = "ADAGIO_NO_RECYCLE" - -CACHE_DIR_HELP = ( - "Path to the shared QIIME cache Adagio should use for reusable task results. " - "Defaults to /storage/adagio-cache when /storage exists, otherwise ./.adagio/cache." -) -RECYCLE_POOL_HELP = ( - "Named recycle pool used for task result reuse. Defaults to a persistent " - f"pool named {DEFAULT_RECYCLE_POOL!r}." 
-) -NO_RECYCLE_HELP = ( - "Disable reuse of cached task results for this run while still using the selected cache." +CACHE_DIR_HELP = "Path to the shared QIIME cache directory. Required." +REUSE_HELP = ( + "Reuse matching prior task results from the selected cache. Enabled by default." ) @@ -30,36 +17,15 @@ class ExecutionCacheConfig: recycle_pool: str | None = None -def validate_cache_settings(*, recycle_pool: str | None, no_recycle: bool) -> None: - if recycle_pool is not None and no_recycle: - raise SystemExit( - "Cannot set --recycle-pool and --no-recycle at the same time." - ) - - def resolve_cache_config( *, cwd: Path, cache_dir: str | Path | None, - recycle_pool: str | None, - no_recycle: bool, + reuse: bool, ) -> ExecutionCacheConfig: - env_cache_dir = os.getenv(CACHE_DIR_ENV_VAR) if cache_dir is None else None - env_recycle_pool = ( - os.getenv(RECYCLE_POOL_ENV_VAR) if recycle_pool is None else None - ) - env_no_recycle = _is_truthy(os.getenv(NO_RECYCLE_ENV_VAR)) - - resolved_cache_dir = _resolve_cache_dir( - cwd=cwd, - raw_value=cache_dir if cache_dir is not None else env_cache_dir, - ) - resolved_no_recycle = no_recycle or env_no_recycle - resolved_recycle_pool = ( - None - if resolved_no_recycle - else (recycle_pool or env_recycle_pool or DEFAULT_RECYCLE_POOL) - ) + resolved_cache_dir = resolve_cache_dir_path(cwd=cwd, raw_value=cache_dir) + resolved_cache_dir.parent.mkdir(parents=True, exist_ok=True) + resolved_recycle_pool = DEFAULT_RECYCLE_POOL if reuse else None return ExecutionCacheConfig( cache_dir=resolved_cache_dir, @@ -73,30 +39,19 @@ def mount_path_for_cache(cache_dir: Path) -> Path: def describe_cache_config(config: ExecutionCacheConfig) -> str: if config.recycle_pool is None: - return f"{config.cache_dir} (recycle disabled)" - return f"{config.cache_dir} (pool: {config.recycle_pool})" + return f"{config.cache_dir} (reuse disabled)" + return f"{config.cache_dir} (reuse enabled)" -def _resolve_cache_dir(*, cwd: Path, raw_value: str | Path | 
None) -> Path: - candidate = _default_cache_dir(cwd=cwd) if raw_value is None else Path(raw_value) +def resolve_cache_dir_path(*, cwd: Path, raw_value: str | Path | None) -> Path: + if raw_value is None: + raise SystemExit("Missing required --cache-dir.") + + candidate = Path(raw_value) candidate = candidate.expanduser() if not candidate.is_absolute(): candidate = (cwd / candidate).resolve() else: candidate = candidate.resolve() - candidate.parent.mkdir(parents=True, exist_ok=True) return candidate - - -def _default_cache_dir(*, cwd: Path) -> Path: - storage_root = Path("/storage") - if storage_root.exists(): - return (storage_root / "adagio-cache").resolve() - return (cwd / DEFAULT_CACHE_DIRNAME).resolve() - - -def _is_truthy(value: str | None) -> bool: - if value is None: - return False - return value.strip().lower() in {"1", "true", "yes", "on"} diff --git a/src/adagio/executors/docker.py b/src/adagio/executors/docker.py index dc9a501..5e54c29 100644 --- a/src/adagio/executors/docker.py +++ b/src/adagio/executors/docker.py @@ -22,6 +22,7 @@ with_mounts, ) from .task_contract import ( + parse_result_manifest, build_task_spec, read_json_file, result_manifest_path, @@ -145,13 +146,14 @@ def launch( ) output_manifest = read_json_file(manifest_path) + reported_outputs, reused = parse_result_manifest(output_manifest) outputs = {} for output_name in request.outputs: - actual_path = output_manifest.get(output_name) + actual_path = reported_outputs.get(output_name) if not isinstance(actual_path, str): raise RuntimeError( f"Task {task.id!r} did not report output {output_name!r}." 
def parse_result_manifest(payload: dict[str, Any]) -> tuple[dict[str, str], bool]:
    """Decode a task result manifest into ``(outputs, reused)``.

    Structured manifests carry an ``outputs`` mapping plus a ``reused`` flag;
    legacy manifests are a bare name->path mapping and imply ``reused=False``.

    Raises:
        TypeError: if a structured manifest's ``outputs`` is not an object.
    """
    if "outputs" not in payload:
        # Legacy flat manifest: the payload itself is the outputs mapping.
        return dict(payload), False

    outputs = payload.get("outputs", {})
    if not isinstance(outputs, dict):
        raise TypeError("Invalid task result manifest: 'outputs' must be an object.")
    return dict(outputs), bool(payload.get("reused", False))
error task.finished_at = time.monotonic() - if status in {"completed", "skipped"}: + if status in {"completed", "cached", "skipped"}: task.completed_subtasks = task.total_subtasks if status in self._status_counts: self._status_counts[status] += 1 @@ -146,6 +147,7 @@ def finish_pipeline(self) -> None: self._console.print( "Summary: " f"{self._status_counts['completed']} completed, " + f"{self._status_counts['cached']} cached, " f"{self._status_counts['failed']} failed, " f"{self._status_counts['skipped']} skipped, " f"{max(pending, 0)} pending" @@ -201,6 +203,7 @@ def _status_style(status: str) -> tuple[str, str]: "pending": ("PENDING", "yellow"), "running": ("RUNNING", "cyan"), "completed": ("DONE", "green"), + "cached": ("CACHED", "blue"), "failed": ("FAILED", "red"), "skipped": ("SKIPPED", "magenta"), } From 08f6d4790ce34042e89d8e916b6f6fa82011ea53 Mon Sep 17 00:00:00 2001 From: John Chase Date: Thu, 19 Mar 2026 22:19:42 -0700 Subject: [PATCH 26/44] Fixes remove cache command --- src/adagio/cli/cache.py | 71 ++++++++++++++++++++++++----------------- src/adagio/cli/main.py | 10 ++++++ 2 files changed, 51 insertions(+), 30 deletions(-) diff --git a/src/adagio/cli/cache.py b/src/adagio/cli/cache.py index 3566d71..83f99f0 100644 --- a/src/adagio/cli/cache.py +++ b/src/adagio/cli/cache.py @@ -1,48 +1,44 @@ -import argparse import re import shutil from pathlib import Path +from typing import Annotated +from cyclopts import App, Group, Parameter from rich.console import Console from ..executors.cache_support import CACHE_DIR_HELP, resolve_cache_dir_path QIIME_CACHE_CONTENTS = {"VERSION", "data", "keys", "pools", "processes"} -QIIME_CACHE_VERSION_RE = re.compile(r"^QIIME 2\ncache: v?\d+\nframework: 20\d\d\.\d+\Z") +QIIME_CACHE_LINE_RE = re.compile(r"cache: v?\d+\Z") def run_cache(argv: list[str], *, console: Console) -> None: - parser = argparse.ArgumentParser( - prog="adagio cache", - description="Manage Adagio's shared QIIME cache directory.", + app = App( + 
name="adagio cache", + help="Manage Adagio's shared QIIME cache directory.", ) - subparsers = parser.add_subparsers(dest="command", required=True) - - clear_parser = subparsers.add_parser( - "clear", - help="Delete an existing cache directory.", - description=( - "Delete an existing QIIME cache directory. " - "Only run this when no jobs are actively using the cache." - ), - ) - clear_parser.add_argument( - "--cache-dir", - required=True, - help=CACHE_DIR_HELP, - ) - - opts = parser.parse_args(argv) - - if opts.command == "clear": - cache_dir = resolve_cache_dir_path( + command_group = Group("Command Options", sort_key=0) + + @app.command + def clear( + *, + cache_dir: Annotated[ + Path, + Parameter( + name=("--cache-dir",), + group=command_group, + help=CACHE_DIR_HELP, + ), + ], + ) -> None: + """Delete an existing QIIME cache directory.""" + resolved_cache_dir = resolve_cache_dir_path( cwd=Path.cwd().resolve(), - raw_value=opts.cache_dir, + raw_value=str(cache_dir), ) - _clear_cache(cache_dir=cache_dir, console=console) - return + _clear_cache(cache_dir=resolved_cache_dir, console=console) - raise SystemExit(f"Unknown cache command: {opts.command}") + app(argv) def _clear_cache(*, cache_dir: Path, console: Console) -> None: @@ -67,5 +63,20 @@ def _require_qiime_cache(cache_dir: Path) -> None: except OSError as exc: raise SystemExit(f"Could not read cache version file: {version_file}") from exc - if not QIIME_CACHE_VERSION_RE.fullmatch(version_text): + if not _looks_like_qiime_cache_version(version_text): raise SystemExit(f"Path is not a QIIME cache: {cache_dir}") + + +def _looks_like_qiime_cache_version(version_text: str) -> bool: + lines = version_text.splitlines() + if len(lines) != 3: + return False + + if lines[0] != "QIIME 2": + return False + + if not QIIME_CACHE_LINE_RE.fullmatch(lines[1]): + return False + + framework_prefix = "framework: " + return lines[2].startswith(framework_prefix) and bool(lines[2][len(framework_prefix) :].strip()) diff --git 
a/src/adagio/cli/main.py b/src/adagio/cli/main.py index 6f4f136..b78578f 100644 --- a/src/adagio/cli/main.py +++ b/src/adagio/cli/main.py @@ -61,6 +61,16 @@ def main(argv: list[str] | None = None) -> None: ) app.command(build_qapi, name="build-qapi") + @app.command + def cache() -> None: + """Manage the shared QIIME cache directory.""" + raise SystemExit("Try: adagio cache --help") + + @app.command + def runtime() -> None: + """Execute a pipeline from spec/config/arguments files.""" + raise SystemExit("Try: adagio runtime --help") + if not pipeline_str: command_group = Group("Command Options", sort_key=0) From a7e9a7472abeac3eb972a3798f02759305f53347 Mon Sep 17 00:00:00 2001 From: John Chase Date: Thu, 19 Mar 2026 23:34:40 -0700 Subject: [PATCH 27/44] Fixes error formatting --- src/adagio/cli/main.py | 30 +++++++++++++++++------------- src/adagio/cli/runner.py | 39 ++++++++++++++++++++++++++------------- 2 files changed, 43 insertions(+), 26 deletions(-) diff --git a/src/adagio/cli/main.py b/src/adagio/cli/main.py index b78578f..f8a8ce1 100644 --- a/src/adagio/cli/main.py +++ b/src/adagio/cli/main.py @@ -5,6 +5,7 @@ from typing import Annotated, Any from cyclopts import App, Group, Parameter +from cyclopts.panel import CycloptsPanel from rich.console import Console from ..app.parsers.pipeline import Input as InputSpec @@ -48,10 +49,9 @@ def main(argv: list[str] | None = None) -> None: show_mode = ( ShowParamsMode(show_mode_str) if show_mode_str else ShowParamsMode.REQUIRED ) - except ValueError as exc: - raise SystemExit( - "Invalid --show-params value. Use one of: all, missing, required." - ) from exc + except ValueError: + console.print(CycloptsPanel("Invalid --show-params value. 
Use one of: all, missing, required.")) + sys.exit(1) if pipeline_str is None: pipeline_str = positional_pipeline @@ -64,12 +64,14 @@ def main(argv: list[str] | None = None) -> None: @app.command def cache() -> None: """Manage the shared QIIME cache directory.""" - raise SystemExit("Try: adagio cache --help") + console.print(CycloptsPanel("Try: adagio cache --help")) + sys.exit(1) @app.command def runtime() -> None: """Execute a pipeline from spec/config/arguments files.""" - raise SystemExit("Try: adagio runtime --help") + console.print(CycloptsPanel("Try: adagio runtime --help")) + sys.exit(1) if not pipeline_str: command_group = Group("Command Options", sort_key=0) @@ -121,9 +123,8 @@ def run( ): """Run a pipeline (requires --pipeline; dynamic options come from that file).""" _ = (show_params, cache_dir, reuse) - raise SystemExit( - "Missing --pipeline. Try:\n adagio run --pipeline pipeline.json --help" - ) + console.print(CycloptsPanel("Missing --pipeline. Try:\n adagio run --pipeline pipeline.json --help")) + sys.exit(1) app(argv) return @@ -134,7 +135,7 @@ def run( param_specs = parse_parameters(data) arguments_path_str = extract_flag_value(argv, "--arguments") arguments_data = ( - _load_arguments_data(Path(arguments_path_str)) if arguments_path_str else None + _load_arguments_data(Path(arguments_path_str), console) if arguments_path_str else None ) visible_inputs, visible_params = _filter_visible_specs( input_specs=input_specs, @@ -197,10 +198,12 @@ def _filter_visible_specs( return filtered_inputs, filtered_params -def _load_arguments_data(path: Path) -> dict[str, Any]: +def _load_arguments_data(path: Path, _console: Console | None = None) -> dict[str, Any]: + _con = _console or Console(stderr=True) data = json.loads(path.read_text(encoding="utf-8")) if not isinstance(data, dict): - raise SystemExit("Invalid arguments file: expected a JSON object.") + _con.print(CycloptsPanel("Invalid arguments file: expected a JSON object.")) + sys.exit(1) if "inputs" not 
in data: data["inputs"] = {} if "parameters" not in data: @@ -208,7 +211,8 @@ def _load_arguments_data(path: Path) -> dict[str, Any]: if not isinstance(data.get("inputs"), dict) or not isinstance( data.get("parameters"), dict ): - raise SystemExit("Invalid arguments file: 'inputs' and 'parameters' must be objects.") + _con.print(CycloptsPanel("Invalid arguments file: 'inputs' and 'parameters' must be objects.")) + sys.exit(1) return data diff --git a/src/adagio/cli/runner.py b/src/adagio/cli/runner.py index e7f6152..e5b1187 100644 --- a/src/adagio/cli/runner.py +++ b/src/adagio/cli/runner.py @@ -1,15 +1,32 @@ import json import os +import sys from pathlib import Path from typing import Any +from rich import box from rich.console import Console +from rich.panel import Panel +from rich.text import Text from ..executors.cache_support import ( describe_cache_config, resolve_cache_config, ) + +def _error_exit(console: Console, message: str) -> None: + panel = Panel( + Text.from_markup(message), + title="Error", + border_style="red", + box=box.ROUNDED, + expand=True, + title_align="left", + ) + console.print(panel) + sys.exit(1) + DEFAULT_OUTPUT_DIRNAME = "adagio-outputs" @@ -47,23 +64,17 @@ def run_pipeline_from_kwargs( unknown_inputs = sorted(set(arguments_data.inputs) - input_names) if unknown_inputs: - raise SystemExit( - "Unknown inputs in arguments file: " + ", ".join(unknown_inputs) - ) + _error_exit(console, "Unknown inputs in arguments file: " + ", ".join(unknown_inputs)) unknown_params = sorted(set(arguments_data.parameters) - param_names) if unknown_params: - raise SystemExit( - "Unknown parameters in arguments file: " + ", ".join(unknown_params) - ) + _error_exit(console, "Unknown parameters in arguments file: " + ", ".join(unknown_params)) unknown_outputs: list[str] = [] if isinstance(arguments_data.outputs, dict): unknown_outputs = sorted(set(arguments_data.outputs) - output_name_set) if unknown_outputs: - raise SystemExit( - "Unknown outputs in arguments 
file: " + ", ".join(unknown_outputs) - ) + _error_exit(console, "Unknown outputs in arguments file: " + ", ".join(unknown_outputs)) arguments.inputs.update(arguments_data.inputs) arguments.parameters.update(arguments_data.parameters) @@ -87,10 +98,12 @@ def run_pipeline_from_kwargs( name for name in required_params if _is_missing(arguments.parameters.get(name)) ] if missing_inputs or missing_params: - missing = [f"input:{name}" for name in missing_inputs] + [ - f"param:{name}" for name in missing_params - ] - raise SystemExit("Missing required arguments: " + ", ".join(missing)) + missing_opts = ( + [f"--input-{n.replace('_', '-')}" for n in missing_inputs] + + [f"--param-{n.replace('_', '-')}" for n in missing_params] + ) + formatted = ", ".join(f"[cyan]{opt}[/cyan]" for opt in missing_opts) + _error_exit(console, f"Missing required arguments: {formatted}") arguments.outputs = _resolve_output_destinations( outputs=arguments.outputs, From 96c050ac925f30e506e1fc0272b351ce32a1c2cf Mon Sep 17 00:00:00 2001 From: John Chase Date: Fri, 20 Mar 2026 13:34:06 -0700 Subject: [PATCH 28/44] Adds config toml --- README.md | 31 ++++++++++++++++++++++- pyproject.toml | 2 +- src/adagio/cli/config.py | 29 +++++++++++++++++++++ src/adagio/cli/dynamic.py | 18 +++++++++++++ src/adagio/cli/main.py | 14 ++++++++++- src/adagio/cli/runner.py | 16 +++++++++++- src/adagio/cli/runtime.py | 31 +++++++++++++++-------- src/adagio/executors/__init__.py | 17 ++++++++++--- src/adagio/executors/defaults.py | 43 ++++++++++++++++++++++++++++++++ src/adagio/model/task.py | 3 ++- 10 files changed, 185 insertions(+), 19 deletions(-) create mode 100644 src/adagio/cli/config.py diff --git a/README.md b/README.md index 1776481..de2ad7c 100644 --- a/README.md +++ b/README.md @@ -63,6 +63,12 @@ Use an arguments file: adagio run --pipeline path/to/pipeline.json --cache-dir /path/to/cache --arguments path/to/arguments.json ``` +Use a runtime config file with plugin-level defaults and optional task overrides: + 
+```bash +adagio run --pipeline path/to/pipeline.json --cache-dir /path/to/cache --config path/to/runtime.toml +``` + Control which dynamic flags are shown in help: ```bash @@ -114,6 +120,29 @@ adagio cache clear --cache-dir /path/to/cache If outputs are omitted, defaults are generated under `./adagio-outputs`. +### Runtime config format + +`--config` accepts TOML. Plugin keys provide the common case, and task keys can +override individual actions when needed: + +```toml +version = 1 + +[plugins] +dada2 = { image = "ghcr.io/cymis/qiime2-plugin-dada2:2026.1" } +demux = { image = "ghcr.io/cymis/qiime2-plugin-demux:2026.1" } + +[tasks] +"dada2.denoise_single" = { image = "registry.internal/custom-dada2:1.0" } +``` + +Precedence is `task override > plugin override > default resolver`. + +Task lookup supports graph node `id`, optional task `name` when present in the +pipeline, and `plugin.action` as a fallback. Plugin lookup uses the pipeline's +plugin name. Anything not listed in the config uses the default plugin image +resolver. 
+ ### QAPI generation/submission Generate and submit plugin metadata from the active QIIME environment: @@ -168,5 +197,5 @@ uv run adagio run --pipeline path/to/pipeline.json --cache-dir /path/to/cache The `runtime` subcommand is intended for runtime-adapter jobs: ```bash -uv run adagio runtime --spec spec.json --config config.json --arguments arguments.json --cache-dir /path/to/cache +uv run adagio runtime --spec spec.json --config runtime.toml --arguments arguments.json --cache-dir /path/to/cache ``` diff --git a/pyproject.toml b/pyproject.toml index 6f90f2e..5024d72 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ version = "0.0.0" description = "Adagio command line tool" readme = "README.md" requires-python = ">=3.10" -dependencies = ["cyclopts>=4.5.3", "pydantic>=2.12.5", "rich>=14.1.0", "parsl>=2024.12.16"] +dependencies = ["cyclopts>=4.5.3", "pydantic>=2.12.5", "rich>=14.1.0", "parsl>=2024.12.16", "tomli>=2.2.1; python_version < '3.11'"] [dependency-groups] diff --git a/src/adagio/cli/config.py b/src/adagio/cli/config.py new file mode 100644 index 0000000..f5365e9 --- /dev/null +++ b/src/adagio/cli/config.py @@ -0,0 +1,29 @@ +from pathlib import Path + +from pydantic import BaseModel, Field + +try: + import tomllib +except ModuleNotFoundError: # pragma: no cover + import tomli as tomllib + + +class ImageOverride(BaseModel): + image: str + + +class AdagioRunConfig(BaseModel): + version: int = 1 + plugins: dict[str, ImageOverride] = Field(default_factory=dict) + tasks: dict[str, ImageOverride] = Field(default_factory=dict) + + +def load_run_config(path: Path | None) -> AdagioRunConfig | None: + if path is None: + return None + + data = tomllib.loads(path.read_text(encoding="utf-8")) + if not isinstance(data, dict): + raise SystemExit("Invalid config file: expected a TOML table.") + + return AdagioRunConfig.model_validate(data) diff --git a/src/adagio/cli/dynamic.py b/src/adagio/cli/dynamic.py index 48d62d1..f044996 100644 --- 
a/src/adagio/cli/dynamic.py +++ b/src/adagio/cli/dynamic.py @@ -114,6 +114,7 @@ def build_dynamic_run( [ Path, Path | None, + Path | None, dict[str, Any], list[tuple[str, str]], list[tuple[str, str]], @@ -133,6 +134,7 @@ def build_dynamic_run( "--pipeline", "-p", "--arguments", + "--config", "--show-params", "--cache-dir", "--reuse", @@ -174,6 +176,14 @@ def build_dynamic_run( help="Parameter display mode: all, missing, or required.", ), ] + annotations["config_file"] = Annotated[ + Path | None, + CliParameter( + name=("--config",), + group=command_group, + help="Path to a TOML runtime config file.", + ), + ] annotations["cache_dir"] = Annotated[ Path, CliParameter( @@ -210,6 +220,12 @@ def build_dynamic_run( default=ShowParamsMode.REQUIRED, annotation=annotations["show_params"], ), + inspect.Parameter( + name="config_file", + kind=inspect.Parameter.KEYWORD_ONLY, + default=None, + annotation=annotations["config_file"], + ), inspect.Parameter( name="cache_dir", kind=inspect.Parameter.KEYWORD_ONLY, @@ -329,12 +345,14 @@ def run( pipeline: Path, arguments_file: Path | None = None, show_params: ShowParamsMode = ShowParamsMode.REQUIRED, + config_file: Path | None = None, **kwargs: Any, ) -> None: _ = show_params run_handler( pipeline, arguments_file, + config_file, kwargs, input_bindings, param_bindings, diff --git a/src/adagio/cli/main.py b/src/adagio/cli/main.py index f8a8ce1..cfa5da0 100644 --- a/src/adagio/cli/main.py +++ b/src/adagio/cli/main.py @@ -13,6 +13,7 @@ from ..app.parsers.pipeline import parse_inputs, parse_parameters from ..executors.cache_support import CACHE_DIR_HELP, REUSE_HELP from .args import ShowParamsMode, extract_flag_value, promote_positional_pipeline +from .config import load_run_config from .dynamic import build_dynamic_run from .qapi import build_qapi from .runner import run_pipeline_from_kwargs @@ -95,6 +96,14 @@ def run( help="Path to a JSON arguments file.", ), ] = None, + config: Annotated[ + Path | None, + Parameter( + 
name=("--config",), + group=command_group, + help="Path to a TOML runtime config file.", + ), + ] = None, show_params: Annotated[ ShowParamsMode, Parameter( @@ -122,7 +131,7 @@ def run( ] = True, ): """Run a pipeline (requires --pipeline; dynamic options come from that file).""" - _ = (show_params, cache_dir, reuse) + _ = (config, show_params, cache_dir, reuse) console.print(CycloptsPanel("Missing --pipeline. Try:\n adagio run --pipeline pipeline.json --help")) sys.exit(1) @@ -134,9 +143,12 @@ def run( input_specs = parse_inputs(data) param_specs = parse_parameters(data) arguments_path_str = extract_flag_value(argv, "--arguments") + config_path_str = extract_flag_value(argv, "--config") arguments_data = ( _load_arguments_data(Path(arguments_path_str), console) if arguments_path_str else None ) + if config_path_str: + load_run_config(Path(config_path_str)) visible_inputs, visible_params = _filter_visible_specs( input_specs=input_specs, param_specs=param_specs, diff --git a/src/adagio/cli/runner.py b/src/adagio/cli/runner.py index e5b1187..ecfef6f 100644 --- a/src/adagio/cli/runner.py +++ b/src/adagio/cli/runner.py @@ -9,6 +9,7 @@ from rich.panel import Panel from rich.text import Text +from .config import load_run_config from ..executors.cache_support import ( describe_cache_config, resolve_cache_config, @@ -33,6 +34,7 @@ def _error_exit(console: Console, message: str) -> None: def run_pipeline_from_kwargs( pipeline: Path, arguments_file: Path | None, + config_file: Path | None, kwargs: dict[str, Any], input_bindings: list[tuple[str, str]], param_bindings: list[tuple[str, str]], @@ -52,6 +54,7 @@ def run_pipeline_from_kwargs( pipeline_data = data.get("spec", data) if isinstance(data, dict) else data parsed_pipeline = AdagioPipeline.model_validate(pipeline_data) arguments = parsed_pipeline.signature.to_default_arguments() + run_config = load_run_config(config_file) output_names = [output.name for output in parsed_pipeline.signature.outputs] input_names = {name for _, 
name in input_bindings} @@ -126,7 +129,18 @@ def run_pipeline_from_kwargs( from ..executors import select_default_executor - executor = select_default_executor() + executor = select_default_executor( + plugin_image_overrides=( + {name: override.image for name, override in run_config.plugins.items()} + if run_config is not None + else None + ), + task_image_overrides=( + {name: override.image for name, override in run_config.tasks.items()} + if run_config is not None + else None + ) + ) if not suppress_header: console.print(f"[bold]Executing pipeline[/bold] ({executor.mode_label})") diff --git a/src/adagio/cli/runtime.py b/src/adagio/cli/runtime.py index 19df9a3..2c78fd8 100644 --- a/src/adagio/cli/runtime.py +++ b/src/adagio/cli/runtime.py @@ -18,6 +18,7 @@ from ..monitor.composite import CompositeMonitor from ..monitor.connected import ConnectedMonitor from ..monitor.log import LogMonitor +from .config import load_run_config def run_runtime(argv: list[str], *, console: Console) -> None: @@ -26,14 +27,14 @@ def run_runtime(argv: list[str], *, console: Console) -> None: prog="adagio runtime", description=( "Execute a pipeline from spec/config/arguments files. " - "The config file is currently validated for compatibility but does not alter runtime behavior." + "The config file may define per-plugin and per-task container image overrides." ), ) parser.add_argument("--spec", required=True, help="Path to pipeline spec JSON.") parser.add_argument( "--config", required=True, - help="Path to config JSON. 
The file is validated for compatibility but otherwise unused.", + help="Path to runtime config TOML.", ) parser.add_argument("--arguments", required=False, help="Path to run arguments JSON.") parser.add_argument("--job-id", required=False, help="Runtime job ID.") @@ -59,7 +60,7 @@ def run_runtime(argv: list[str], *, console: Console) -> None: opts = parser.parse_args(argv) spec_data = _load_json(Path(opts.spec)) - _load_runtime_config(Path(opts.config)) + run_config = load_run_config(Path(opts.config)) runtime_arguments: Any = {} if opts.arguments: runtime_arguments = _load_json(Path(opts.arguments)) @@ -100,7 +101,10 @@ def run_runtime(argv: list[str], *, console: Console) -> None: from ..executors import select_default_executor - executor = select_default_executor() + executor = select_default_executor( + plugin_image_overrides=_plugin_image_overrides(run_config), + task_image_overrides=_task_image_overrides(run_config), + ) try: executor.execute( @@ -131,13 +135,6 @@ def _load_json(path: Path) -> Any: return json.loads(path.read_text(encoding="utf-8")) -def _load_runtime_config(path: Path) -> dict[str, Any]: - config = _load_json(path) - if not isinstance(config, dict): - raise SystemExit("Invalid runtime config: expected a JSON object.") - return config - - def _parse_pipeline(data: Any) -> AdagioPipeline: pipeline_data = data.get("spec", data) if isinstance(data, dict) else data return AdagioPipeline.model_validate(pipeline_data) @@ -154,6 +151,18 @@ def _resolve_output_dir(raw_output_dir: str | None, job_id: str | None) -> str: return output_dir +def _task_image_overrides(run_config: Any) -> dict[str, str] | None: + if run_config is None: + return None + return {name: override.image for name, override in run_config.tasks.items()} + + +def _plugin_image_overrides(run_config: Any) -> dict[str, str] | None: + if run_config is None: + return None + return {name: override.image for name, override in run_config.plugins.items()} + + def _build_arguments( *, 
pipeline: AdagioPipeline, diff --git a/src/adagio/executors/__init__.py b/src/adagio/executors/__init__.py index d1b7849..9df01d8 100644 --- a/src/adagio/executors/__init__.py +++ b/src/adagio/executors/__init__.py @@ -1,13 +1,24 @@ __all__ = ["select_default_executor"] -def select_default_executor(): - from .defaults import DefaultTaskEnvironmentResolver +def select_default_executor( + *, + plugin_image_overrides: dict[str, str] | None = None, + task_image_overrides: dict[str, str] | None = None, +): + from .defaults import ( + ConfigurableTaskEnvironmentResolver, + DefaultTaskEnvironmentResolver, + ) from .docker import DockerTaskEnvironmentLauncher from .task_environments import TaskEnvironmentExecutor return TaskEnvironmentExecutor( - environment_resolver=DefaultTaskEnvironmentResolver(), + environment_resolver=ConfigurableTaskEnvironmentResolver( + base=DefaultTaskEnvironmentResolver(), + plugin_image_overrides=plugin_image_overrides, + task_image_overrides=task_image_overrides, + ), launchers={ "docker": DockerTaskEnvironmentLauncher(), }, diff --git a/src/adagio/executors/defaults.py b/src/adagio/executors/defaults.py index 72ef0ab..ee323d5 100644 --- a/src/adagio/executors/defaults.py +++ b/src/adagio/executors/defaults.py @@ -34,3 +34,46 @@ def resolve(self, *, task: PluginActionTask) -> TaskEnvironmentSpec: reference=reference, description=f"default plugin image for {task.plugin}", ) + + +class ConfigurableTaskEnvironmentResolver(TaskEnvironmentResolver): + def __init__( + self, + *, + base: TaskEnvironmentResolver, + plugin_image_overrides: dict[str, str] | None = None, + task_image_overrides: dict[str, str] | None = None, + ) -> None: + self._base = base + self._plugin_image_overrides = plugin_image_overrides or {} + self._task_image_overrides = task_image_overrides or {} + + def resolve(self, *, task: PluginActionTask) -> TaskEnvironmentSpec: + override = self._find_override(task=task) + if override is None: + return self._base.resolve(task=task) + + 
return TaskEnvironmentSpec( + kind="docker", + reference=override, + description=f"configured image override for {task.name or task.id}", + ) + + def _find_override(self, *, task: PluginActionTask) -> str | None: + candidates = [task.id] + if task.name: + candidates.insert(0, task.name) + candidates.append(f"{task.plugin}.{task.action}") + + for candidate in candidates: + override = self._task_image_overrides.get(candidate) + if override: + return override + + plugin_candidates = [task.plugin, task.plugin.lower()] + for candidate in plugin_candidates: + override = self._plugin_image_overrides.get(candidate) + if override: + return override + + return None diff --git a/src/adagio/model/task.py b/src/adagio/model/task.py index 2e91602..997f2af 100644 --- a/src/adagio/model/task.py +++ b/src/adagio/model/task.py @@ -16,6 +16,7 @@ def exec(self, ctx, params, scope): class PluginActionTask(_BaseTask): id: str kind: t.Literal['plugin-action'] + name: str | None = None plugin: str action: str @@ -32,7 +33,7 @@ def exec(self, ctx, params, scope): # store for second pass in params metadata[name] = scope[src.id] else: - raise NotImplemented('impossible') + raise NotImplementedError('impossible') for name, param in self.parameters.items(): if param.kind == 'metadata': From 980f31658362555f5dcf8c559426f5913ccd0379 Mon Sep 17 00:00:00 2001 From: John Chase Date: Mon, 23 Mar 2026 21:37:25 -0700 Subject: [PATCH 29/44] Adds explicit architecture check --- README.md | 19 +++++++---- src/adagio/cli/config.py | 9 ++++- src/adagio/cli/runner.py | 44 ++++++++++++++++++++----- src/adagio/cli/runtime.py | 34 ++++++++++++++----- src/adagio/executors/__init__.py | 10 +++--- src/adagio/executors/base.py | 6 ++++ src/adagio/executors/defaults.py | 56 ++++++++++++++++++++++++-------- src/adagio/executors/docker.py | 17 ++++++++-- src/adagio/monitor/tty.py | 2 +- 9 files changed, 150 insertions(+), 47 deletions(-) diff --git a/README.md b/README.md index de2ad7c..410270e 100644 --- a/README.md 
+++ b/README.md @@ -63,7 +63,7 @@ Use an arguments file: adagio run --pipeline path/to/pipeline.json --cache-dir /path/to/cache --arguments path/to/arguments.json ``` -Use a runtime config file with plugin-level defaults and optional task overrides: +Use a runtime config file with defaults, plugin-level overrides, and optional task overrides: ```bash adagio run --pipeline path/to/pipeline.json --cache-dir /path/to/cache --config path/to/runtime.toml @@ -122,26 +122,31 @@ If outputs are omitted, defaults are generated under `./adagio-outputs`. ### Runtime config format -`--config` accepts TOML. Plugin keys provide the common case, and task keys can -override individual actions when needed: +`--config` accepts TOML. Defaults apply first, then plugin keys, then task keys: ```toml version = 1 +[defaults] +platform = "linux/amd64" + [plugins] dada2 = { image = "ghcr.io/cymis/qiime2-plugin-dada2:2026.1" } demux = { image = "ghcr.io/cymis/qiime2-plugin-demux:2026.1" } [tasks] -"dada2.denoise_single" = { image = "registry.internal/custom-dada2:1.0" } +"dada2.denoise_single" = { image = "registry.internal/custom-dada2:1.0", platform = "linux/amd64" } ``` -Precedence is `task override > plugin override > default resolver`. +`image` and `platform` are both optional on defaults, plugin entries, and task entries. + +Precedence is `task override > plugin override > defaults > default resolver`. Task lookup supports graph node `id`, optional task `name` when present in the pipeline, and `plugin.action` as a fallback. Plugin lookup uses the pipeline's -plugin name. Anything not listed in the config uses the default plugin image -resolver. +plugin name. If `platform` is omitted all the way through, Adagio uses normal +Docker platform resolution with no implicit fallback. Anything not listed in the +config uses the default plugin image resolver. 
### QAPI generation/submission diff --git a/src/adagio/cli/config.py b/src/adagio/cli/config.py index f5365e9..8838599 100644 --- a/src/adagio/cli/config.py +++ b/src/adagio/cli/config.py @@ -9,11 +9,18 @@ class ImageOverride(BaseModel): - image: str + image: str | None = None + platform: str | None = None + + +class DefaultOverride(BaseModel): + image: str | None = None + platform: str | None = None class AdagioRunConfig(BaseModel): version: int = 1 + defaults: DefaultOverride = Field(default_factory=DefaultOverride) plugins: dict[str, ImageOverride] = Field(default_factory=dict) tasks: dict[str, ImageOverride] = Field(default_factory=dict) diff --git a/src/adagio/cli/runner.py b/src/adagio/cli/runner.py index ecfef6f..9b9371b 100644 --- a/src/adagio/cli/runner.py +++ b/src/adagio/cli/runner.py @@ -10,6 +10,7 @@ from rich.text import Text from .config import load_run_config +from ..executors.base import TaskEnvironmentOverride from ..executors.cache_support import ( describe_cache_config, resolve_cache_config, @@ -130,16 +131,13 @@ def run_pipeline_from_kwargs( from ..executors import select_default_executor executor = select_default_executor( - plugin_image_overrides=( - {name: override.image for name, override in run_config.plugins.items()} - if run_config is not None - else None + default_override=_config_default_override(run_config), + plugin_overrides=_config_named_overrides( + run_config.plugins if run_config is not None else {} + ), + task_overrides=_config_named_overrides( + run_config.tasks if run_config is not None else {} ), - task_image_overrides=( - {name: override.image for name, override in run_config.tasks.items()} - if run_config is not None - else None - ) ) if not suppress_header: @@ -191,3 +189,31 @@ def _is_truthy(value: str | None) -> bool: if value is None: return False return value.strip().lower() in {"1", "true", "yes", "on"} + + +def _config_default_override(run_config: Any) -> TaskEnvironmentOverride | None: + if run_config is None: + 
return None + + defaults = run_config.defaults + if defaults.image is None and defaults.platform is None: + return None + + return TaskEnvironmentOverride( + reference=defaults.image, + platform=defaults.platform, + ) + + +def _config_named_overrides( + raw_overrides: dict[str, Any], +) -> dict[str, TaskEnvironmentOverride] | None: + resolved = { + name: TaskEnvironmentOverride( + reference=override.image, + platform=override.platform, + ) + for name, override in raw_overrides.items() + if override.image is not None or override.platform is not None + } + return resolved or None diff --git a/src/adagio/cli/runtime.py b/src/adagio/cli/runtime.py index 2c78fd8..2f6709e 100644 --- a/src/adagio/cli/runtime.py +++ b/src/adagio/cli/runtime.py @@ -8,6 +8,7 @@ from rich.console import Console +from ..executors.base import TaskEnvironmentOverride from ..executors.cache_support import ( CACHE_DIR_HELP, REUSE_HELP, @@ -27,7 +28,7 @@ def run_runtime(argv: list[str], *, console: Console) -> None: prog="adagio runtime", description=( "Execute a pipeline from spec/config/arguments files. " - "The config file may define per-plugin and per-task container image overrides." + "The config file may define default, per-plugin, and per-task image/platform overrides." 
), ) parser.add_argument("--spec", required=True, help="Path to pipeline spec JSON.") @@ -102,8 +103,9 @@ def run_runtime(argv: list[str], *, console: Console) -> None: from ..executors import select_default_executor executor = select_default_executor( - plugin_image_overrides=_plugin_image_overrides(run_config), - task_image_overrides=_task_image_overrides(run_config), + default_override=_default_override(run_config), + plugin_overrides=_named_overrides(run_config.plugins if run_config is not None else {}), + task_overrides=_named_overrides(run_config.tasks if run_config is not None else {}), ) try: @@ -151,16 +153,30 @@ def _resolve_output_dir(raw_output_dir: str | None, job_id: str | None) -> str: return output_dir -def _task_image_overrides(run_config: Any) -> dict[str, str] | None: +def _default_override(run_config: Any) -> TaskEnvironmentOverride | None: if run_config is None: return None - return {name: override.image for name, override in run_config.tasks.items()} + defaults = run_config.defaults + if defaults.image is None and defaults.platform is None: + return None + return TaskEnvironmentOverride( + reference=defaults.image, + platform=defaults.platform, + ) -def _plugin_image_overrides(run_config: Any) -> dict[str, str] | None: - if run_config is None: - return None - return {name: override.image for name, override in run_config.plugins.items()} +def _named_overrides( + raw_overrides: dict[str, Any], +) -> dict[str, TaskEnvironmentOverride] | None: + resolved = { + name: TaskEnvironmentOverride( + reference=override.image, + platform=override.platform, + ) + for name, override in raw_overrides.items() + if override.image is not None or override.platform is not None + } + return resolved or None def _build_arguments( diff --git a/src/adagio/executors/__init__.py b/src/adagio/executors/__init__.py index 9df01d8..da8647b 100644 --- a/src/adagio/executors/__init__.py +++ b/src/adagio/executors/__init__.py @@ -3,8 +3,9 @@ def select_default_executor( *, - 
plugin_image_overrides: dict[str, str] | None = None, - task_image_overrides: dict[str, str] | None = None, + default_override=None, + plugin_overrides=None, + task_overrides=None, ): from .defaults import ( ConfigurableTaskEnvironmentResolver, @@ -16,8 +17,9 @@ def select_default_executor( return TaskEnvironmentExecutor( environment_resolver=ConfigurableTaskEnvironmentResolver( base=DefaultTaskEnvironmentResolver(), - plugin_image_overrides=plugin_image_overrides, - task_image_overrides=task_image_overrides, + default_override=default_override, + plugin_overrides=plugin_overrides, + task_overrides=task_overrides, ), launchers={ "docker": DockerTaskEnvironmentLauncher(), diff --git a/src/adagio/executors/base.py b/src/adagio/executors/base.py index 697606d..9b47d6c 100644 --- a/src/adagio/executors/base.py +++ b/src/adagio/executors/base.py @@ -34,6 +34,12 @@ class TaskEnvironmentSpec: options: Mapping[str, Any] | None = None +@dataclass(frozen=True) +class TaskEnvironmentOverride: + reference: str | None = None + platform: str | None = None + + @dataclass(frozen=True) class TaskExecutionRequest: task: PluginActionTask diff --git a/src/adagio/executors/defaults.py b/src/adagio/executors/defaults.py index ee323d5..bb1b0a7 100644 --- a/src/adagio/executors/defaults.py +++ b/src/adagio/executors/defaults.py @@ -1,6 +1,10 @@ from adagio.model.task import PluginActionTask -from .base import TaskEnvironmentResolver, TaskEnvironmentSpec +from .base import ( + TaskEnvironmentOverride, + TaskEnvironmentResolver, + TaskEnvironmentSpec, +) DEFAULT_REGISTRY = "ghcr.io/cymis" DEFAULT_IMAGE_PREFIX = "qiime2-plugin-" @@ -41,38 +45,62 @@ def __init__( self, *, base: TaskEnvironmentResolver, - plugin_image_overrides: dict[str, str] | None = None, - task_image_overrides: dict[str, str] | None = None, + default_override: TaskEnvironmentOverride | None = None, + plugin_overrides: dict[str, TaskEnvironmentOverride] | None = None, + task_overrides: dict[str, TaskEnvironmentOverride] | 
None = None, ) -> None: self._base = base - self._plugin_image_overrides = plugin_image_overrides or {} - self._task_image_overrides = task_image_overrides or {} + self._default_override = default_override + self._plugin_overrides = plugin_overrides or {} + self._task_overrides = task_overrides or {} def resolve(self, *, task: PluginActionTask) -> TaskEnvironmentSpec: - override = self._find_override(task=task) - if override is None: - return self._base.resolve(task=task) + base_environment = self._base.resolve(task=task) + reference = base_environment.reference + options = dict(base_environment.options or {}) + configured = False + + for override in ( + self._default_override, + self._find_plugin_override(task=task), + self._find_task_override(task=task), + ): + if override is None: + continue + if override.reference is not None: + reference = override.reference + configured = True + if override.platform is not None: + options["platform"] = override.platform + configured = True return TaskEnvironmentSpec( - kind="docker", - reference=override, - description=f"configured image override for {task.name or task.id}", + kind=base_environment.kind, + reference=reference, + description=( + f"configured environment for {task.name or task.id}" + if configured + else base_environment.description + ), + options=options or None, ) - def _find_override(self, *, task: PluginActionTask) -> str | None: + def _find_task_override(self, *, task: PluginActionTask) -> TaskEnvironmentOverride | None: candidates = [task.id] if task.name: candidates.insert(0, task.name) candidates.append(f"{task.plugin}.{task.action}") for candidate in candidates: - override = self._task_image_overrides.get(candidate) + override = self._task_overrides.get(candidate) if override: return override + return None + def _find_plugin_override(self, *, task: PluginActionTask) -> TaskEnvironmentOverride | None: plugin_candidates = [task.plugin, task.plugin.lower()] for candidate in plugin_candidates: - override = 
self._plugin_image_overrides.get(candidate) + override = self._plugin_overrides.get(candidate) if override: return override diff --git a/src/adagio/executors/docker.py b/src/adagio/executors/docker.py index 5e54c29..ce211bf 100644 --- a/src/adagio/executors/docker.py +++ b/src/adagio/executors/docker.py @@ -76,6 +76,12 @@ def launch( write_json_file(spec_path, task_spec) src_root = local_source_root() + platform = None + if environment.options is not None: + raw_platform = environment.options.get("platform") + if isinstance(raw_platform, str) and raw_platform: + platform = raw_platform + command = [ "docker", "run", @@ -86,13 +92,17 @@ def launch( *python_warning_env_flags(), "-w", containerize_path(request.cwd), + ] + if platform: + command.extend(["--platform", platform]) + command.extend([ environment.reference, "python", "-m", "adagio.cli.task_exec", "--task", containerize_path(spec_path), - ] + ]) host_paths = [request.cwd, request.work_path, src_root] for value in list(request.archive_inputs.values()) + list(request.metadata_inputs.values()): @@ -107,7 +117,10 @@ def launch( command = with_mounts(command=command, host_paths=host_paths) if console is not None: - console.print(f"[dim]Task environment:[/dim] docker {environment.reference}") + label = f"docker {environment.reference}" + if platform: + label = f"docker --platform {platform} {environment.reference}" + console.print(f"[dim]Task environment:[/dim] {label}") try: result = subprocess.run( diff --git a/src/adagio/monitor/tty.py b/src/adagio/monitor/tty.py index 48111bb..f8c4fc1 100644 --- a/src/adagio/monitor/tty.py +++ b/src/adagio/monitor/tty.py @@ -226,7 +226,7 @@ def _bar_text(completed: int, total: int, color: str, width: int = 28) -> str: ratio = min(max(completed / total, 0.0), 1.0) filled = int(round(ratio * width)) empty = width - filled - return f"[{color}]{'━' * filled}[/]{' ' * empty}" + return f"[{color}]{'━' * filled}[/][dim]{'─' * empty}[/]" def _elapsed(task: _TaskState) -> str: From 
b45559cae673cb469d9d56920b589fe1726e48c4 Mon Sep 17 00:00:00 2001 From: John Chase Date: Tue, 24 Mar 2026 21:32:48 -0700 Subject: [PATCH 30/44] Temporary move imports to be lazy --- src/adagio/execute.py | 2 +- src/adagio/execution/context.py | 37 +++++++++++++++++++--- src/adagio/execution/proxy.py | 54 ++++++++++++++++++++++++++++++--- 3 files changed, 82 insertions(+), 11 deletions(-) diff --git a/src/adagio/execute.py b/src/adagio/execute.py index d6ab826..0e1c1a3 100644 --- a/src/adagio/execute.py +++ b/src/adagio/execute.py @@ -3,7 +3,6 @@ import typing as t -from adagio.execution.context import AdagioContext from adagio.model.arguments import AdagioArguments from adagio.model.pipeline import AdagioPipeline from adagio.monitor.log import LogMonitor @@ -53,6 +52,7 @@ def _setup_context(advanced): # TODO: actually configure a non-temp cache + from adagio.execution.context import AdagioContext from qiime2.sdk import PluginManager PluginManager() diff --git a/src/adagio/execution/context.py b/src/adagio/execution/context.py index e82ad52..b1e49d9 100644 --- a/src/adagio/execution/context.py +++ b/src/adagio/execution/context.py @@ -1,14 +1,41 @@ -from parsl import python_app, join_app +from adagio.execution.proxy import IndexedProxyResults, dfk_thread_future, lift_parsl -from qiime2.sdk.proxy import ProxyResults, Proxy -from qiime2.sdk import Pipeline, Results -from qiime2.sdk.context import ParallelContext +_QIIME2_IMPORT_ERROR: ModuleNotFoundError | None = None -from adagio.execution.proxy import IndexedProxyResults, dfk_thread_future, lift_parsl +try: + from qiime2.sdk import Pipeline, Results + from qiime2.sdk.context import ParallelContext + from qiime2.sdk.proxy import Proxy, ProxyResults +except ModuleNotFoundError as exc: + _QIIME2_IMPORT_ERROR = exc + + class Proxy: # type: ignore[no-redef] + pass + + class ProxyResults: # type: ignore[no-redef] + pass + + class Pipeline: # type: ignore[no-redef] + pass + + class Results: # type: ignore[no-redef] + pass 
+ + class ParallelContext: # type: ignore[no-redef] + def __init__(self, *_args, **_kwargs): + _require_qiime2() + + +def _require_qiime2() -> None: + if _QIIME2_IMPORT_ERROR is not None: + raise ModuleNotFoundError( + "qiime2 is required for local execution context support." + ) from _QIIME2_IMPORT_ERROR class AdagioContext(ParallelContext): def __init__(self, action_obj=None, parent=None): + _require_qiime2() super().__init__(action_obj, parent) diff --git a/src/adagio/execution/proxy.py b/src/adagio/execution/proxy.py index 44860b3..5236f8c 100644 --- a/src/adagio/execution/proxy.py +++ b/src/adagio/execution/proxy.py @@ -1,9 +1,53 @@ -from qiime2.sdk.proxy import ( - Proxy, ProxyResult, ProxyVisualization, ProxyArtifact, ProxyResults, - ProxyResultCollection) -from qiime2.core.type.util import is_visualization_type, is_collection_type from parsl import python_app, join_app +_QIIME2_IMPORT_ERROR: ModuleNotFoundError | None = None + +try: + from qiime2.core.type.util import is_collection_type, is_visualization_type + from qiime2.sdk.proxy import ( + Proxy, + ProxyArtifact, + ProxyResult, + ProxyResultCollection, + ProxyResults, + ProxyVisualization, + ) +except ModuleNotFoundError as exc: + _QIIME2_IMPORT_ERROR = exc + + class Proxy: # type: ignore[no-redef] + def __init__(self, future=None, selector=NotImplemented): + self._future_ = future + self._selector_ = selector + + class ProxyResult(Proxy): # type: ignore[no-redef] + pass + + class ProxyVisualization(ProxyResult): # type: ignore[no-redef] + pass + + class ProxyArtifact(ProxyResult): # type: ignore[no-redef] + pass + + class ProxyResults(Proxy): # type: ignore[no-redef] + _signature_ = {} + + class ProxyResultCollection(ProxyResult): # type: ignore[no-redef] + pass + + def is_visualization_type(*_args, **_kwargs): # type: ignore[no-redef] + _require_qiime2() + + def is_collection_type(*_args, **_kwargs): # type: ignore[no-redef] + _require_qiime2() + + +def _require_qiime2() -> None: + if 
_QIIME2_IMPORT_ERROR is not None: + raise ModuleNotFoundError( + "qiime2 is required for local execution/proxy support." + ) from _QIIME2_IMPORT_ERROR + class ProxyMetadata(Proxy): @@ -174,7 +218,7 @@ def kwargs_from_parsl(args, inputs, selectors, raw): elif key is None: try: new.append(selector(future)) - except: + except Exception: raise Exception(selector(range(10))) else: kwargs[key] = selector(future) From 61e5d05bcead7e2b7390d1b299e9dd1e054c82f2 Mon Sep 17 00:00:00 2001 From: John Chase Date: Wed, 25 Mar 2026 14:18:55 -0700 Subject: [PATCH 31/44] Adds apptainer support --- README.md | 30 +++- src/adagio/cli/config.py | 3 + src/adagio/cli/runner.py | 31 +++- src/adagio/cli/runtime.py | 76 ++++++-- src/adagio/executors/__init__.py | 2 + src/adagio/executors/apptainer.py | 204 +++++++++++++++++++++ src/adagio/executors/base.py | 1 + src/adagio/executors/container_support.py | 28 ++- src/adagio/executors/defaults.py | 14 +- tests/test_apptainer_launcher.py | 208 ++++++++++++++++++++++ tests/test_task_environment_config.py | 99 ++++++++++ 11 files changed, 658 insertions(+), 38 deletions(-) create mode 100644 src/adagio/executors/apptainer.py create mode 100644 tests/test_apptainer_launcher.py create mode 100644 tests/test_task_environment_config.py diff --git a/README.md b/README.md index 410270e..95ff873 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,8 @@ Command-line runner for Adagio pipeline files - Python 3.10+ - `uv` (recommended for development) -- Docker (currently required for pipeline execution) +- Docker for the default task runtime +- Apptainer or Singularity when using `kind = "apptainer"` with local `.sif` images ## Installation @@ -46,10 +47,11 @@ adagio run --pipeline path/to/pipeline.json --cache-dir /path/to/cache ``` `adagio run` executes each plugin task in its own task environment. 
-Today the default task environment is a Docker image in GHCR derived from the plugin +The default task environment is a Docker image in GHCR derived from the plugin name in the pipeline spec, for example `dada2` -> `ghcr.io/cymis/qiime2-plugin-dada2:2026.1`. -The cache directory is required and is reused across reruns by default so unchanged -successful tasks can be replayed. +Runtime config can override that per default/plugin/task and switch selected work to +Apptainer/Singularity with a local `.sif` image path. The cache directory is required +and is reused across reruns by default so unchanged successful tasks can be replayed. Equivalent positional form: @@ -138,7 +140,9 @@ demux = { image = "ghcr.io/cymis/qiime2-plugin-demux:2026.1" } "dada2.denoise_single" = { image = "registry.internal/custom-dada2:1.0", platform = "linux/amd64" } ``` -`image` and `platform` are both optional on defaults, plugin entries, and task entries. +`kind`, `image`, and `platform` are all optional on defaults, plugin entries, and task entries. +`kind` may be `docker` or `apptainer`. `image` remains the environment reference: +for Docker it is the container image, and for Apptainer it must be a local `.sif` path. Precedence is `task override > plugin override > defaults > default resolver`. @@ -148,6 +152,22 @@ plugin name. If `platform` is omitted all the way through, Adagio uses normal Docker platform resolution with no implicit fallback. Anything not listed in the config uses the default plugin image resolver. +Concrete Apptainer example: + +```toml +version = 1 + +[defaults] +kind = "docker" + +[plugins] +bowtie2 = { kind = "apptainer", image = "/shared/qiime-images/q2-bowtie2-test.sif" } +``` + +For `kind = "apptainer"`, Adagio prefers the `apptainer` executable and falls back to +`singularity`. The current implementation supports only local `.sif` paths and runs +tasks serially; no scheduler submission or remote image pull behavior is included. 
+ ### QAPI generation/submission Generate and submit plugin metadata from the active QIIME environment: diff --git a/src/adagio/cli/config.py b/src/adagio/cli/config.py index 8838599..5c09c71 100644 --- a/src/adagio/cli/config.py +++ b/src/adagio/cli/config.py @@ -1,4 +1,5 @@ from pathlib import Path +from typing import Literal from pydantic import BaseModel, Field @@ -9,11 +10,13 @@ class ImageOverride(BaseModel): + kind: Literal["docker", "apptainer"] | None = None image: str | None = None platform: str | None = None class DefaultOverride(BaseModel): + kind: Literal["docker", "apptainer"] | None = None image: str | None = None platform: str | None = None diff --git a/src/adagio/cli/runner.py b/src/adagio/cli/runner.py index 9b9371b..962a8db 100644 --- a/src/adagio/cli/runner.py +++ b/src/adagio/cli/runner.py @@ -29,6 +29,7 @@ def _error_exit(console: Console, message: str) -> None: console.print(panel) sys.exit(1) + DEFAULT_OUTPUT_DIRNAME = "adagio-outputs" @@ -68,17 +69,26 @@ def run_pipeline_from_kwargs( unknown_inputs = sorted(set(arguments_data.inputs) - input_names) if unknown_inputs: - _error_exit(console, "Unknown inputs in arguments file: " + ", ".join(unknown_inputs)) + _error_exit( + console, + "Unknown inputs in arguments file: " + ", ".join(unknown_inputs), + ) unknown_params = sorted(set(arguments_data.parameters) - param_names) if unknown_params: - _error_exit(console, "Unknown parameters in arguments file: " + ", ".join(unknown_params)) + _error_exit( + console, + "Unknown parameters in arguments file: " + ", ".join(unknown_params), + ) unknown_outputs: list[str] = [] if isinstance(arguments_data.outputs, dict): unknown_outputs = sorted(set(arguments_data.outputs) - output_name_set) if unknown_outputs: - _error_exit(console, "Unknown outputs in arguments file: " + ", ".join(unknown_outputs)) + _error_exit( + console, + "Unknown outputs in arguments file: " + ", ".join(unknown_outputs), + ) arguments.inputs.update(arguments_data.inputs) 
arguments.parameters.update(arguments_data.parameters) @@ -102,10 +112,9 @@ def run_pipeline_from_kwargs( name for name in required_params if _is_missing(arguments.parameters.get(name)) ] if missing_inputs or missing_params: - missing_opts = ( - [f"--input-{n.replace('_', '-')}" for n in missing_inputs] - + [f"--param-{n.replace('_', '-')}" for n in missing_params] - ) + missing_opts = [f"--input-{n.replace('_', '-')}" for n in missing_inputs] + [ + f"--param-{n.replace('_', '-')}" for n in missing_params + ] formatted = ", ".join(f"[cyan]{opt}[/cyan]" for opt in missing_opts) _error_exit(console, f"Missing required arguments: {formatted}") @@ -196,10 +205,11 @@ def _config_default_override(run_config: Any) -> TaskEnvironmentOverride | None: return None defaults = run_config.defaults - if defaults.image is None and defaults.platform is None: + if defaults.kind is None and defaults.image is None and defaults.platform is None: return None return TaskEnvironmentOverride( + kind=defaults.kind, reference=defaults.image, platform=defaults.platform, ) @@ -210,10 +220,13 @@ def _config_named_overrides( ) -> dict[str, TaskEnvironmentOverride] | None: resolved = { name: TaskEnvironmentOverride( + kind=override.kind, reference=override.image, platform=override.platform, ) for name, override in raw_overrides.items() - if override.image is not None or override.platform is not None + if override.kind is not None + or override.image is not None + or override.platform is not None } return resolved or None diff --git a/src/adagio/cli/runtime.py b/src/adagio/cli/runtime.py index 2f6709e..8328035 100644 --- a/src/adagio/cli/runtime.py +++ b/src/adagio/cli/runtime.py @@ -28,7 +28,7 @@ def run_runtime(argv: list[str], *, console: Console) -> None: prog="adagio runtime", description=( "Execute a pipeline from spec/config/arguments files. " - "The config file may define default, per-plugin, and per-task image/platform overrides." 
+ "The config file may define default, per-plugin, and per-task environment overrides." ), ) parser.add_argument("--spec", required=True, help="Path to pipeline spec JSON.") @@ -37,10 +37,16 @@ def run_runtime(argv: list[str], *, console: Console) -> None: required=True, help="Path to runtime config TOML.", ) - parser.add_argument("--arguments", required=False, help="Path to run arguments JSON.") + parser.add_argument( + "--arguments", required=False, help="Path to run arguments JSON." + ) parser.add_argument("--job-id", required=False, help="Runtime job ID.") - parser.add_argument("--output-dir", required=False, help="Directory for output artifacts.") - parser.add_argument("--runtime-url", required=False, help="Runtime adapter API base URL.") + parser.add_argument( + "--output-dir", required=False, help="Directory for output artifacts." + ) + parser.add_argument( + "--runtime-url", required=False, help="Runtime adapter API base URL." + ) parser.add_argument( "--cache-dir", required=True, @@ -82,7 +88,11 @@ def run_runtime(argv: list[str], *, console: Console) -> None: reuse=opts.reuse, ) - connected = bool(opts.connected and opts.job_id and (opts.runtime_url or os.getenv("RUNTIME_URL"))) + connected = bool( + opts.connected + and opts.job_id + and (opts.runtime_url or os.getenv("RUNTIME_URL")) + ) runtime_url = opts.runtime_url or os.getenv("RUNTIME_URL") log_monitor = LogMonitor(console=console) @@ -104,8 +114,12 @@ def run_runtime(argv: list[str], *, console: Console) -> None: executor = select_default_executor( default_override=_default_override(run_config), - plugin_overrides=_named_overrides(run_config.plugins if run_config is not None else {}), - task_overrides=_named_overrides(run_config.tasks if run_config is not None else {}), + plugin_overrides=_named_overrides( + run_config.plugins if run_config is not None else {} + ), + task_overrides=_named_overrides( + run_config.tasks if run_config is not None else {} + ), ) try: @@ -157,9 +171,10 @@ def 
_default_override(run_config: Any) -> TaskEnvironmentOverride | None: if run_config is None: return None defaults = run_config.defaults - if defaults.image is None and defaults.platform is None: + if defaults.kind is None and defaults.image is None and defaults.platform is None: return None return TaskEnvironmentOverride( + kind=defaults.kind, reference=defaults.image, platform=defaults.platform, ) @@ -170,11 +185,14 @@ def _named_overrides( ) -> dict[str, TaskEnvironmentOverride] | None: resolved = { name: TaskEnvironmentOverride( + kind=override.kind, reference=override.image, platform=override.platform, ) for name, override in raw_overrides.items() - if override.image is not None or override.platform is not None + if override.kind is not None + or override.image is not None + or override.platform is not None } return resolved or None @@ -190,7 +208,11 @@ def _build_arguments( if isinstance(runtime_arguments, dict): if isinstance(runtime_arguments.get("inputs"), dict): - _apply_named_arguments(arguments=arguments, runtime_arguments=runtime_arguments, storage_root=storage_root) + _apply_named_arguments( + arguments=arguments, + runtime_arguments=runtime_arguments, + storage_root=storage_root, + ) else: _apply_legacy_arguments( pipeline=pipeline, @@ -199,7 +221,9 @@ def _build_arguments( storage_root=storage_root, ) - resolved_outputs = _resolve_outputs(runtime_arguments.get("outputs"), storage_root=storage_root) + resolved_outputs = _resolve_outputs( + runtime_arguments.get("outputs"), storage_root=storage_root + ) if resolved_outputs is not None: arguments.outputs = resolved_outputs @@ -215,7 +239,9 @@ def _apply_named_arguments( raw_inputs = runtime_arguments.get("inputs", {}) if isinstance(raw_inputs, dict): for name, value in raw_inputs.items(): - arguments.inputs[name] = _resolve_input_path(value, storage_root=storage_root) + arguments.inputs[name] = _resolve_input_path( + value, storage_root=storage_root + ) raw_parameters = 
runtime_arguments.get("parameters", {}) if isinstance(raw_parameters, dict): @@ -230,7 +256,11 @@ def _apply_legacy_arguments( storage_root: str, ) -> None: preprocessing = runtime_arguments.get("preprocessing", {}) - root_artifacts = preprocessing.get("root_artifacts", []) if isinstance(preprocessing, dict) else [] + root_artifacts = ( + preprocessing.get("root_artifacts", []) + if isinstance(preprocessing, dict) + else [] + ) token_lookup: dict[str, Any] = {} if isinstance(root_artifacts, list): for artifact in root_artifacts: @@ -246,12 +276,16 @@ def _apply_legacy_arguments( token = token_lookup.get(str(input_def.id)) if token is None: continue - arguments.inputs[input_def.name] = _resolve_input_path(token, storage_root=storage_root) + arguments.inputs[input_def.name] = _resolve_input_path( + token, storage_root=storage_root + ) named_inputs = runtime_arguments.get("inputs", {}) if isinstance(named_inputs, dict): for name, value in named_inputs.items(): - arguments.inputs[name] = _resolve_input_path(value, storage_root=storage_root) + arguments.inputs[name] = _resolve_input_path( + value, storage_root=storage_root + ) task_arguments = runtime_arguments.get("arguments", {}) if isinstance(task_arguments, dict): @@ -314,7 +348,9 @@ def _is_missing(value: Any) -> bool: return value is None or value == "" or value == "" -def _validate_required_arguments(pipeline: AdagioPipeline, arguments: AdagioArguments) -> None: +def _validate_required_arguments( + pipeline: AdagioPipeline, arguments: AdagioArguments +) -> None: missing_inputs = [ input_def.name for input_def in pipeline.signature.inputs @@ -323,11 +359,15 @@ def _validate_required_arguments(pipeline: AdagioPipeline, arguments: AdagioArgu missing_params = [ param.name for param in pipeline.signature.parameters - if param.required and param.default is None and _is_missing(arguments.parameters.get(param.name)) + if param.required + and param.default is None + and _is_missing(arguments.parameters.get(param.name)) ] 
if missing_inputs or missing_params: - missing = [f"input:{name}" for name in missing_inputs] + [f"param:{name}" for name in missing_params] + missing = [f"input:{name}" for name in missing_inputs] + [ + f"param:{name}" for name in missing_params + ] raise SystemExit("Missing required runtime arguments: " + ", ".join(missing)) diff --git a/src/adagio/executors/__init__.py b/src/adagio/executors/__init__.py index da8647b..22da8d3 100644 --- a/src/adagio/executors/__init__.py +++ b/src/adagio/executors/__init__.py @@ -11,6 +11,7 @@ def select_default_executor( ConfigurableTaskEnvironmentResolver, DefaultTaskEnvironmentResolver, ) + from .apptainer import ApptainerTaskEnvironmentLauncher from .docker import DockerTaskEnvironmentLauncher from .task_environments import TaskEnvironmentExecutor @@ -22,6 +23,7 @@ def select_default_executor( task_overrides=task_overrides, ), launchers={ + "apptainer": ApptainerTaskEnvironmentLauncher(), "docker": DockerTaskEnvironmentLauncher(), }, ) diff --git a/src/adagio/executors/apptainer.py b/src/adagio/executors/apptainer.py new file mode 100644 index 0000000..98b6654 --- /dev/null +++ b/src/adagio/executors/apptainer.py @@ -0,0 +1,204 @@ +import shutil +import subprocess +from pathlib import Path + +from rich.console import Console + +from .base import ( + TaskEnvironmentLauncher, + TaskEnvironmentSpec, + TaskExecutionRequest, + TaskExecutionResult, +) +from .cache_support import mount_path_for_cache +from .container_support import ( + containerize_host_value, + containerize_path, + host_path_from_container, + is_uri, + local_source_root, + print_filtered_container_stderr, + python_warning_env_assignments, + with_apptainer_binds, +) +from .task_contract import ( + build_task_spec, + parse_result_manifest, + read_json_file, + result_manifest_path, + task_spec_path, + write_json_file, +) + + +class ApptainerTaskEnvironmentLauncher(TaskEnvironmentLauncher): + kind = "apptainer" + + def launch( + self, + *, + environment: 
TaskEnvironmentSpec, + request: TaskExecutionRequest, + console: Console | None = None, + ) -> TaskExecutionResult: + image_path = _resolve_sif_image(environment.reference) + runtime_executable = _resolve_runtime_executable() + + task = request.task + archive_inputs = { + name: containerize_host_value(value) + for name, value in request.archive_inputs.items() + } + metadata_inputs = { + name: containerize_host_value(value) + for name, value in request.metadata_inputs.items() + } + outputs = { + name: containerize_path(Path(path)) + for name, path in request.outputs.items() + } + + manifest_path = result_manifest_path( + task_id=task.id, work_path=request.work_path + ) + spec_path = task_spec_path(task_id=task.id, work_path=request.work_path) + task_spec = build_task_spec( + plugin=task.plugin, + action=task.action, + archive_inputs=archive_inputs, + metadata_inputs=metadata_inputs, + params=dict(request.params), + metadata_column_kwargs=dict(request.metadata_column_kwargs), + outputs=outputs, + result_manifest=containerize_path(manifest_path), + cache_path=( + containerize_path(Path(request.cache_path)) + if request.cache_path is not None + else None + ), + recycle_pool=request.recycle_pool, + ) + write_json_file(spec_path, task_spec) + + src_root = local_source_root() + command = [ + runtime_executable, + "exec", + "--no-home", + "--pwd", + containerize_path(request.cwd), + ] + + host_paths = [request.cwd, request.work_path, src_root] + for value in list(request.archive_inputs.values()) + list( + request.metadata_inputs.values() + ): + if is_uri(value): + continue + path = Path(value) + if path.is_absolute(): + host_paths.append(path) + if request.cache_path is not None: + host_paths.append(mount_path_for_cache(Path(request.cache_path))) + + command = with_apptainer_binds(command=command, host_paths=host_paths) + command.extend( + [ + str(image_path), + "env", + f"PYTHONPATH={containerize_path(src_root)}", + *python_warning_env_assignments(), + "python", + "-m", + 
"adagio.cli.task_exec", + "--task", + containerize_path(spec_path), + ] + ) + + if console is not None: + label = f"{Path(runtime_executable).name} {image_path}" + console.print(f"[dim]Task environment:[/dim] {label}") + + try: + result = subprocess.run( + command, + check=False, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + ) + except FileNotFoundError as exc: + raise SystemExit( + "Apptainer/Singularity is required for task environment execution " + "but was not found in PATH. Ensure the job environment includes the " + "Apptainer binary location." + ) from exc + + if console is not None: + print_filtered_container_stderr( + console=console, stderr_text=result.stderr or "" + ) + + if result.returncode != 0: + stdout_text = (result.stdout or "").strip() + stderr_text = (result.stderr or "").strip() + if stderr_text: + detail = f" Runtime reported: {stderr_text}" + elif stdout_text: + detail = f" Container stdout: {stdout_text}" + else: + detail = "" + raise RuntimeError( + f"Task {task.id!r} ({task.plugin}.{task.action}) failed " + f"while launching environment {str(image_path)!r} " + f"with exit code {result.returncode}.{detail}" + ) + + if not manifest_path.exists(): + raise RuntimeError( + f"Task {task.id!r} completed but did not write an output manifest." + ) + + output_manifest = read_json_file(manifest_path) + reported_outputs, reused = parse_result_manifest(output_manifest) + resolved_outputs = {} + for output_name in request.outputs: + actual_path = reported_outputs.get(output_name) + if not isinstance(actual_path, str): + raise RuntimeError( + f"Task {task.id!r} did not report output {output_name!r}." 
+ ) + resolved_outputs[output_name] = str(host_path_from_container(actual_path)) + + return TaskExecutionResult(outputs=resolved_outputs, reused=reused) + + +def _resolve_runtime_executable() -> str: + for candidate in ("apptainer", "singularity"): + resolved = shutil.which(candidate) + if resolved: + return resolved + raise SystemExit( + "Apptainer/Singularity is required for task environment execution " + "but was not found in PATH. Ensure the job environment includes the " + "Apptainer binary location." + ) + + +def _resolve_sif_image(reference: str) -> Path: + if is_uri(reference): + raise RuntimeError( + "Apptainer task environments currently support only local .sif image paths." + ) + + image_path = Path(reference).expanduser().resolve() + if image_path.suffix.lower() != ".sif": + raise RuntimeError( + f"Apptainer task environments require a local .sif image path, got {reference!r}." + ) + if not image_path.exists(): + raise RuntimeError(f"Apptainer image not found: {image_path}") + if not image_path.is_file(): + raise RuntimeError(f"Apptainer image is not a file: {image_path}") + return image_path diff --git a/src/adagio/executors/base.py b/src/adagio/executors/base.py index 9b47d6c..9446a9a 100644 --- a/src/adagio/executors/base.py +++ b/src/adagio/executors/base.py @@ -36,6 +36,7 @@ class TaskEnvironmentSpec: @dataclass(frozen=True) class TaskEnvironmentOverride: + kind: str | None = None reference: str | None = None platform: str | None = None diff --git a/src/adagio/executors/container_support.py b/src/adagio/executors/container_support.py index 8c4b97c..61b731d 100644 --- a/src/adagio/executors/container_support.py +++ b/src/adagio/executors/container_support.py @@ -21,6 +21,20 @@ def with_mounts(*, command: list[str], host_paths: list[Path]) -> list[str]: return [*command[:3], *mount_flags, *command[3:]] +def with_apptainer_binds(*, command: list[str], host_paths: list[Path]) -> list[str]: + """Attach bind mounts for top-level host roots needed by 
Apptainer/Singularity.""" + roots = mount_roots(host_paths) + bind_flags: list[str] = [] + for root in roots: + bind_flags.extend( + [ + "--bind", + f"{root}:{containerize_path(root)}:rw", + ] + ) + return [*command[:2], *bind_flags, *command[2:]] + + def docker_tty_flags() -> list[str]: """Allocate Docker TTY when the current session is interactive.""" if sys.stdin.isatty() and sys.stdout.isatty(): @@ -28,15 +42,23 @@ def docker_tty_flags() -> list[str]: return [] -def python_warning_env_flags() -> list[str]: - """Suppress known noisy runtime warnings in container mode.""" +def python_warning_env_assignments() -> list[str]: + """Return runtime warning environment assignments for container execution.""" filters = os.getenv("ADAGIO_PYTHONWARNINGS") if filters is None: filters = "ignore:pkg_resources is deprecated as an API:UserWarning" filters = filters.strip() if not filters: return [] - return ["-e", f"PYTHONWARNINGS={filters}"] + return [f"PYTHONWARNINGS={filters}"] + + +def python_warning_env_flags() -> list[str]: + """Suppress known noisy runtime warnings in container mode.""" + flags: list[str] = [] + for assignment in python_warning_env_assignments(): + flags.extend(["-e", assignment]) + return flags def mount_roots(paths: list[Path]) -> list[Path]: diff --git a/src/adagio/executors/defaults.py b/src/adagio/executors/defaults.py index bb1b0a7..9ebf57b 100644 --- a/src/adagio/executors/defaults.py +++ b/src/adagio/executors/defaults.py @@ -56,6 +56,7 @@ def __init__( def resolve(self, *, task: PluginActionTask) -> TaskEnvironmentSpec: base_environment = self._base.resolve(task=task) + kind = base_environment.kind reference = base_environment.reference options = dict(base_environment.options or {}) configured = False @@ -67,6 +68,9 @@ def resolve(self, *, task: PluginActionTask) -> TaskEnvironmentSpec: ): if override is None: continue + if override.kind is not None: + kind = override.kind + configured = True if override.reference is not None: reference = 
override.reference configured = True @@ -75,7 +79,7 @@ def resolve(self, *, task: PluginActionTask) -> TaskEnvironmentSpec: configured = True return TaskEnvironmentSpec( - kind=base_environment.kind, + kind=kind, reference=reference, description=( f"configured environment for {task.name or task.id}" @@ -85,7 +89,9 @@ def resolve(self, *, task: PluginActionTask) -> TaskEnvironmentSpec: options=options or None, ) - def _find_task_override(self, *, task: PluginActionTask) -> TaskEnvironmentOverride | None: + def _find_task_override( + self, *, task: PluginActionTask + ) -> TaskEnvironmentOverride | None: candidates = [task.id] if task.name: candidates.insert(0, task.name) @@ -97,7 +103,9 @@ def _find_task_override(self, *, task: PluginActionTask) -> TaskEnvironmentOverr return override return None - def _find_plugin_override(self, *, task: PluginActionTask) -> TaskEnvironmentOverride | None: + def _find_plugin_override( + self, *, task: PluginActionTask + ) -> TaskEnvironmentOverride | None: plugin_candidates = [task.plugin, task.plugin.lower()] for candidate in plugin_candidates: override = self._plugin_overrides.get(candidate) diff --git a/tests/test_apptainer_launcher.py b/tests/test_apptainer_launcher.py new file mode 100644 index 0000000..254433b --- /dev/null +++ b/tests/test_apptainer_launcher.py @@ -0,0 +1,208 @@ +import subprocess +import tempfile +import unittest +from pathlib import Path +from unittest.mock import patch + +from adagio.executors.apptainer import ApptainerTaskEnvironmentLauncher +from adagio.executors.base import TaskEnvironmentSpec, TaskExecutionRequest +from adagio.executors.container_support import ( + containerize_path, + local_source_root, + mount_roots, +) +from adagio.executors.task_contract import ( + build_result_manifest, + result_manifest_path, + task_spec_path, + write_json_file, +) +from adagio.model.task import PluginActionTask + + +def _task() -> PluginActionTask: + return PluginActionTask.model_validate( + { + "id": "task-1", 
+ "kind": "plugin-action", + "plugin": "dada2", + "action": "denoise_single", + "inputs": {}, + "parameters": {}, + "outputs": {"table": {"kind": "archive", "id": "out-1"}}, + } + ) + + +class ApptainerLauncherTests(unittest.TestCase): + def test_launch_builds_apptainer_exec_command(self) -> None: + launcher = ApptainerTaskEnvironmentLauncher() + task = _task() + + with tempfile.TemporaryDirectory() as tmpdir: + root = Path(tmpdir).resolve() + cwd = root / "cwd" + work_path = root / "work" + cwd.mkdir() + work_path.mkdir() + image_path = root / "q2-dada2.sif" + image_path.write_text("stub", encoding="utf-8") + output_path = work_path / "table.qza" + input_path = cwd / "input.qza" + input_path.write_text("input", encoding="utf-8") + + request = TaskExecutionRequest( + task=task, + cwd=cwd, + work_path=work_path, + archive_inputs={"seqs": str(input_path)}, + metadata_inputs={}, + params={}, + metadata_column_kwargs={}, + outputs={"table": str(output_path)}, + ) + + manifest_path = result_manifest_path(task_id=task.id, work_path=work_path) + expected_spec = containerize_path( + task_spec_path(task_id=task.id, work_path=work_path) + ) + + def fake_run(cmd, check, stdout, stderr, text): # noqa: ANN001 + write_json_file( + manifest_path, + build_result_manifest( + outputs={"table": containerize_path(output_path)}, + reused=False, + ), + ) + return subprocess.CompletedProcess(cmd, 0, "", "") + + with ( + patch( + "adagio.executors.apptainer.shutil.which", + side_effect=["/usr/bin/apptainer", None], + ), + patch( + "adagio.executors.apptainer.subprocess.run", + side_effect=fake_run, + ) as run_mock, + ): + result = launcher.launch( + environment=TaskEnvironmentSpec( + kind="apptainer", + reference=str(image_path), + ), + request=request, + ) + + command = run_mock.call_args.args[0] + bind_targets = { + f"{root_path}:{containerize_path(root_path)}:rw" + for root_path in mount_roots( + [cwd, work_path, input_path, local_source_root()] + ) + } + + self.assertEqual(command[0], 
"/usr/bin/apptainer") + self.assertEqual(command[1], "exec") + self.assertIn("--no-home", command) + self.assertIn("--pwd", command) + self.assertIn(containerize_path(cwd), command) + self.assertIn(str(image_path), command) + self.assertIn("env", command) + self.assertIn(f"PYTHONPATH={containerize_path(local_source_root())}", command) + self.assertIn("python", command) + self.assertIn("-m", command) + self.assertIn("adagio.cli.task_exec", command) + self.assertIn("--task", command) + self.assertIn(expected_spec, command) + self.assertTrue(bind_targets.issubset(set(command))) + self.assertEqual(result.outputs, {"table": str(output_path)}) + self.assertFalse(result.reused) + + def test_launch_falls_back_to_singularity(self) -> None: + launcher = ApptainerTaskEnvironmentLauncher() + task = _task() + + with tempfile.TemporaryDirectory() as tmpdir: + root = Path(tmpdir).resolve() + cwd = root / "cwd" + work_path = root / "work" + cwd.mkdir() + work_path.mkdir() + image_path = root / "q2-dada2.sif" + image_path.write_text("stub", encoding="utf-8") + output_path = work_path / "table.qza" + manifest_path = result_manifest_path(task_id=task.id, work_path=work_path) + + request = TaskExecutionRequest( + task=task, + cwd=cwd, + work_path=work_path, + archive_inputs={}, + metadata_inputs={}, + params={}, + metadata_column_kwargs={}, + outputs={"table": str(output_path)}, + ) + + def fake_run(cmd, check, stdout, stderr, text): # noqa: ANN001 + write_json_file( + manifest_path, + build_result_manifest( + outputs={"table": containerize_path(output_path)}, + reused=False, + ), + ) + return subprocess.CompletedProcess(cmd, 0, "", "") + + with ( + patch( + "adagio.executors.apptainer.shutil.which", + side_effect=[None, "/usr/bin/singularity"], + ), + patch( + "adagio.executors.apptainer.subprocess.run", + side_effect=fake_run, + ) as run_mock, + ): + launcher.launch( + environment=TaskEnvironmentSpec( + kind="apptainer", + reference=str(image_path), + ), + request=request, + ) + + 
command = run_mock.call_args.args[0] + self.assertEqual(command[0], "/usr/bin/singularity") + + def test_launch_rejects_non_local_image_reference(self) -> None: + launcher = ApptainerTaskEnvironmentLauncher() + + with tempfile.TemporaryDirectory() as tmpdir: + root = Path(tmpdir).resolve() + cwd = root / "cwd" + work_path = root / "work" + cwd.mkdir() + work_path.mkdir() + + request = TaskExecutionRequest( + task=_task(), + cwd=cwd, + work_path=work_path, + archive_inputs={}, + metadata_inputs={}, + params={}, + metadata_column_kwargs={}, + outputs={"table": str(work_path / "table.qza")}, + ) + + with self.assertRaisesRegex(RuntimeError, "local \\.sif image paths"): + launcher.launch( + environment=TaskEnvironmentSpec( + kind="apptainer", + reference="docker://ghcr.io/cymis/qiime2-plugin-dada2:2026.1", + ), + request=request, + ) diff --git a/tests/test_task_environment_config.py b/tests/test_task_environment_config.py new file mode 100644 index 0000000..a7d8bd8 --- /dev/null +++ b/tests/test_task_environment_config.py @@ -0,0 +1,99 @@ +import tempfile +import unittest +from pathlib import Path + +from adagio.cli.config import load_run_config +from adagio.executors.base import TaskEnvironmentOverride +from adagio.executors.defaults import ( + ConfigurableTaskEnvironmentResolver, + DefaultTaskEnvironmentResolver, +) +from adagio.model.task import PluginActionTask + + +def _task(*, name: str | None = None) -> PluginActionTask: + return PluginActionTask.model_validate( + { + "id": "task-1", + "kind": "plugin-action", + "name": name, + "plugin": "dada2", + "action": "denoise_single", + "inputs": {}, + "parameters": {}, + "outputs": {}, + } + ) + + +class RunConfigTests(unittest.TestCase): + def test_load_run_config_accepts_apptainer_kind(self) -> None: + with tempfile.TemporaryDirectory() as tmpdir: + config_path = Path(tmpdir) / "runtime.toml" + config_path.write_text( + "\n".join( + [ + "version = 1", + "", + "[defaults]", + 'kind = "apptainer"', + 'image = 
"/images/default.sif"', + "", + "[plugins]", + 'dada2 = { kind = "apptainer", image = "/images/dada2.sif" }', + "", + "[tasks]", + '"dada2.denoise_single" = { image = "/images/task.sif" }', + ] + ), + encoding="utf-8", + ) + + config = load_run_config(config_path) + + assert config is not None + self.assertEqual(config.defaults.kind, "apptainer") + self.assertEqual(config.defaults.image, "/images/default.sif") + self.assertEqual(config.plugins["dada2"].kind, "apptainer") + self.assertEqual(config.tasks["dada2.denoise_single"].image, "/images/task.sif") + + +class ConfigurableResolverTests(unittest.TestCase): + def test_plugin_override_inherits_default_apptainer_kind(self) -> None: + resolver = ConfigurableTaskEnvironmentResolver( + base=DefaultTaskEnvironmentResolver(), + default_override=TaskEnvironmentOverride( + kind="apptainer", + reference="/images/default.sif", + ), + plugin_overrides={ + "dada2": TaskEnvironmentOverride(reference="/images/dada2.sif"), + }, + ) + + environment = resolver.resolve(task=_task()) + + self.assertEqual(environment.kind, "apptainer") + self.assertEqual(environment.reference, "/images/dada2.sif") + + def test_task_override_can_switch_back_to_docker(self) -> None: + resolver = ConfigurableTaskEnvironmentResolver( + base=DefaultTaskEnvironmentResolver(), + default_override=TaskEnvironmentOverride( + kind="apptainer", + reference="/images/default.sif", + ), + task_overrides={ + "named-step": TaskEnvironmentOverride( + kind="docker", + reference="registry.internal/dada2:1.0", + platform="linux/amd64", + ) + }, + ) + + environment = resolver.resolve(task=_task(name="named-step")) + + self.assertEqual(environment.kind, "docker") + self.assertEqual(environment.reference, "registry.internal/dada2:1.0") + self.assertEqual(environment.options, {"platform": "linux/amd64"}) From 5ee9aee674342ffa882a22abc583c81b36954ad6 Mon Sep 17 00:00:00 2001 From: John Chase Date: Thu, 26 Mar 2026 10:52:03 -0700 Subject: [PATCH 32/44] Fizes bug where host 
packages were injected into container --- src/adagio/executors/apptainer.py | 10 +- src/adagio/executors/container_support.py | 46 ++++++++- src/adagio/executors/docker.py | 12 ++- tests/test_apptainer_launcher.py | 9 +- tests/test_container_support.py | 58 +++++++++++ tests/test_docker_launcher.py | 115 ++++++++++++++++++++++ 6 files changed, 237 insertions(+), 13 deletions(-) create mode 100644 tests/test_container_support.py create mode 100644 tests/test_docker_launcher.py diff --git a/src/adagio/executors/apptainer.py b/src/adagio/executors/apptainer.py index 98b6654..c7408da 100644 --- a/src/adagio/executors/apptainer.py +++ b/src/adagio/executors/apptainer.py @@ -12,11 +12,11 @@ ) from .cache_support import mount_path_for_cache from .container_support import ( + container_python_root, containerize_host_value, containerize_path, host_path_from_container, is_uri, - local_source_root, print_filtered_container_stderr, python_warning_env_assignments, with_apptainer_binds, @@ -80,16 +80,17 @@ def launch( ) write_json_file(spec_path, task_spec) - src_root = local_source_root() + python_root = container_python_root(work_path=request.work_path) command = [ runtime_executable, "exec", + "--cleanenv", "--no-home", "--pwd", containerize_path(request.cwd), ] - host_paths = [request.cwd, request.work_path, src_root] + host_paths = [request.cwd, request.work_path, python_root] for value in list(request.archive_inputs.values()) + list( request.metadata_inputs.values() ): @@ -106,7 +107,8 @@ def launch( [ str(image_path), "env", - f"PYTHONPATH={containerize_path(src_root)}", + f"PYTHONPATH={containerize_path(python_root)}", + "PYTHONNOUSERSITE=1", *python_warning_env_assignments(), "python", "-m", diff --git a/src/adagio/executors/container_support.py b/src/adagio/executors/container_support.py index 61b731d..1e5f385 100644 --- a/src/adagio/executors/container_support.py +++ b/src/adagio/executors/container_support.py @@ -1,10 +1,12 @@ import os +import shutil import sys from 
pathlib import Path from rich.console import Console HOST_MOUNT_POINT = "/host" +STAGED_CONTAINER_PYTHON_ROOT = ".adagio-container-python" def with_mounts(*, command: list[str], host_paths: list[Path]) -> list[str]: @@ -103,7 +105,22 @@ def is_uri(value: str) -> bool: def local_source_root() -> Path: """Return the local `adagio-cli/src` path for container PYTHONPATH.""" - return Path(__file__).resolve().parents[2] + source_root = _adagio_source_root() + if source_root is None: + raise RuntimeError("Adagio source root is unavailable from this installation.") + return source_root + + +def container_python_root(*, work_path: Path, module_file: Path | None = None) -> Path: + """Return an isolated Python root that exposes only the Adagio package.""" + source_root = _adagio_source_root(module_file=module_file) + if source_root is not None: + return source_root + + package_dir = _adagio_package_dir(module_file=module_file) + staged_root = (work_path / STAGED_CONTAINER_PYTHON_ROOT).resolve() + _stage_adagio_package(package_dir=package_dir, staged_root=staged_root) + return staged_root def print_filtered_container_stderr(*, console: Console, stderr_text: str) -> None: @@ -123,3 +140,30 @@ def is_docker_platform_warning(line: str) -> bool: "requested image's platform" in line and "does not match the detected host platform" in line ) + + +def _adagio_source_root(*, module_file: Path | None = None) -> Path | None: + package_dir = _adagio_package_dir(module_file=module_file) + candidate = package_dir.parent + if candidate.name != "src": + return None + if not (candidate / "adagio" / "__init__.py").is_file(): + return None + return candidate + + +def _adagio_package_dir(*, module_file: Path | None = None) -> Path: + resolved = (module_file or Path(__file__)).resolve() + return resolved.parents[1] + + +def _stage_adagio_package(*, package_dir: Path, staged_root: Path) -> None: + staged_package_dir = staged_root / package_dir.name + if staged_package_dir.exists(): + 
shutil.rmtree(staged_package_dir) + staged_root.mkdir(parents=True, exist_ok=True) + shutil.copytree( + package_dir, + staged_package_dir, + ignore=shutil.ignore_patterns("__pycache__", "*.pyc"), + ) diff --git a/src/adagio/executors/docker.py b/src/adagio/executors/docker.py index ce211bf..176c62b 100644 --- a/src/adagio/executors/docker.py +++ b/src/adagio/executors/docker.py @@ -11,13 +11,13 @@ ) from .cache_support import mount_path_for_cache from .container_support import ( + container_python_root, containerize_host_value, containerize_path, + docker_tty_flags, host_path_from_container, is_uri, - local_source_root, print_filtered_container_stderr, - docker_tty_flags, python_warning_env_flags, with_mounts, ) @@ -75,7 +75,7 @@ def launch( ) write_json_file(spec_path, task_spec) - src_root = local_source_root() + python_root = container_python_root(work_path=request.work_path) platform = None if environment.options is not None: raw_platform = environment.options.get("platform") @@ -88,7 +88,9 @@ def launch( "--rm", *docker_tty_flags(), "-e", - f"PYTHONPATH={containerize_path(src_root)}", + f"PYTHONPATH={containerize_path(python_root)}", + "-e", + "PYTHONNOUSERSITE=1", *python_warning_env_flags(), "-w", containerize_path(request.cwd), @@ -104,7 +106,7 @@ def launch( containerize_path(spec_path), ]) - host_paths = [request.cwd, request.work_path, src_root] + host_paths = [request.cwd, request.work_path, python_root] for value in list(request.archive_inputs.values()) + list(request.metadata_inputs.values()): if is_uri(value): continue diff --git a/tests/test_apptainer_launcher.py b/tests/test_apptainer_launcher.py index 254433b..59bc16d 100644 --- a/tests/test_apptainer_launcher.py +++ b/tests/test_apptainer_launcher.py @@ -7,8 +7,8 @@ from adagio.executors.apptainer import ApptainerTaskEnvironmentLauncher from adagio.executors.base import TaskEnvironmentSpec, TaskExecutionRequest from adagio.executors.container_support import ( + container_python_root, 
containerize_path, - local_source_root, mount_roots, ) from adagio.executors.task_contract import ( @@ -96,21 +96,24 @@ def fake_run(cmd, check, stdout, stderr, text): # noqa: ANN001 ) command = run_mock.call_args.args[0] + python_root = container_python_root(work_path=work_path) bind_targets = { f"{root_path}:{containerize_path(root_path)}:rw" for root_path in mount_roots( - [cwd, work_path, input_path, local_source_root()] + [cwd, work_path, input_path, python_root] ) } self.assertEqual(command[0], "/usr/bin/apptainer") self.assertEqual(command[1], "exec") + self.assertIn("--cleanenv", command) self.assertIn("--no-home", command) self.assertIn("--pwd", command) self.assertIn(containerize_path(cwd), command) self.assertIn(str(image_path), command) self.assertIn("env", command) - self.assertIn(f"PYTHONPATH={containerize_path(local_source_root())}", command) + self.assertIn(f"PYTHONPATH={containerize_path(python_root)}", command) + self.assertIn("PYTHONNOUSERSITE=1", command) self.assertIn("python", command) self.assertIn("-m", command) self.assertIn("adagio.cli.task_exec", command) diff --git a/tests/test_container_support.py b/tests/test_container_support.py new file mode 100644 index 0000000..3dccd2e --- /dev/null +++ b/tests/test_container_support.py @@ -0,0 +1,58 @@ +import tempfile +import unittest +from pathlib import Path + +from adagio.executors.container_support import ( + STAGED_CONTAINER_PYTHON_ROOT, + container_python_root, +) + + +class ContainerPythonRootTests(unittest.TestCase): + def test_prefers_repo_src_tree_when_available(self) -> None: + with tempfile.TemporaryDirectory() as tmpdir: + root = Path(tmpdir).resolve() + src_root = root / "src" + package_dir = src_root / "adagio" + module_file = package_dir / "executors" / "container_support.py" + work_path = root / "work" + + (package_dir / "executors").mkdir(parents=True) + work_path.mkdir() + (package_dir / "__init__.py").write_text("", encoding="utf-8") + module_file.write_text("", 
encoding="utf-8") + + result = container_python_root(work_path=work_path, module_file=module_file) + + self.assertEqual(result, src_root) + self.assertFalse((work_path / STAGED_CONTAINER_PYTHON_ROOT).exists()) + + def test_stages_only_adagio_package_from_site_packages(self) -> None: + with tempfile.TemporaryDirectory() as tmpdir: + root = Path(tmpdir).resolve() + site_packages = root / "site-packages" + package_dir = site_packages / "adagio" + module_file = package_dir / "executors" / "container_support.py" + work_path = root / "work" + + (package_dir / "executors").mkdir(parents=True) + (package_dir / "cli").mkdir() + (site_packages / "psutil").mkdir(parents=True) + work_path.mkdir() + + (package_dir / "__init__.py").write_text("", encoding="utf-8") + (package_dir / "cli" / "task_exec.py").write_text( + "VALUE = 1\n", encoding="utf-8" + ) + module_file.write_text("", encoding="utf-8") + (site_packages / "psutil" / "__init__.py").write_text( + "VALUE = 2\n", encoding="utf-8" + ) + + result = container_python_root(work_path=work_path, module_file=module_file) + + staged_root = work_path / STAGED_CONTAINER_PYTHON_ROOT + self.assertEqual(result, staged_root) + self.assertTrue((staged_root / "adagio" / "__init__.py").exists()) + self.assertTrue((staged_root / "adagio" / "cli" / "task_exec.py").exists()) + self.assertFalse((staged_root / "psutil").exists()) diff --git a/tests/test_docker_launcher.py b/tests/test_docker_launcher.py new file mode 100644 index 0000000..cb318cd --- /dev/null +++ b/tests/test_docker_launcher.py @@ -0,0 +1,115 @@ +import subprocess +import tempfile +import unittest +from pathlib import Path +from unittest.mock import patch + +from adagio.executors.base import TaskEnvironmentSpec, TaskExecutionRequest +from adagio.executors.container_support import ( + container_python_root, + containerize_path, + mount_roots, +) +from adagio.executors.docker import DockerTaskEnvironmentLauncher +from adagio.executors.task_contract import ( + 
build_result_manifest, + result_manifest_path, + task_spec_path, + write_json_file, +) +from adagio.model.task import PluginActionTask + + +def _task() -> PluginActionTask: + return PluginActionTask.model_validate( + { + "id": "task-1", + "kind": "plugin-action", + "plugin": "demux", + "action": "summarize", + "inputs": {}, + "parameters": {}, + "outputs": {"visualization": {"kind": "archive", "id": "out-1"}}, + } + ) + + +class DockerLauncherTests(unittest.TestCase): + def test_launch_builds_docker_run_command(self) -> None: + launcher = DockerTaskEnvironmentLauncher() + task = _task() + + with tempfile.TemporaryDirectory() as tmpdir: + root = Path(tmpdir).resolve() + cwd = root / "cwd" + work_path = root / "work" + cwd.mkdir() + work_path.mkdir() + output_path = work_path / "summary.qzv" + input_path = cwd / "input.qza" + input_path.write_text("input", encoding="utf-8") + + request = TaskExecutionRequest( + task=task, + cwd=cwd, + work_path=work_path, + archive_inputs={"data": str(input_path)}, + metadata_inputs={}, + params={}, + metadata_column_kwargs={}, + outputs={"visualization": str(output_path)}, + ) + + manifest_path = result_manifest_path(task_id=task.id, work_path=work_path) + expected_spec = containerize_path( + task_spec_path(task_id=task.id, work_path=work_path) + ) + + def fake_run(cmd, check, stdout, stderr, text): # noqa: ANN001 + write_json_file( + manifest_path, + build_result_manifest( + outputs={"visualization": containerize_path(output_path)}, + reused=False, + ), + ) + return subprocess.CompletedProcess(cmd, 0, "", "") + + with patch( + "adagio.executors.docker.subprocess.run", + side_effect=fake_run, + ) as run_mock: + result = launcher.launch( + environment=TaskEnvironmentSpec( + kind="docker", + reference="ghcr.io/cymis/qiime2-plugin-demux:2026.1", + ), + request=request, + ) + + command = run_mock.call_args.args[0] + python_root = container_python_root(work_path=work_path) + bind_targets = { + 
f"{root_path}:{containerize_path(root_path)}:rw" + for root_path in mount_roots([cwd, work_path, input_path, python_root]) + } + + self.assertEqual(command[0], "docker") + self.assertEqual(command[1], "run") + self.assertEqual(command[2], "--rm") + self.assertIn("-w", command) + self.assertIn(containerize_path(cwd), command) + self.assertIn( + f"PYTHONPATH={containerize_path(python_root)}", + command, + ) + self.assertIn("PYTHONNOUSERSITE=1", command) + self.assertIn("python", command) + self.assertIn("-m", command) + self.assertIn("adagio.cli.task_exec", command) + self.assertIn("--task", command) + self.assertIn(expected_spec, command) + self.assertIn("ghcr.io/cymis/qiime2-plugin-demux:2026.1", command) + self.assertTrue(bind_targets.issubset(set(command))) + self.assertEqual(result.outputs, {"visualization": str(output_path)}) + self.assertFalse(result.reused) From 0dabb01ac0c14a56642ded1711ee2952e9cabf45 Mon Sep 17 00:00:00 2001 From: John Chase Date: Thu, 26 Mar 2026 21:36:33 -0700 Subject: [PATCH 33/44] Adds param descriptions to help --- src/adagio/app/parsers/pipeline.py | 22 ++ src/adagio/cli/args.py | 1 + src/adagio/cli/dynamic.py | 342 ++++++++++++++++++++++++---- src/adagio/cli/main.py | 18 +- src/adagio/cli/runner.py | 53 +++++ src/adagio/model/pipeline.py | 1 + tests/test_output_options.py | 202 ++++++++++++++++ tests/test_pipeline_descriptions.py | 182 +++++++++++++++ 8 files changed, 766 insertions(+), 55 deletions(-) create mode 100644 tests/test_output_options.py create mode 100644 tests/test_pipeline_descriptions.py diff --git a/src/adagio/app/parsers/pipeline.py b/src/adagio/app/parsers/pipeline.py index 7989a69..621fab2 100644 --- a/src/adagio/app/parsers/pipeline.py +++ b/src/adagio/app/parsers/pipeline.py @@ -12,6 +12,7 @@ class Parameter(BaseModel): required: bool default: Optional[Any] = None type: str + description: Optional[str] = None class Input(BaseModel): @@ -19,6 +20,14 @@ class Input(BaseModel): name: str required: bool type: str + 
description: Optional[str] = None + + +class Output(BaseModel): + id: UUID + name: str + type: str + description: Optional[str] = None def _extract_signature(data: Any) -> dict[str, Any]: @@ -65,3 +74,16 @@ def parse_inputs(data: Any) -> List[Input]: ) return [Input(**input_item) for input_item in raw_inputs] + + +def parse_outputs(data: Any) -> List[Output]: + """Parse pipeline outputs from supported pipeline JSON layouts.""" + signature = _extract_signature(data) + + raw_outputs = signature.get("outputs") + if not isinstance(raw_outputs, list): + raise ValueError( + "Invalid pipeline: missing 'signature.outputs' list in pipeline JSON." + ) + + return [Output(**output_item) for output_item in raw_outputs] diff --git a/src/adagio/cli/args.py b/src/adagio/cli/args.py index e7cd024..3a7246e 100644 --- a/src/adagio/cli/args.py +++ b/src/adagio/cli/args.py @@ -11,6 +11,7 @@ class StrEnum(str, Enum): class ParamType(StrEnum): INPUT = "input" PARAM = "param" + OUTPUT = "output" class ShowParamsMode(StrEnum): diff --git a/src/adagio/cli/dynamic.py b/src/adagio/cli/dynamic.py index f044996..3cc81b2 100644 --- a/src/adagio/cli/dynamic.py +++ b/src/adagio/cli/dynamic.py @@ -1,12 +1,15 @@ import inspect +import math import re +import types from pathlib import Path -from typing import Any, Annotated, Callable +from typing import Any, Annotated, Callable, Union, get_args, get_origin from cyclopts import Group from cyclopts import Parameter as CliParameter from ..app.parsers.pipeline import Input as InputSpec +from ..app.parsers.pipeline import Output as OutputSpec from ..app.parsers.pipeline import Parameter as ParamSpec from ..executors.cache_support import ( CACHE_DIR_HELP, @@ -16,52 +19,208 @@ class _PipelineGroupFormatter: - """Render pipeline options in one panel with nested subsections.""" + """Render pipeline options in one aligned table.""" + + def __init__(self, entry_metadata: dict[str, dict[str, Any]]): + self.entry_metadata = entry_metadata def __call__(self, 
console: Any, options: Any, panel: Any) -> None: from rich.console import Group as RichGroup from rich.console import NewLine - from rich.text import Text - from cyclopts.help.specs import PanelSpec, TableSpec, get_default_parameter_columns + from cyclopts.help.specs import PanelSpec, TableSpec - input_entries, parameter_entries = _split_pipeline_entries(panel.entries) renderables: list[Any] = [] if panel.description: renderables.append(panel.description) + if not panel.entries: + return - def add_section(title: str, entries: list[Any]) -> None: - if not entries: - return - if renderables: - renderables.append(NewLine()) - renderables.append(Text(title, style="bold")) - columns = get_default_parameter_columns(console, options, entries) - renderables.append(TableSpec().build(columns, entries)) + if renderables: + renderables.append(NewLine()) + columns = _get_pipeline_parameter_columns( + console, panel.entries, self.entry_metadata + ) + renderables.append(TableSpec().build(columns, panel.entries)) + console.print(PanelSpec().build(RichGroup(*renderables), title=panel.title)) - add_section("Inputs", input_entries) - add_section("Parameters", parameter_entries) - if not renderables: - return +def _entry_key(entry: Any) -> str: + options = entry.all_options if hasattr(entry, "all_options") else () + return next((name for name in options if name.startswith("--")), "") - console.print(PanelSpec().build(RichGroup(*renderables), title=panel.title)) +def _unwrap_optional_type(type_hint: Any) -> Any: + origin = get_origin(type_hint) + if origin not in (types.UnionType, Union): + return type_hint + + args = [arg for arg in get_args(type_hint) if arg is not type(None)] + return args[0] if len(args) == 1 else type_hint + + +def _pipeline_type_label(type_hint: Any) -> str: + type_hint = _unwrap_optional_type(type_hint) + if type_hint is bool: + return "BOOLEAN" + if type_hint is int: + return "INTEGER" + if type_hint is float: + return "NUMBER" + if type_hint is Path: + return 
"PATH" + return "TEXT" + + +def _display_type_label(*, spec_type: str | None, type_hint: Any, is_input: bool) -> str: + if is_input: + return "PATH" + + if spec_type: + compact = _compact_type_text(spec_type) + if compact.startswith("["): + return compact + + return _pipeline_type_label(type_hint) + + +def _output_path_help(description: str | None) -> str: + cleaned = (description or "").strip() + if cleaned: + return f"{cleaned} Overrides --output-dir for this output." + return "Overrides --output-dir for this output." + + +def _render_pipeline_type( + entry: Any, entry_metadata: dict[str, dict[str, Any]], width: int +) -> Any: + from rich.text import Text + + label = entry_metadata.get(_entry_key(entry), {}).get("type_label", "TEXT") + return Text(_wrap_type_label(label, width), style="bold yellow") + + +def _compact_type_text(type_text: str) -> str: + cleaned = type_text.strip() + if "Choices(" not in cleaned: + return f"({cleaned})" + + match = re.search(r"Choices\((.*)\)", cleaned) + if match is None: + return f"({cleaned})" + + choices = [ + choice.strip().strip("'\"") + for choice in match.group(1).split(",") + if choice.strip() + ] + if not choices: + return f"({cleaned})" + return "[" + "|".join(choices) + "]" -def _split_pipeline_entries(entries: list[Any]) -> tuple[list[Any], list[Any]]: - input_entries: list[Any] = [] - parameter_entries: list[Any] = [] - for entry in entries: - options = entry.all_options if hasattr(entry, "all_options") else () - long_name = next((name for name in options if name.startswith("--")), "") - if long_name.startswith("--input-"): - input_entries.append(entry) +def _wrap_type_label(label: str, width: int) -> str: + if len(label) <= width or not (label.startswith("[") and label.endswith("]")): + return label + + choices = [choice for choice in label[1:-1].split("|") if choice] + if not choices: + return label + + lines: list[str] = [] + current = "[" + + for index, choice in enumerate(choices): + is_last = index == 
len(choices) - 1 + separator = "" if current in ("[", " |") else "|" + suffix = "]" if is_last else "" + candidate = current + separator + choice + suffix + + if len(candidate) <= width or current in ("[", " |"): + current = candidate else: - parameter_entries.append(entry) + lines.append(current) + current = " |" + choice + suffix + + if not current.endswith("]"): + current += "]" + lines.append(current) + return "\n".join(lines) + + +def _render_pipeline_description( + entry: Any, entry_metadata: dict[str, dict[str, Any]] +) -> Any: + from rich.text import Text + + from cyclopts.help.inline_text import InlineText + + metadata = entry_metadata.get(_entry_key(entry), {}) + description = entry.description + if description is None: + description = InlineText(Text()) + elif not isinstance(description, InlineText): + if hasattr(description, "__rich_console__"): + description = InlineText(description) + else: + description = InlineText(Text(str(description))) + + default = metadata.get("default") + if default is not None: + description.append(Text(f"[default: {default}]", "dim")) + + if metadata.get("required"): + description.append(Text("[required]", "dim red")) + + return description + + +def _get_pipeline_parameter_columns( + console: Any, + entries: list[Any], + entry_metadata: dict[str, dict[str, Any]], +) -> tuple[Any, ...]: + from cyclopts.help.specs import ( + ColumnSpec, + NameRenderer, + ) + + max_width = math.ceil(console.width * 0.35) + type_width = max( + 8, + min( + max( + len(entry_metadata.get(_entry_key(entry), {}).get("type_label", "TEXT")) + for entry in entries + ), + max(22, min(34, math.ceil(console.width * 0.3))), + ), + ) + name_column = ColumnSpec( + renderer=NameRenderer(max_width=max_width), + header="Option", + justify="left", + style="cyan", + max_width=max_width, + ) + type_column = ColumnSpec( + renderer=lambda entry: _render_pipeline_type(entry, entry_metadata, type_width), + header="Type", + justify="left", + no_wrap=True, + 
width=type_width, + min_width=type_width, + max_width=type_width, + ) + description_column = ColumnSpec( + renderer=lambda entry: _render_pipeline_description(entry, entry_metadata), + header="Description", + justify="left", + overflow="fold", + ) - return input_entries, parameter_entries + return (name_column, type_column, description_column) def _spec_py_type(type_name: str) -> type: @@ -104,10 +263,23 @@ def _resolve_param_type(type_name: str, default: Any) -> type: return declared +def _format_help_text( + *, + description: str | None = None, +) -> str: + """Return plain description text for pipeline help rows.""" + return (description or "").strip() + + +def _is_required_param(spec: ParamSpec) -> bool: + return bool(spec.required and spec.default is None) + + def build_dynamic_run( *, input_specs: list[InputSpec], param_specs: list[ParamSpec], + output_specs: list[OutputSpec], argument_inputs: dict[str, Any] | None = None, argument_params: dict[str, Any] | None = None, run_handler: Callable[ @@ -118,18 +290,22 @@ def build_dynamic_run( dict[str, Any], list[tuple[str, str]], list[tuple[str, str]], + list[tuple[str, str]], + str, list[str], list[str], ], None, ], ): - """Build a dynamic run command from pipeline input and parameter specs.""" + """Build a dynamic run command from pipeline input, parameter, and output specs.""" input_bindings: list[tuple[str, str]] = [] param_bindings: list[tuple[str, str]] = [] + output_bindings: list[tuple[str, str]] = [] required_inputs: list[str] = [] required_params: list[str] = [] seen_idents: set[str] = set() + entry_metadata: dict[str, dict[str, Any]] = {} seen_opts: set[str] = { "--pipeline", "-p", @@ -139,6 +315,7 @@ def build_dynamic_run( "--cache-dir", "--reuse", "--no-reuse", + "--output-dir", } argument_inputs = argument_inputs or {} argument_params = argument_params or {} @@ -146,7 +323,7 @@ def build_dynamic_run( pipeline_group = Group( "Pipeline", sort_key=1, - help_formatter=_PipelineGroupFormatter(), + 
help_formatter=_PipelineGroupFormatter(entry_metadata), ) annotations: dict[str, Any] = { @@ -201,6 +378,14 @@ def build_dynamic_run( help=REUSE_HELP, ), ] + annotations["output_dir"] = Annotated[ + Path | None, + CliParameter( + name=("--output-dir",), + group=command_group, + help="Directory for all pipeline outputs.", + ), + ] parameters: list[inspect.Parameter] = [ inspect.Parameter( @@ -208,6 +393,11 @@ def build_dynamic_run( kind=inspect.Parameter.KEYWORD_ONLY, annotation=annotations["pipeline"], ), + inspect.Parameter( + name="cache_dir", + kind=inspect.Parameter.KEYWORD_ONLY, + annotation=annotations["cache_dir"], + ), inspect.Parameter( name="arguments_file", kind=inspect.Parameter.KEYWORD_ONLY, @@ -226,17 +416,18 @@ def build_dynamic_run( default=None, annotation=annotations["config_file"], ), - inspect.Parameter( - name="cache_dir", - kind=inspect.Parameter.KEYWORD_ONLY, - annotation=annotations["cache_dir"], - ), inspect.Parameter( name="reuse", kind=inspect.Parameter.KEYWORD_ONLY, default=True, annotation=annotations["reuse"], ), + inspect.Parameter( + name="output_dir", + kind=inspect.Parameter.KEYWORD_ONLY, + default=None, + annotation=annotations["output_dir"], + ), ] def add_dynamic_option( @@ -272,7 +463,12 @@ def add_dynamic_option( ) ) - for spec in input_specs: + required_input_specs = [spec for spec in input_specs if spec.required] + optional_input_specs = [spec for spec in input_specs if not spec.required] + required_param_specs = [spec for spec in param_specs if _is_required_param(spec)] + optional_param_specs = [spec for spec in param_specs if not _is_required_param(spec)] + + def add_input_spec(spec: InputSpec) -> None: original = spec.name ident = to_identifier(original, "input") if ident in seen_idents: @@ -288,21 +484,26 @@ def add_dynamic_option( type_text = spec.type opt = dynamic_opt(original, ParamType.INPUT) + entry_metadata[opt] = { + "type_label": _display_type_label( + spec_type=type_text, type_hint=str, is_input=True + ), + 
"default": None, + "required": display_required, + } add_dynamic_option( ident=ident, opt=opt, required=False, py_type=str, - help_text=( - f"Pipeline input: {original}" - + (f" ({type_text})" if type_text else "") - + (" [required]" if display_required else "") + help_text=_format_help_text( + description=spec.description, ), default=None, group=pipeline_group, ) - for spec in param_specs: + def add_param_spec(spec: ParamSpec) -> None: original = spec.name ident = to_identifier(original, "param") if ident in seen_idents: @@ -313,8 +514,7 @@ def add_dynamic_option( param_bindings.append((ident, original)) default = spec.default - required = spec.required - is_required = bool(required and default is None) + is_required = _is_required_param(spec) argument_value = argument_params.get(original) has_argument_default = not _is_missing(argument_value) display_default = ( @@ -326,29 +526,71 @@ def add_dynamic_option( opt = dynamic_opt(original, ParamType.PARAM) if is_required: required_params.append(original) - default_text = f" [default: {display_default}]" if display_default is not None else "" + entry_metadata[opt] = { + "type_label": _display_type_label( + spec_type=spec.type, type_hint=param_type, is_input=False + ), + "default": display_default, + "required": display_required, + } add_dynamic_option( ident=ident, opt=opt, required=False, py_type=param_type, - help_text=( - f"Pipeline parameter: {original}" - + (" [required]" if display_required else "") - + default_text + help_text=_format_help_text( + description=spec.description, ), default=param_default, group=pipeline_group, ) + for spec in required_input_specs: + add_input_spec(spec) + for spec in required_param_specs: + add_param_spec(spec) + for spec in optional_input_specs: + add_input_spec(spec) + for spec in optional_param_specs: + add_param_spec(spec) + + for spec in output_specs: + original = spec.name + ident = to_identifier(original, "output") + if ident in seen_idents: + raise ValueError( + f"Duplicate 
pipeline output name after normalization: {original!r}." + ) + seen_idents.add(ident) + output_bindings.append((ident, original)) + opt = dynamic_opt(original, ParamType.OUTPUT) + entry_metadata[opt] = { + "type_label": "PATH", + "default": None, + "required": False, + } + add_dynamic_option( + ident=ident, + opt=opt, + required=False, + py_type=str, + help_text=_format_help_text( + description=_output_path_help(spec.description), + ), + default=None, + group=pipeline_group, + ) + def run( pipeline: Path, arguments_file: Path | None = None, show_params: ShowParamsMode = ShowParamsMode.REQUIRED, config_file: Path | None = None, + output_dir: Path | None = None, **kwargs: Any, ) -> None: _ = show_params + kwargs["output_dir"] = output_dir run_handler( pipeline, arguments_file, @@ -356,6 +598,8 @@ def run( kwargs, input_bindings, param_bindings, + output_bindings, + "output_dir", required_inputs, required_params, ) @@ -364,7 +608,7 @@ def run( run.__signature__ = inspect.Signature(parameters) run.__doc__ = ( "Run an Adagio pipeline.\n\n" - "Dynamic inputs and parameters are loaded from the pipeline file and exposed as CLI options.\n" + "Dynamic inputs, parameters, and outputs are loaded from the pipeline file and exposed as CLI options.\n" "Use: adagio run --pipeline PATH --help" ) return run diff --git a/src/adagio/cli/main.py b/src/adagio/cli/main.py index cfa5da0..ab0da8e 100644 --- a/src/adagio/cli/main.py +++ b/src/adagio/cli/main.py @@ -9,8 +9,9 @@ from rich.console import Console from ..app.parsers.pipeline import Input as InputSpec +from ..app.parsers.pipeline import Output as OutputSpec from ..app.parsers.pipeline import Parameter as ParamSpec -from ..app.parsers.pipeline import parse_inputs, parse_parameters +from ..app.parsers.pipeline import parse_inputs, parse_outputs, parse_parameters from ..executors.cache_support import CACHE_DIR_HELP, REUSE_HELP from .args import ShowParamsMode, extract_flag_value, promote_positional_pipeline from .config import 
load_run_config @@ -59,6 +60,7 @@ def main(argv: list[str] | None = None) -> None: app = App( name="adagio", help="Adagio command line tool for processing pipelines created with the Adagio GUI.", + help_format="rich", ) app.command(build_qapi, name="build-qapi") @@ -142,6 +144,7 @@ def run( data = json.loads(pipeline_path.read_text(encoding="utf-8")) input_specs = parse_inputs(data) param_specs = parse_parameters(data) + output_specs = parse_outputs(data) arguments_path_str = extract_flag_value(argv, "--arguments") config_path_str = extract_flag_value(argv, "--config") arguments_data = ( @@ -149,9 +152,10 @@ def run( ) if config_path_str: load_run_config(Path(config_path_str)) - visible_inputs, visible_params = _filter_visible_specs( + visible_inputs, visible_params, visible_outputs = _filter_visible_specs( input_specs=input_specs, param_specs=param_specs, + output_specs=output_specs, show_mode=show_mode, arguments_data=arguments_data, ) @@ -159,6 +163,7 @@ def run( dynamic_run = build_dynamic_run( input_specs=visible_inputs, param_specs=visible_params, + output_specs=visible_outputs, argument_inputs=arguments_data.get("inputs", {}) if arguments_data else None, argument_params=arguments_data.get("parameters", {}) if arguments_data else None, run_handler=partial(run_pipeline_from_kwargs, console=console), @@ -171,11 +176,12 @@ def _filter_visible_specs( *, input_specs: list[InputSpec], param_specs: list[ParamSpec], + output_specs: list[OutputSpec], show_mode: ShowParamsMode, arguments_data: dict[str, Any] | None, -) -> tuple[list[InputSpec], list[ParamSpec]]: +) -> tuple[list[InputSpec], list[ParamSpec], list[OutputSpec]]: if show_mode is ShowParamsMode.ALL: - return input_specs, param_specs + return input_specs, param_specs, output_specs state_inputs = {spec.name: None for spec in input_specs} state_params = {spec.name: spec.default for spec in param_specs} @@ -199,7 +205,7 @@ def _filter_visible_specs( and _is_missing(state_params.get(spec.name)) ) ] - return 
filtered_inputs, filtered_params + return filtered_inputs, filtered_params, [] filtered_inputs = [ spec for spec in input_specs if _is_missing(state_inputs.get(spec.name)) @@ -207,7 +213,7 @@ def _filter_visible_specs( filtered_params = [ spec for spec in param_specs if _is_missing(state_params.get(spec.name)) ] - return filtered_inputs, filtered_params + return filtered_inputs, filtered_params, [] def _load_arguments_data(path: Path, _console: Console | None = None) -> dict[str, Any]: diff --git a/src/adagio/cli/runner.py b/src/adagio/cli/runner.py index 962a8db..6783355 100644 --- a/src/adagio/cli/runner.py +++ b/src/adagio/cli/runner.py @@ -40,6 +40,8 @@ def run_pipeline_from_kwargs( kwargs: dict[str, Any], input_bindings: list[tuple[str, str]], param_bindings: list[tuple[str, str]], + output_bindings: list[tuple[str, str]], + output_dir_ident: str, required_inputs: list[str], required_params: list[str], *, @@ -105,6 +107,19 @@ def run_pipeline_from_kwargs( if value is not None: arguments.parameters[original] = value + cli_output_dir = kwargs.get(output_dir_ident) + cli_output_overrides = { + original: str(value) + for ident, original in output_bindings + if (value := kwargs.get(ident)) is not None + } + arguments.outputs = _apply_output_overrides( + outputs=arguments.outputs, + output_names=output_names, + output_dir=str(cli_output_dir) if cli_output_dir is not None else None, + output_overrides=cli_output_overrides, + ) + missing_inputs = [ name for name in required_inputs if _is_missing(arguments.inputs.get(name)) ] @@ -194,6 +209,44 @@ def _resolve_output_destinations( return resolved +def _apply_output_overrides( + *, + outputs: str | dict[str, str], + output_names: list[str], + output_dir: str | None, + output_overrides: dict[str, str], +) -> str | dict[str, str]: + if output_dir is not None: + if not output_overrides: + return output_dir + + resolved = { + output_name: os.path.join(output_dir, output_name) + for output_name in output_names + } + 
resolved.update(output_overrides) + return resolved + + if not output_overrides: + return outputs + + if isinstance(outputs, dict): + resolved = dict(outputs) + elif isinstance(outputs, str): + if _is_missing_output(outputs): + resolved = {} + else: + resolved = { + output_name: os.path.join(outputs, output_name) + for output_name in output_names + } + else: + raise TypeError("Unsupported outputs configuration.") + + resolved.update(output_overrides) + return resolved + + def _is_truthy(value: str | None) -> bool: if value is None: return False diff --git a/src/adagio/model/pipeline.py b/src/adagio/model/pipeline.py index 68bf31a..17e83bc 100644 --- a/src/adagio/model/pipeline.py +++ b/src/adagio/model/pipeline.py @@ -110,6 +110,7 @@ class _Def(BaseModel): name: str type: str ast: TypeAST + description: str | None = None class _InputDef(_Def): diff --git a/tests/test_output_options.py b/tests/test_output_options.py new file mode 100644 index 0000000..229ab8e --- /dev/null +++ b/tests/test_output_options.py @@ -0,0 +1,202 @@ +import typing +import unittest + +from adagio.app.parsers.pipeline import Input, Output, Parameter, parse_outputs +from adagio.cli.args import ShowParamsMode +from adagio.cli.dynamic import build_dynamic_run +from adagio.cli.main import _filter_visible_specs +from adagio.cli.runner import _apply_output_overrides + + +class OutputOptionTests(unittest.TestCase): + def test_parse_outputs_preserves_descriptions(self) -> None: + data = { + "signature": { + "inputs": [], + "parameters": [], + "outputs": [ + { + "id": "00000000-0000-0000-0000-000000000001", + "name": "table", + "type": "FeatureTable[Frequency]", + "description": "Denoised feature table.", + } + ], + } + } + + outputs = parse_outputs(data) + + self.assertEqual(outputs[0].name, "table") + self.assertEqual(outputs[0].description, "Denoised feature table.") + + def test_dynamic_run_adds_output_dir_and_per_output_options(self) -> None: + dynamic_run = build_dynamic_run( + input_specs=[], + 
param_specs=[], + output_specs=[ + Output( + id="00000000-0000-0000-0000-000000000001", + name="table", + type="FeatureTable[Frequency]", + description="Denoised feature table.", + ) + ], + run_handler=lambda *args, **kwargs: None, + ) + + self.assertIn("output_dir", dynamic_run.__signature__.parameters) + self.assertIn("output_table", dynamic_run.__signature__.parameters) + + output_dir_annotation = dynamic_run.__signature__.parameters["output_dir"].annotation + output_annotation = dynamic_run.__signature__.parameters["output_table"].annotation + output_dir_help = typing.get_args(output_dir_annotation)[1].help + output_help = typing.get_args(output_annotation)[1].help + + self.assertEqual(output_dir_help, "Directory for all pipeline outputs.") + self.assertIn("Denoised feature table.", output_help) + self.assertIn("Overrides --output-dir", output_help) + + def test_output_dir_is_a_command_option_and_required_pipeline_options_are_first( + self, + ) -> None: + dynamic_run = build_dynamic_run( + input_specs=[ + Input( + id="00000000-0000-0000-0000-000000000001", + name="tree", + required=False, + type="Phylogeny[Rooted]", + description="Optional tree.", + ), + Input( + id="00000000-0000-0000-0000-000000000002", + name="seqs", + required=True, + type="SampleData[Sequences]", + description="Required sequences.", + ), + ], + param_specs=[ + Parameter( + id="00000000-0000-0000-0000-000000000003", + name="threads", + required=False, + default=1, + type="Int", + description="Optional thread count.", + ), + Parameter( + id="00000000-0000-0000-0000-000000000004", + name="metric", + required=True, + default=None, + type="Str", + description="Required metric.", + ), + ], + output_specs=[ + Output( + id="00000000-0000-0000-0000-000000000005", + name="table", + type="FeatureTable[Frequency]", + description="Output table.", + ) + ], + run_handler=lambda *args, **kwargs: None, + ) + + output_dir_annotation = dynamic_run.__signature__.parameters["output_dir"].annotation + 
output_dir_group = typing.get_args(output_dir_annotation)[1].group + + self.assertEqual(output_dir_group[0]._name, "Command Options") + self.assertEqual( + list(dynamic_run.__signature__.parameters)[:7], + [ + "pipeline", + "cache_dir", + "arguments_file", + "show_params", + "config_file", + "reuse", + "output_dir", + ], + ) + self.assertEqual( + list(dynamic_run.__signature__.parameters)[7:], + [ + "input_seqs", + "param_metric", + "input_tree", + "param_threads", + "output_table", + ], + ) + + def test_outputs_are_only_visible_in_all_mode(self) -> None: + output_specs = [ + Output( + id="00000000-0000-0000-0000-000000000005", + name="table", + type="FeatureTable[Frequency]", + description="Output table.", + ) + ] + + _, _, required_outputs = _filter_visible_specs( + input_specs=[], + param_specs=[], + output_specs=output_specs, + show_mode=ShowParamsMode.REQUIRED, + arguments_data=None, + ) + _, _, missing_outputs = _filter_visible_specs( + input_specs=[], + param_specs=[], + output_specs=output_specs, + show_mode=ShowParamsMode.MISSING, + arguments_data=None, + ) + _, _, all_outputs = _filter_visible_specs( + input_specs=[], + param_specs=[], + output_specs=output_specs, + show_mode=ShowParamsMode.ALL, + arguments_data=None, + ) + + self.assertEqual(required_outputs, []) + self.assertEqual(missing_outputs, []) + self.assertEqual(all_outputs, output_specs) + + def test_output_dir_override_applies_to_all_outputs(self) -> None: + resolved = _apply_output_overrides( + outputs={"table": "/tmp/from-file/table.qza", "stats": "/tmp/from-file/stats.qza"}, + output_names=["table", "stats"], + output_dir="/tmp/all-outputs", + output_overrides={"stats": "/tmp/custom/stats.qza"}, + ) + + self.assertEqual( + resolved, + { + "table": "/tmp/all-outputs/table", + "stats": "/tmp/custom/stats.qza", + }, + ) + + def test_per_output_override_merges_with_shared_directory_outputs(self) -> None: + resolved = _apply_output_overrides( + outputs="/tmp/from-arguments-dir", + 
output_names=["table", "stats"], + output_dir=None, + output_overrides={"stats": "/tmp/custom/stats.qza"}, + ) + + self.assertEqual( + resolved, + { + "table": "/tmp/from-arguments-dir/table", + "stats": "/tmp/custom/stats.qza", + }, + ) diff --git a/tests/test_pipeline_descriptions.py b/tests/test_pipeline_descriptions.py new file mode 100644 index 0000000..ceaec18 --- /dev/null +++ b/tests/test_pipeline_descriptions.py @@ -0,0 +1,182 @@ +import typing +import unittest + +from adagio.app.parsers.pipeline import Input, Parameter, parse_inputs, parse_parameters +from adagio.cli.dynamic import ( + _compact_type_text, + _display_type_label, + _pipeline_type_label, + _wrap_type_label, + build_dynamic_run, +) +from adagio.model.pipeline import AdagioPipeline + + +class PipelineDescriptionTests(unittest.TestCase): + def test_pipeline_model_accepts_signature_descriptions(self) -> None: + ast = { + "type": "expression", + "builtin": True, + "name": "Str", + "predicate": None, + "fields": [], + } + pipeline = AdagioPipeline.model_validate( + { + "type": "pipeline", + "signature": { + "inputs": [ + { + "id": "input-1", + "name": "table", + "type": "FeatureTable[Frequency]", + "ast": ast, + "required": True, + "description": "Input table.", + } + ], + "parameters": [ + { + "id": "param-1", + "name": "trunc_len", + "type": "Int", + "ast": ast, + "required": False, + "default": 120, + "description": "Trim reads to this length.", + } + ], + "outputs": [ + { + "id": "output-1", + "name": "table", + "type": "FeatureTable[Frequency]", + "ast": ast, + "description": "Denoised table.", + } + ], + }, + "graph": [], + } + ) + + self.assertEqual(pipeline.signature.inputs[0].description, "Input table.") + self.assertEqual( + pipeline.signature.parameters[0].description, + "Trim reads to this length.", + ) + self.assertEqual(pipeline.signature.outputs[0].description, "Denoised table.") + + def test_pipeline_parsers_preserve_descriptions(self) -> None: + data = { + "signature": { + 
"inputs": [ + { + "id": "00000000-0000-0000-0000-000000000001", + "name": "table", + "required": True, + "type": "FeatureTable[Frequency]", + "description": "Input table.", + } + ], + "parameters": [ + { + "id": "00000000-0000-0000-0000-000000000002", + "name": "trunc_len", + "required": False, + "default": 120, + "type": "Int", + "description": "Trim reads to this length.", + } + ], + "outputs": [], + } + } + + self.assertEqual(parse_inputs(data)[0].description, "Input table.") + self.assertEqual( + parse_parameters(data)[0].description, "Trim reads to this length." + ) + + def test_dynamic_run_help_includes_descriptions(self) -> None: + dynamic_run = build_dynamic_run( + input_specs=[ + Input( + id="00000000-0000-0000-0000-000000000001", + name="table", + required=True, + type="FeatureTable[Frequency]", + description="Input table.", + ) + ], + param_specs=[ + Parameter( + id="00000000-0000-0000-0000-000000000002", + name="trunc_len", + required=False, + default=120, + type="Int", + description="Trim reads to this length.", + ) + ], + output_specs=[], + run_handler=lambda *args, **kwargs: None, + ) + + input_annotation = dynamic_run.__signature__.parameters["input_table"].annotation + param_annotation = dynamic_run.__signature__.parameters["param_trunc_len"].annotation + input_help = typing.get_args(input_annotation)[1].help + param_help = typing.get_args(param_annotation)[1].help + + self.assertIsInstance(input_help, str) + self.assertIsInstance(param_help, str) + self.assertIn("Input table.", input_help) + self.assertIn("Trim reads to this length.", param_help) + self.assertNotIn("Pipeline input:", input_help) + self.assertNotIn("Pipeline parameter:", param_help) + + def test_choices_are_rendered_compactly(self) -> None: + compact = _compact_type_text( + "Str % Choices('ace', 'berger_parker_d', 'brillouin_d')" + ) + self.assertEqual(compact, "[ace|berger_parker_d|brillouin_d]") + + compact_unquoted = _compact_type_text( + "Str % Choices(ace, berger_parker_d, 
brillouin_d)" + ) + self.assertEqual(compact_unquoted, "[ace|berger_parker_d|brillouin_d]") + + def test_long_choice_labels_wrap_on_pipes(self) -> None: + wrapped = _wrap_type_label( + "[ace|berger_parker_d|brillouin_d|chao1|dominance]", 22 + ) + self.assertIn("\n", wrapped) + self.assertTrue(wrapped.startswith("[")) + self.assertTrue(wrapped.endswith("]")) + self.assertIn("\n |", wrapped) + + def test_pipeline_type_labels_use_general_cli_types(self) -> None: + self.assertEqual(_pipeline_type_label(int), "INTEGER") + self.assertEqual(_pipeline_type_label(float), "NUMBER") + self.assertEqual(_pipeline_type_label(bool), "BOOLEAN") + self.assertEqual(_pipeline_type_label(str | None), "TEXT") + + def test_display_type_label_prefers_choices_and_path(self) -> None: + self.assertEqual( + _display_type_label( + spec_type="FeatureTable[Frequency]", type_hint=str, is_input=True + ), + "PATH", + ) + self.assertEqual( + _display_type_label( + spec_type="Str % Choices(ace, berger_parker_d, brillouin_d)", + type_hint=str, + is_input=False, + ), + "[ace|berger_parker_d|brillouin_d]", + ) + self.assertEqual( + _display_type_label(spec_type="Int", type_hint=int, is_input=False), + "INTEGER", + ) From 31a7077de3b83acc2f42b1c064f9cbddcdd57832 Mon Sep 17 00:00:00 2001 From: John Chase Date: Fri, 27 Mar 2026 11:08:38 -0700 Subject: [PATCH 34/44] Adds pipeline summary commmand --- README.md | 14 +- src/adagio/cli/main.py | 25 ++- src/adagio/cli/pipeline.py | 27 +++ src/adagio/cli/qapi.py | 11 +- src/adagio/describe.py | 347 ++++++++++++++++++++++++++++++++++++ tests/test_pipeline_show.py | 182 +++++++++++++++++++ 6 files changed, 599 insertions(+), 7 deletions(-) create mode 100644 src/adagio/cli/pipeline.py create mode 100644 tests/test_pipeline_show.py diff --git a/README.md b/README.md index 95ff873..ddc895b 100644 --- a/README.md +++ b/README.md @@ -92,6 +92,14 @@ Clear an existing cache directory: adagio cache clear --cache-dir /path/to/cache ``` +### Inspect a pipeline + +Print a 
dependency-ordered summary of the plugin actions in a pipeline: + +```bash +adagio pipeline show path/to/pipeline.json +``` + ### Arguments file format `--arguments` can be downloaded from Adagio directly in the "Run" workflow : @@ -173,19 +181,19 @@ tasks serially; no scheduler submission or remote image pull behavior is include Generate and submit plugin metadata from the active QIIME environment: ```bash -adagio build-qapi --action-url http://localhost:81/api/v1 +adagio qapi build --action-url http://localhost:81/api/v1 ``` Write payload to disk without submitting: ```bash -adagio build-qapi --output qapi.json --dry-run +adagio qapi build --output qapi.json --dry-run ``` Submit selected plugins only: ```bash -adagio build-qapi --plugin dada2 --plugin feature-table +adagio qapi build --plugin dada2 --plugin feature-table ``` ## Development diff --git a/src/adagio/cli/main.py b/src/adagio/cli/main.py index ab0da8e..4d29c20 100644 --- a/src/adagio/cli/main.py +++ b/src/adagio/cli/main.py @@ -16,7 +16,8 @@ from .args import ShowParamsMode, extract_flag_value, promote_positional_pipeline from .config import load_run_config from .dynamic import build_dynamic_run -from .qapi import build_qapi +from .pipeline import run_pipeline_cli +from .qapi import run_qapi from .runner import run_pipeline_from_kwargs @@ -44,6 +45,14 @@ def main(argv: list[str] | None = None) -> None: run_runtime(argv[1:], console=console) return + if argv and argv[0] == "qapi": + run_qapi(argv[1:]) + return + + if argv and argv[0] == "pipeline": + run_pipeline_cli(argv[1:]) + return + argv, positional_pipeline = promote_positional_pipeline(argv) pipeline_str = extract_flag_value(argv, "--pipeline", "-p") show_mode_str = extract_flag_value(argv, "--show-params") @@ -62,8 +71,6 @@ def main(argv: list[str] | None = None) -> None: help="Adagio command line tool for processing pipelines created with the Adagio GUI.", help_format="rich", ) - app.command(build_qapi, name="build-qapi") - @app.command def 
cache() -> None: """Manage the shared QIIME cache directory.""" @@ -76,6 +83,18 @@ def runtime() -> None: console.print(CycloptsPanel("Try: adagio runtime --help")) sys.exit(1) + @app.command + def qapi() -> None: + """Generate and submit QAPI payloads.""" + console.print(CycloptsPanel("Try: adagio qapi --help")) + sys.exit(1) + + @app.command + def pipeline() -> None: + """Inspect pipeline definitions.""" + console.print(CycloptsPanel("Try: adagio pipeline --help")) + sys.exit(1) + if not pipeline_str: command_group = Group("Command Options", sort_key=0) diff --git a/src/adagio/cli/pipeline.py b/src/adagio/cli/pipeline.py new file mode 100644 index 0000000..0d7168c --- /dev/null +++ b/src/adagio/cli/pipeline.py @@ -0,0 +1,27 @@ +import json +from pathlib import Path + +from cyclopts import App +from rich.console import Console + +from ..describe import render_pipeline_text +from ..model.pipeline import AdagioPipeline + +console = Console() + + +def run_pipeline_cli(argv: list[str]) -> None: + app = App( + name="adagio pipeline", + help="Inspect pipeline definitions.", + ) + app.command(show_pipeline, name="show") + app(argv) + + +def show_pipeline(pipeline: Path) -> None: + """Print a pipeline summary to the terminal.""" + data = json.loads(pipeline.read_text(encoding="utf-8")) + pipeline_data = data.get("spec", data) if isinstance(data, dict) else data + parsed_pipeline = AdagioPipeline.model_validate(pipeline_data) + console.print(render_pipeline_text(parsed_pipeline), soft_wrap=True) diff --git a/src/adagio/cli/qapi.py b/src/adagio/cli/qapi.py index 6123419..716dab1 100644 --- a/src/adagio/cli/qapi.py +++ b/src/adagio/cli/qapi.py @@ -3,7 +3,7 @@ from pathlib import Path from typing import Annotated -from cyclopts import Parameter +from cyclopts import App, Parameter from rich.console import Console from ..qapi import DEFAULT_SCHEMA_VERSION, generate_qapi_payload, submit_qapi_payload @@ -11,6 +11,15 @@ console = Console() +def run_qapi(argv: list[str]) -> None: 
+ app = App( + name="adagio qapi", + help="Generate and submit QAPI payloads from the active QIIME environment.", + ) + app.command(build_qapi, name="build") + app(argv) + + def _print_submission_summary(response_body: object) -> None: if isinstance(response_body, dict): message = response_body.get("message") diff --git a/src/adagio/describe.py b/src/adagio/describe.py index e69de29..e79aa2c 100644 --- a/src/adagio/describe.py +++ b/src/adagio/describe.py @@ -0,0 +1,347 @@ +import json +from dataclasses import dataclass + +from rich import box +from rich.console import Group, NewLine +from rich.panel import Panel +from rich.text import Text + +from .cli.dynamic import _compact_type_text +from .executors.common import plan_execution_order +from .model.pipeline import AdagioPipeline +from .model.task import LiteralVal, MetadataVal, PluginActionTask, PromotedVal, RootInputTask + + +@dataclass(frozen=True) +class _DisplayRef: + label: str + type_label: str | None = None + description: str | None = None + + +def render_pipeline_text(pipeline: AdagioPipeline) -> Text | Group: + available_ids = { + input_def.id: _DisplayRef( + label=_pipeline_input_label(input_def.name), + type_label=_format_spec_type(input_def.type), + description=_clean_description(input_def.description), + ) + for input_def in pipeline.signature.inputs + } + parameter_refs = { + parameter.id: _DisplayRef( + label=_pipeline_parameter_label(parameter.name), + type_label=_format_spec_type(parameter.type), + description=_clean_description(parameter.description), + ) + for parameter in pipeline.signature.parameters + } + pipeline_output_refs = { + output.id: _DisplayRef( + label=f'pipeline output "{output.name}"', + type_label=_format_spec_type(output.type), + description=_clean_description(output.description), + ) + for output in pipeline.signature.outputs + } + execution_plan = plan_execution_order( + tasks=list(pipeline.iter_tasks()), + scope=available_ids, + ) + + panels = [] + for task in 
execution_plan: + if isinstance(task, RootInputTask): + _record_root_input_outputs(task=task, available_ids=available_ids) + continue + + if not isinstance(task, PluginActionTask): + continue + + body = Text(no_wrap=False, overflow="fold") + _append_section_header(body, "Inputs") + _append_input_lines(body, task=task, available_ids=available_ids) + _append_section_header(body, "Parameters") + _append_parameter_lines( + body, + task=task, + available_ids=available_ids, + parameter_refs=parameter_refs, + ) + _append_section_header(body, "Outputs") + _append_output_lines( + body, + task=task, + pipeline_output_refs=pipeline_output_refs, + ) + panels.append( + Panel( + body, + title=f"{task.plugin}.{task.action}", + title_align="left", + border_style="cyan", + box=box.ROUNDED, + expand=True, + ) + ) + + for output_name, output in task.outputs.items(): + pipeline_output_ref = pipeline_output_refs.get(output.id) + available_ids[output.id] = _DisplayRef( + label=f"{task.plugin}.{task.action}.{output_name}", + type_label=( + pipeline_output_ref.type_label + if pipeline_output_ref is not None + else None + ), + description=( + pipeline_output_ref.description + if pipeline_output_ref is not None + else None + ), + ) + + if not panels: + return Text("No plugin actions found.", style="dim") + + renderables = [] + for index, panel in enumerate(panels): + if index: + renderables.append(NewLine()) + renderables.append(panel) + return Group(*renderables) + + +def _append_section_header(rendered: Text, title: str) -> None: + rendered.append(f" {title}:\n", style="bold cyan") + + +def _append_input_lines( + rendered: Text, + *, + task: PluginActionTask, + available_ids: dict[str, _DisplayRef], +) -> None: + if not task.inputs: + _append_none_line(rendered) + return + + for input_name, source in task.inputs.items(): + reference = available_ids.get(source.id, _unknown_reference(source.id)) + _append_entry_line( + rendered, + name=input_name, + type_label=reference.type_label, + 
value_text=reference.label, + description=reference.description, + ) + + +def _append_parameter_lines( + rendered: Text, + *, + task: PluginActionTask, + available_ids: dict[str, _DisplayRef], + parameter_refs: dict[str, _DisplayRef], +) -> None: + if not task.parameters: + _append_none_line(rendered) + return + + for parameter_name, value in task.parameters.items(): + rendered_value, display = _render_parameter_value( + task=task, + parameter_name=parameter_name, + value=value, + available_ids=available_ids, + parameter_refs=parameter_refs, + ) + _append_entry_line( + rendered, + name=parameter_name, + type_label=display.type_label if display is not None else None, + value_text=rendered_value, + description=display.description if display is not None else None, + ) + + +def _append_output_lines( + rendered: Text, + *, + task: PluginActionTask, + pipeline_output_refs: dict[str, _DisplayRef], +) -> None: + if not task.outputs: + _append_none_line(rendered) + return + + for output_name, output in task.outputs.items(): + pipeline_output_ref = pipeline_output_refs.get(output.id) + value_text = _output_annotation(output_name=output_name, output_id=output.id) + _append_entry_line( + rendered, + name=output_name, + type_label=( + pipeline_output_ref.type_label + if pipeline_output_ref is not None + else None + ), + value_text=value_text, + description=( + pipeline_output_ref.description + if pipeline_output_ref is not None + else None + ), + ) + + +def _append_none_line(rendered: Text) -> None: + rendered.append(" (none)\n", style="dim") + + +def _append_entry_line( + rendered: Text, + *, + name: str, + type_label: str | None, + value_text: str | None, + description: str | None, +) -> None: + rendered.append(" - ") + rendered.append(name, style="cyan") + if value_text is not None: + rendered.append(":", style="cyan") + if type_label: + rendered.append(" ") + rendered.append(type_label, style="bold yellow") + if value_text: + rendered.append(" ") + 
rendered.append(value_text) + rendered.append("\n") + if description: + rendered.append(" ") + rendered.append(description, style="dim") + rendered.append("\n") + + +def _render_parameter_value( + *, + task: PluginActionTask, + parameter_name: str, + value: object, + available_ids: dict[str, _DisplayRef], + parameter_refs: dict[str, _DisplayRef], +) -> tuple[str, _DisplayRef | None]: + if isinstance(value, PromotedVal): + display = parameter_refs.get(value.id) + if display is not None: + return display.label, display + return _pipeline_parameter_label(value.id), None + + if isinstance(value, LiteralVal): + return _render_literal(value.value), _literal_display(value.value) + + if isinstance(value, MetadataVal): + source = task.inputs.get(parameter_name) + source_ref = ( + available_ids.get(source.id, _unknown_reference(source.id)) + if source is not None + else _DisplayRef(label=f'input "{parameter_name}"') + ) + column_label, display = _render_metadata_column( + column=value.column, + parameter_refs=parameter_refs, + ) + rendered_value = ( + f"metadata column from {source_ref.label} using {column_label}" + ) + if display is not None: + return rendered_value, display + return rendered_value, _DisplayRef( + label=rendered_value, + description=source_ref.description, + ) + + return str(value), None + + +def _render_metadata_column( + *, + column: object, + parameter_refs: dict[str, _DisplayRef], +) -> tuple[str, _DisplayRef | None]: + if isinstance(column, PromotedVal): + display = parameter_refs.get(column.id) + if display is not None: + return display.label, display + return _pipeline_parameter_label(column.id), None + + value = getattr(column, "value", None) + return _render_literal(value), _literal_display(value) + + +def _render_literal(value: object) -> str: + return json.dumps(value, sort_keys=True) + + +def _literal_display(value: object) -> _DisplayRef: + type_label = _format_literal_type(value) + return _DisplayRef(label=_render_literal(value), 
type_label=type_label) + + +def _format_literal_type(value: object) -> str | None: + if isinstance(value, bool): + return "(Boolean)" + if isinstance(value, int): + return "(Int)" + if isinstance(value, float): + return "(Float)" + if isinstance(value, str): + return "(Str)" + return None + + +def _record_root_input_outputs( + *, + task: RootInputTask, + available_ids: dict[str, _DisplayRef], +) -> None: + for name, output in task.outputs.items(): + source = task.inputs.get(name) + if source is None: + available_ids[output.id] = _DisplayRef(label=_pipeline_input_label(name)) + continue + available_ids[output.id] = available_ids.get( + source.id, + _unknown_reference(source.id), + ) + + +def _output_annotation(*, output_name: str, output_id: str) -> str | None: + _ = output_name + _ = output_id + return None + + +def _format_spec_type(type_text: str | None) -> str | None: + cleaned = (type_text or "").strip() + if not cleaned: + return None + return _compact_type_text(cleaned) + + +def _clean_description(description: str | None) -> str | None: + cleaned = (description or "").strip() + return cleaned or None + + +def _pipeline_input_label(name: str) -> str: + return f'pipeline input "{name}"' + + +def _pipeline_parameter_label(name: str) -> str: + return f'pipeline parameter "{name}"' + + +def _unknown_reference(identifier: str) -> _DisplayRef: + return _DisplayRef(label=f'unknown reference "{identifier}"') diff --git a/tests/test_pipeline_show.py b/tests/test_pipeline_show.py new file mode 100644 index 0000000..324e899 --- /dev/null +++ b/tests/test_pipeline_show.py @@ -0,0 +1,182 @@ +import io +import json +import subprocess +import sys +import tempfile +import unittest +from pathlib import Path + +from rich.console import Console + +from adagio.describe import render_pipeline_text +from adagio.model.pipeline import AdagioPipeline + + +AST = { + "type": "expression", + "builtin": True, + "name": "Str", + "predicate": None, + "fields": [], +} + + +def 
_sample_pipeline_dict() -> dict: + return { + "type": "pipeline", + "signature": { + "inputs": [ + { + "id": "input-seqs", + "name": "seqs", + "type": "SampleData[SequencesWithQuality]", + "ast": AST, + "required": True, + "description": "Demultiplexed sequence data.", + }, + { + "id": "input-barcodes", + "name": "barcodes", + "type": "MetadataColumn[Categorical]", + "ast": AST, + "required": True, + "description": "Barcode metadata column.", + }, + ], + "parameters": [ + { + "id": "param-barcodes", + "name": "barcodes", + "type": "MetadataColumn[Categorical]", + "ast": AST, + "required": True, + "description": "Column used to find barcode values.", + }, + { + "id": "param-trim-left", + "name": "trim_left", + "type": "Int", + "ast": AST, + "required": True, + "description": "Trim this many bases from the start of each read.", + }, + ], + "outputs": [ + { + "id": "output-table", + "name": "table", + "type": "FeatureTable[Frequency]", + "ast": AST, + "description": "Denoised feature table.", + }, + { + "id": "output-demux", + "name": "per_sample_sequences", + "type": "SampleData[SequencesWithQuality]", + "ast": AST, + "description": "Per-sample demultiplexed sequences.", + } + ], + }, + "graph": [ + { + "id": "task-dada2", + "kind": "plugin-action", + "plugin": "dada2", + "action": "denoise_single", + "inputs": { + "demultiplexed_seqs": {"kind": "archive", "id": "output-demux"} + }, + "parameters": { + "trim_left": {"kind": "promoted", "id": "param-trim-left"} + }, + "outputs": { + "table": {"kind": "archive", "id": "output-table"} + }, + }, + { + "id": "task-demux", + "kind": "plugin-action", + "plugin": "demux", + "action": "emp_single", + "inputs": { + "seqs": {"kind": "archive", "id": "input-seqs"}, + "barcodes": {"kind": "metadata", "id": "input-barcodes"}, + }, + "parameters": { + "barcodes": { + "kind": "metadata", + "column": {"kind": "promoted", "id": "param-barcodes"}, + } + }, + "outputs": { + "per_sample_sequences": { + "kind": "archive", + "id": 
"output-demux", + } + }, + }, + ], + } + + +def _render_plain(renderable: object) -> str: + console = Console(record=True, width=160, file=io.StringIO()) + console.print(renderable, soft_wrap=True) + return console.export_text() + + +class PipelineShowTests(unittest.TestCase): + def test_render_pipeline_text_uses_dependency_order_and_resolves_bindings( + self, + ) -> None: + pipeline = AdagioPipeline.model_validate(_sample_pipeline_dict()) + + rendered = _render_plain(render_pipeline_text(pipeline)) + + self.assertLess(rendered.index("demux.emp_single"), rendered.index("dada2.denoise_single")) + self.assertNotIn('Plugin: demux', rendered) + self.assertNotIn('Action: emp_single', rendered) + self.assertIn("╭─ demux.emp_single ", rendered) + self.assertIn('seqs: (SampleData[SequencesWithQuality]) pipeline input "seqs"', rendered) + self.assertIn('Demultiplexed sequence data.', rendered) + self.assertIn('barcodes: (MetadataColumn[Categorical]) pipeline input "barcodes"', rendered) + self.assertIn('Barcode metadata column.', rendered) + self.assertIn( + 'barcodes: (MetadataColumn[Categorical]) metadata column from pipeline input "barcodes" using pipeline parameter "barcodes"', + rendered, + ) + self.assertIn('Column used to find barcode values.', rendered) + self.assertIn( + 'demultiplexed_seqs: (SampleData[SequencesWithQuality]) demux.emp_single.per_sample_sequences', + rendered, + ) + self.assertIn('Per-sample demultiplexed sequences.', rendered) + self.assertIn('trim_left: (Int) pipeline parameter "trim_left"', rendered) + self.assertIn('Trim this many bases from the start of each read.', rendered) + self.assertIn('table (FeatureTable[Frequency])', rendered) + self.assertIn('Denoised feature table.', rendered) + + def test_pipeline_show_cli_prints_summary(self) -> None: + with tempfile.TemporaryDirectory() as tmpdir: + pipeline_path = Path(tmpdir) / "pipeline.json" + payload = {"spec": _sample_pipeline_dict()} + pipeline_path.write_text(json.dumps(payload), 
encoding="utf-8") + + result = subprocess.run( + [sys.executable, "-m", "adagio.cli.main", "pipeline", "show", str(pipeline_path)], + capture_output=True, + check=False, + text=True, + ) + + self.assertEqual(result.returncode, 0, msg=result.stderr) + self.assertIn("demux.emp_single", result.stdout) + self.assertIn("dada2.denoise_single", result.stdout) + self.assertIn("Inputs:", result.stdout) + self.assertIn('barcodes: (MetadataColumn[Categorical]) pipeline input "barcodes"', result.stdout) + self.assertIn('table (FeatureTable[Frequency])', result.stdout) + + +if __name__ == "__main__": + unittest.main() From adaa841d2dd92b431d48d48ee8c0031195554eb0 Mon Sep 17 00:00:00 2001 From: John Chase Date: Fri, 27 Mar 2026 14:56:09 -0700 Subject: [PATCH 35/44] Writes files to disk as they are created --- src/adagio/executors/serial_runner.py | 45 ++++-- src/adagio/executors/task_environments.py | 18 ++- tests/test_serial_runner.py | 177 ++++++++++++++++++++++ 3 files changed, 226 insertions(+), 14 deletions(-) create mode 100644 tests/test_serial_runner.py diff --git a/src/adagio/executors/serial_runner.py b/src/adagio/executors/serial_runner.py index f70ef20..ddbd3ed 100644 --- a/src/adagio/executors/serial_runner.py +++ b/src/adagio/executors/serial_runner.py @@ -1,6 +1,6 @@ import tempfile import typing as t -from dataclasses import dataclass +from dataclasses import dataclass, field from pathlib import Path from rich.console import Console @@ -25,6 +25,8 @@ class SerialExecutionState: params: dict[str, t.Any] scope: dict[str, str] cache_config: ExecutionCacheConfig | None + saved_output_ids: set[str] = field(default_factory=set) + save_output_started: bool = False def run_serial_pipeline( @@ -32,7 +34,9 @@ def run_serial_pipeline( pipeline: AdagioPipeline, arguments: AdagioArguments, resolve_task: t.Callable[[t.Any, SerialExecutionState, Console | None], bool], - finish_outputs: t.Callable[[t.Any, AdagioArguments, SerialExecutionState, Monitor | None], None], + 
finish_outputs: t.Callable[ + [t.Any, AdagioArguments, SerialExecutionState, Monitor | None, bool], None + ], console: Console | None = None, monitor: Monitor | None = None, total_subtasks: int = CONTAINER_SUBTASK_COUNT, @@ -76,6 +80,13 @@ def run_serial_pipeline( active_monitor.start_task(task_id=task.id) try: reused = resolve_task(task, state, console) + finish_outputs( + sig=sig, + arguments=arguments, + state=state, + monitor=active_monitor, + require_all=False, + ) active_monitor.advance_task(task_id=task.id, advance=1) active_monitor.finish_task( task_id=task.id, @@ -83,25 +94,35 @@ def run_serial_pipeline( ) completed_task_ids.add(task.id) except Exception as exc: # noqa: BLE001 - active_monitor.finish_task(task_id=task.id, status="failed", error=str(exc)) + active_monitor.finish_task( + task_id=task.id, status="failed", error=str(exc) + ) for skipped_task in tasks: - if skipped_task.id == task.id or skipped_task.id in completed_task_ids: + if ( + skipped_task.id == task.id + or skipped_task.id in completed_task_ids + ): continue active_monitor.finish_task( task_id=skipped_task.id, status="skipped", error=f"Skipped because task {task.id!r} failed.", ) + if state.save_output_started: + active_monitor.finish_save_output() raise - active_monitor.start_save_output() - finish_outputs( - sig=sig, - arguments=arguments, - state=state, - monitor=active_monitor, - ) - active_monitor.finish_save_output() + try: + finish_outputs( + sig=sig, + arguments=arguments, + state=state, + monitor=active_monitor, + require_all=True, + ) + finally: + if state.save_output_started: + active_monitor.finish_save_output() finally: active_monitor.finish_pipeline() diff --git a/src/adagio/executors/task_environments.py b/src/adagio/executors/task_environments.py index 4401f85..0cf9693 100644 --- a/src/adagio/executors/task_environments.py +++ b/src/adagio/executors/task_environments.py @@ -111,7 +111,9 @@ def _execute_plugin_action( elif column.kind == "promoted": column_name = 
str(state.params[column.id]) else: - raise TypeError(f"Unsupported metadata column kind: {column.kind!r}") + raise TypeError( + f"Unsupported metadata column kind: {column.kind!r}" + ) metadata_column_kwargs[name] = {"source": name, "column": column_name} else: raise TypeError(f"Unsupported parameter kind: {param.kind!r}") @@ -164,13 +166,20 @@ def _save_outputs( arguments: AdagioArguments, state: SerialExecutionState, monitor: Monitor | None, + require_all: bool = True, ) -> None: if isinstance(arguments.outputs, str): os.makedirs(arguments.outputs, exist_ok=True) for output in sig.outputs: + if output.id in state.saved_output_ids: + continue if output.id not in state.scope: - raise KeyError(f"Missing output value for {output.name!r} ({output.id}).") + if require_all: + raise KeyError( + f"Missing output value for {output.name!r} ({output.id})." + ) + continue source_path = Path(state.scope[output.id]) destination = resolve_output_destination( @@ -184,6 +193,10 @@ def _save_outputs( if parent: os.makedirs(parent, exist_ok=True) + if monitor is not None and not state.save_output_started: + monitor.start_save_output() + state.save_output_started = True + try: shutil.copy2(source_path, destination) except Exception as exc: # noqa: BLE001 @@ -204,3 +217,4 @@ def _save_outputs( destination=destination, status="succeeded", ) + state.saved_output_ids.add(output.id) diff --git a/tests/test_serial_runner.py b/tests/test_serial_runner.py new file mode 100644 index 0000000..8ccad51 --- /dev/null +++ b/tests/test_serial_runner.py @@ -0,0 +1,177 @@ +import tempfile +import unittest +from dataclasses import dataclass, field +from pathlib import Path + +from adagio.executors.serial_runner import run_serial_pipeline +from adagio.executors.task_environments import _save_outputs +from adagio.model.arguments import AdagioArguments +from adagio.monitor.api import Monitor + + +@dataclass(frozen=True) +class FakeEndpoint: + id: str + + +@dataclass(frozen=True) +class FakeOutputDef: + 
id: str + name: str + + +@dataclass +class FakeTask: + id: str + outputs: dict[str, FakeEndpoint] + kind: str = "plugin-action" + plugin: str = "dummy" + action: str = "action" + inputs: dict[str, FakeEndpoint] = field(default_factory=dict) + + +class FakeSignature: + def __init__(self, outputs: list[FakeOutputDef]) -> None: + self.inputs: list[object] = [] + self.parameters: list[object] = [] + self.outputs = outputs + + def validate_arguments(self, arguments: AdagioArguments) -> None: + del arguments + + def get_params(self, arguments: AdagioArguments) -> dict[str, object]: + del arguments + return {} + + +class FakePipeline: + def __init__(self, *, tasks: list[FakeTask], outputs: list[FakeOutputDef]) -> None: + self.signature = FakeSignature(outputs) + self._tasks = tasks + + def validate_graph(self) -> None: + return None + + def iter_tasks(self): + return iter(self._tasks) + + +class RecordingMonitor(Monitor): + def __init__(self) -> None: + self.save_start_count = 0 + self.save_finish_count = 0 + self.saved_outputs: list[tuple[str, str, str, str]] = [] + + def start_save_output(self) -> None: + self.save_start_count += 1 + + def finish_output( + self, + *, + output_id: str, + output_name: str, + destination: str, + status: str = "succeeded", + error: str | None = None, + ) -> None: + del error + self.saved_outputs.append((output_id, output_name, destination, status)) + + def finish_save_output(self) -> None: + self.save_finish_count += 1 + + +class SerialRunnerOutputTests(unittest.TestCase): + def test_preserves_completed_output_when_later_task_fails(self) -> None: + output_def = FakeOutputDef(id="out-1", name="result") + pipeline = FakePipeline( + tasks=[ + FakeTask(id="task-1", outputs={"result": FakeEndpoint("out-1")}), + FakeTask(id="task-2", outputs={"other": FakeEndpoint("out-2")}), + ], + outputs=[output_def], + ) + monitor = RecordingMonitor() + + with tempfile.TemporaryDirectory() as tmpdir: + root = Path(tmpdir) + output_dir = root / "outputs" + 
arguments = AdagioArguments( + inputs={}, parameters={}, outputs=str(output_dir) + ) + + def resolve_task(task, state, console): # noqa: ANN001 + del console + if task.id == "task-1": + produced = state.work_path / "task-1_result.qza" + produced.write_text("done", encoding="utf-8") + state.scope["out-1"] = str(produced) + return False + raise RuntimeError("task 2 failed") + + with self.assertRaisesRegex(RuntimeError, "task 2 failed"): + run_serial_pipeline( + pipeline=pipeline, + arguments=arguments, + resolve_task=resolve_task, + finish_outputs=_save_outputs, + monitor=monitor, + ) + + saved_path = output_dir / "result.qza" + self.assertTrue(saved_path.exists()) + self.assertEqual(saved_path.read_text(encoding="utf-8"), "done") + self.assertEqual(monitor.save_start_count, 1) + self.assertEqual(monitor.save_finish_count, 1) + + def test_saves_each_output_only_once_across_multiple_tasks(self) -> None: + output_def = FakeOutputDef(id="out-1", name="result") + pipeline = FakePipeline( + tasks=[ + FakeTask(id="task-1", outputs={"result": FakeEndpoint("out-1")}), + FakeTask(id="task-2", outputs={"other": FakeEndpoint("out-2")}), + ], + outputs=[output_def], + ) + monitor = RecordingMonitor() + + with tempfile.TemporaryDirectory() as tmpdir: + root = Path(tmpdir) + output_dir = root / "outputs" + arguments = AdagioArguments( + inputs={}, parameters={}, outputs=str(output_dir) + ) + + def resolve_task(task, state, console): # noqa: ANN001 + del console + if task.id == "task-1": + produced = state.work_path / "task-1_result.qza" + produced.write_text("done", encoding="utf-8") + state.scope["out-1"] = str(produced) + return False + produced = state.work_path / "task-2_other.qza" + produced.write_text("other", encoding="utf-8") + state.scope["out-2"] = str(produced) + return False + + run_serial_pipeline( + pipeline=pipeline, + arguments=arguments, + resolve_task=resolve_task, + finish_outputs=_save_outputs, + monitor=monitor, + ) + + self.assertEqual( + 
monitor.saved_outputs, + [ + ( + output_def.id, + output_def.name, + str(output_dir / "result.qza"), + "succeeded", + ) + ], + ) + self.assertEqual(monitor.save_start_count, 1) + self.assertEqual(monitor.save_finish_count, 1) From 6dd6b176aedd5e4fe283bc015cdecda612a3a73e Mon Sep 17 00:00:00 2001 From: John Chase Date: Mon, 30 Mar 2026 21:38:51 -0700 Subject: [PATCH 36/44] Adds collection support --- src/adagio/cli/task_exec.py | 7 +++ src/adagio/describe.py | 21 +++++++- src/adagio/executors/apptainer.py | 11 +++- src/adagio/executors/base.py | 1 + src/adagio/executors/common.py | 16 +++++- src/adagio/executors/docker.py | 11 +++- src/adagio/executors/task_contract.py | 2 + src/adagio/executors/task_environments.py | 11 ++-- src/adagio/model/task.py | 25 ++++++++- tests/test_apptainer_launcher.py | 3 ++ tests/test_docker_launcher.py | 64 ++++++++++++++--------- tests/test_pipeline_show.py | 59 +++++++++++++++++++++ 12 files changed, 196 insertions(+), 35 deletions(-) diff --git a/src/adagio/cli/task_exec.py b/src/adagio/cli/task_exec.py index 375af4a..6a69fd5 100644 --- a/src/adagio/cli/task_exec.py +++ b/src/adagio/cli/task_exec.py @@ -37,6 +37,7 @@ def _run_task(spec: dict[str, Any]) -> None: plugin_name: str = spec["plugin"] action_name: str = spec["action"] archive_inputs: dict[str, str] = spec.get("archive_inputs", {}) + archive_collection_inputs: dict[str, list[str]] = spec.get("archive_collection_inputs", {}) metadata_inputs: dict[str, str] = spec.get("metadata_inputs", {}) params: dict[str, Any] = spec.get("params", {}) metadata_column_kwargs: dict[str, dict[str, str]] = spec.get("metadata_column_kwargs", {}) @@ -74,6 +75,12 @@ def _run_task(spec: dict[str, Any]) -> None: loaded = Artifact.load(path) kwargs[name] = _cache_loaded_input(cache=cache, value=loaded) + for name, paths in archive_collection_inputs.items(): + kwargs[name] = [ + _cache_loaded_input(cache=cache, value=Artifact.load(path)) + for path in paths + ] + loaded_metadata: dict[str, Metadata] 
= {} for name, path in metadata_inputs.items(): if zipfile.is_zipfile(path): diff --git a/src/adagio/describe.py b/src/adagio/describe.py index e79aa2c..484e920 100644 --- a/src/adagio/describe.py +++ b/src/adagio/describe.py @@ -9,7 +9,13 @@ from .cli.dynamic import _compact_type_text from .executors.common import plan_execution_order from .model.pipeline import AdagioPipeline -from .model.task import LiteralVal, MetadataVal, PluginActionTask, PromotedVal, RootInputTask +from .model.task import ( + LiteralVal, + MetadataVal, + PluginActionTask, + PromotedVal, + RootInputTask, +) @dataclass(frozen=True) @@ -127,6 +133,19 @@ def _append_input_lines( return for input_name, source in task.inputs.items(): + if source.kind == "archive-collection": + labels = [ + available_ids.get(item.id, _unknown_reference(item.id)).label + for item in source.items + ] + _append_entry_line( + rendered, + name=input_name, + type_label="list", + value_text=f"[{', '.join(labels)}]", + description=None, + ) + continue reference = available_ids.get(source.id, _unknown_reference(source.id)) _append_entry_line( rendered, diff --git a/src/adagio/executors/apptainer.py b/src/adagio/executors/apptainer.py index c7408da..97f277c 100644 --- a/src/adagio/executors/apptainer.py +++ b/src/adagio/executors/apptainer.py @@ -49,6 +49,10 @@ def launch( name: containerize_host_value(value) for name, value in request.archive_inputs.items() } + archive_collection_inputs = { + name: [containerize_host_value(value) for value in values] + for name, values in request.archive_collection_inputs.items() + } metadata_inputs = { name: containerize_host_value(value) for name, value in request.metadata_inputs.items() @@ -66,6 +70,7 @@ def launch( plugin=task.plugin, action=task.action, archive_inputs=archive_inputs, + archive_collection_inputs=archive_collection_inputs, metadata_inputs=metadata_inputs, params=dict(request.params), metadata_column_kwargs=dict(request.metadata_column_kwargs), @@ -91,8 +96,10 @@ def 
launch( ] host_paths = [request.cwd, request.work_path, python_root] - for value in list(request.archive_inputs.values()) + list( - request.metadata_inputs.values() + for value in ( + list(request.archive_inputs.values()) + + [item for values in request.archive_collection_inputs.values() for item in values] + + list(request.metadata_inputs.values()) ): if is_uri(value): continue diff --git a/src/adagio/executors/base.py b/src/adagio/executors/base.py index 9446a9a..eb650c6 100644 --- a/src/adagio/executors/base.py +++ b/src/adagio/executors/base.py @@ -47,6 +47,7 @@ class TaskExecutionRequest: cwd: Path work_path: Path archive_inputs: Mapping[str, str] + archive_collection_inputs: Mapping[str, list[str]] metadata_inputs: Mapping[str, str] params: Mapping[str, Any] metadata_column_kwargs: Mapping[str, Mapping[str, str]] diff --git a/src/adagio/executors/common.py b/src/adagio/executors/common.py index 6e4e884..5f5b1eb 100644 --- a/src/adagio/executors/common.py +++ b/src/adagio/executors/common.py @@ -1,5 +1,7 @@ import typing as t +from adagio.model.task import input_source_ids + def plan_execution_order(*, tasks: list[t.Any], scope: dict[str, t.Any]) -> list[t.Any]: """Return a dependency-respecting serial execution plan.""" @@ -10,7 +12,12 @@ def plan_execution_order(*, tasks: list[t.Any], scope: dict[str, t.Any]) -> list while remaining: progressed = False for task in list(remaining): - missing = [src.id for src in task.inputs.values() if src.id not in available_ids] + missing = [ + source_id + for src in task.inputs.values() + for source_id in input_source_ids(src) + if source_id not in available_ids + ] if missing: continue @@ -23,7 +30,12 @@ def plan_execution_order(*, tasks: list[t.Any], scope: dict[str, t.Any]) -> list if not progressed: details = [] for task in remaining: - missing = ", ".join(src.id for src in task.inputs.values() if src.id not in available_ids) + missing = ", ".join( + source_id + for src in task.inputs.values() + for source_id in 
input_source_ids(src) + if source_id not in available_ids + ) details.append(f"{task.id}: missing [{missing}]") raise RuntimeError("Unable to resolve task dependencies. " + "; ".join(details)) diff --git a/src/adagio/executors/docker.py b/src/adagio/executors/docker.py index 176c62b..2d7898e 100644 --- a/src/adagio/executors/docker.py +++ b/src/adagio/executors/docker.py @@ -46,6 +46,10 @@ def launch( name: containerize_host_value(value) for name, value in request.archive_inputs.items() } + archive_collection_inputs = { + name: [containerize_host_value(value) for value in values] + for name, values in request.archive_collection_inputs.items() + } metadata_inputs = { name: containerize_host_value(value) for name, value in request.metadata_inputs.items() @@ -61,6 +65,7 @@ def launch( plugin=task.plugin, action=task.action, archive_inputs=archive_inputs, + archive_collection_inputs=archive_collection_inputs, metadata_inputs=metadata_inputs, params=dict(request.params), metadata_column_kwargs=dict(request.metadata_column_kwargs), @@ -107,7 +112,11 @@ def launch( ]) host_paths = [request.cwd, request.work_path, python_root] - for value in list(request.archive_inputs.values()) + list(request.metadata_inputs.values()): + for value in ( + list(request.archive_inputs.values()) + + [item for values in request.archive_collection_inputs.values() for item in values] + + list(request.metadata_inputs.values()) + ): if is_uri(value): continue path = Path(value) diff --git a/src/adagio/executors/task_contract.py b/src/adagio/executors/task_contract.py index fc1b05b..3998922 100644 --- a/src/adagio/executors/task_contract.py +++ b/src/adagio/executors/task_contract.py @@ -34,6 +34,7 @@ def build_task_spec( plugin: str, action: str, archive_inputs: dict[str, str], + archive_collection_inputs: dict[str, list[str]], metadata_inputs: dict[str, str], params: dict[str, Any], metadata_column_kwargs: dict[str, dict[str, str]], @@ -46,6 +47,7 @@ def build_task_spec( "plugin": plugin, 
"action": action, "archive_inputs": archive_inputs, + "archive_collection_inputs": archive_collection_inputs, "metadata_inputs": metadata_inputs, "params": params, "metadata_column_kwargs": metadata_column_kwargs, diff --git a/src/adagio/executors/task_environments.py b/src/adagio/executors/task_environments.py index 0cf9693..2654b4e 100644 --- a/src/adagio/executors/task_environments.py +++ b/src/adagio/executors/task_environments.py @@ -87,13 +87,17 @@ def _execute_plugin_action( ) archive_inputs: dict[str, str] = {} + archive_collection_inputs: dict[str, list[str]] = {} metadata_inputs: dict[str, str] = {} for name, src in task.inputs.items(): - value = state.scope[src.id] if src.kind == "archive": - archive_inputs[name] = value + archive_inputs[name] = state.scope[src.id] + elif src.kind == "archive-collection": + archive_collection_inputs[name] = [ + state.scope[item.id] for item in src.items + ] elif src.kind == "metadata": - metadata_inputs[name] = value + metadata_inputs[name] = state.scope[src.id] else: raise TypeError(f"Unsupported input kind: {src.kind!r}") @@ -128,6 +132,7 @@ def _execute_plugin_action( cwd=state.cwd, work_path=state.work_path, archive_inputs=archive_inputs, + archive_collection_inputs=archive_collection_inputs, metadata_inputs=metadata_inputs, params=resolved_params, metadata_column_kwargs=metadata_column_kwargs, diff --git a/src/adagio/model/task.py b/src/adagio/model/task.py index 997f2af..f932d68 100644 --- a/src/adagio/model/task.py +++ b/src/adagio/model/task.py @@ -5,7 +5,7 @@ class _BaseTask(BaseModel): id: str kind: str - inputs: dict[str, 'InputVal'] + inputs: dict[str, 'TaskInputVal'] parameters: dict[str, 'LiteralVal | MetadataVal | PromotedVal'] outputs: dict[str, 'OutputVal'] @@ -29,6 +29,8 @@ def exec(self, ctx, params, scope): for name, src in self.inputs.items(): if src.kind == 'archive': kwargs[name] = scope[src.id] + elif src.kind == 'archive-collection': + kwargs[name] = [scope[item.id] for item in src.items] elif 
src.kind == 'metadata': # store for second pass in params metadata[name] = scope[src.id] @@ -81,6 +83,17 @@ class InputVal(BaseModel): id: str +class ArchiveCollectionItemVal(BaseModel): + key: str + id: str + + +class ArchiveCollectionInputVal(BaseModel): + kind: t.Literal['archive-collection'] + style: t.Literal['list'] + items: list[ArchiveCollectionItemVal] + + class OutputVal(BaseModel): kind: t.Literal['archive'] id: str @@ -108,5 +121,15 @@ class MetadataVal(BaseModel): Primitive = int | float | str | bool | t.Literal[None] Collection = list[Primitive] | dict[str, Primitive] AllowableValue = Primitive | Collection +TaskInputVal = t.Annotated[ + t.Union[InputVal, ArchiveCollectionInputVal], + Field(discriminator='kind') +] AdagioTask = t.Annotated[t.Union[PluginActionTask, RootInputTask], Field(discriminator='kind')] + + +def input_source_ids(value: TaskInputVal) -> list[str]: + if value.kind == 'archive-collection': + return [item.id for item in value.items] + return [value.id] diff --git a/tests/test_apptainer_launcher.py b/tests/test_apptainer_launcher.py index 59bc16d..5032d2a 100644 --- a/tests/test_apptainer_launcher.py +++ b/tests/test_apptainer_launcher.py @@ -56,6 +56,7 @@ def test_launch_builds_apptainer_exec_command(self) -> None: cwd=cwd, work_path=work_path, archive_inputs={"seqs": str(input_path)}, + archive_collection_inputs={}, metadata_inputs={}, params={}, metadata_column_kwargs={}, @@ -143,6 +144,7 @@ def test_launch_falls_back_to_singularity(self) -> None: cwd=cwd, work_path=work_path, archive_inputs={}, + archive_collection_inputs={}, metadata_inputs={}, params={}, metadata_column_kwargs={}, @@ -195,6 +197,7 @@ def test_launch_rejects_non_local_image_reference(self) -> None: cwd=cwd, work_path=work_path, archive_inputs={}, + archive_collection_inputs={}, metadata_inputs={}, params={}, metadata_column_kwargs={}, diff --git a/tests/test_docker_launcher.py b/tests/test_docker_launcher.py index cb318cd..e41d4b6 100644 --- 
a/tests/test_docker_launcher.py +++ b/tests/test_docker_launcher.py @@ -13,6 +13,7 @@ from adagio.executors.docker import DockerTaskEnvironmentLauncher from adagio.executors.task_contract import ( build_result_manifest, + read_json_file, result_manifest_path, task_spec_path, write_json_file, @@ -47,13 +48,18 @@ def test_launch_builds_docker_run_command(self) -> None: work_path.mkdir() output_path = work_path / "summary.qzv" input_path = cwd / "input.qza" + collection_input_path = cwd / "collection-input.qza" input_path.write_text("input", encoding="utf-8") + collection_input_path.write_text("collection", encoding="utf-8") request = TaskExecutionRequest( task=task, cwd=cwd, work_path=work_path, archive_inputs={"data": str(input_path)}, + archive_collection_inputs={ + "tables": [str(collection_input_path)] + }, metadata_inputs={}, params={}, metadata_column_kwargs={}, @@ -87,29 +93,37 @@ def fake_run(cmd, check, stdout, stderr, text): # noqa: ANN001 request=request, ) - command = run_mock.call_args.args[0] - python_root = container_python_root(work_path=work_path) - bind_targets = { - f"{root_path}:{containerize_path(root_path)}:rw" - for root_path in mount_roots([cwd, work_path, input_path, python_root]) - } + task_spec = read_json_file(task_spec_path(task_id=task.id, work_path=work_path)) + + command = run_mock.call_args.args[0] + python_root = container_python_root(work_path=work_path) + bind_targets = { + f"{root_path}:{containerize_path(root_path)}:rw" + for root_path in mount_roots( + [cwd, work_path, input_path, collection_input_path, python_root] + ) + } - self.assertEqual(command[0], "docker") - self.assertEqual(command[1], "run") - self.assertEqual(command[2], "--rm") - self.assertIn("-w", command) - self.assertIn(containerize_path(cwd), command) - self.assertIn( - f"PYTHONPATH={containerize_path(python_root)}", - command, - ) - self.assertIn("PYTHONNOUSERSITE=1", command) - self.assertIn("python", command) - self.assertIn("-m", command) - 
self.assertIn("adagio.cli.task_exec", command) - self.assertIn("--task", command) - self.assertIn(expected_spec, command) - self.assertIn("ghcr.io/cymis/qiime2-plugin-demux:2026.1", command) - self.assertTrue(bind_targets.issubset(set(command))) - self.assertEqual(result.outputs, {"visualization": str(output_path)}) - self.assertFalse(result.reused) + self.assertEqual(command[0], "docker") + self.assertEqual(command[1], "run") + self.assertEqual(command[2], "--rm") + self.assertIn("-w", command) + self.assertIn(containerize_path(cwd), command) + self.assertIn( + f"PYTHONPATH={containerize_path(python_root)}", + command, + ) + self.assertIn("PYTHONNOUSERSITE=1", command) + self.assertIn("python", command) + self.assertIn("-m", command) + self.assertIn("adagio.cli.task_exec", command) + self.assertIn("--task", command) + self.assertIn(expected_spec, command) + self.assertIn("ghcr.io/cymis/qiime2-plugin-demux:2026.1", command) + self.assertTrue(bind_targets.issubset(set(command))) + self.assertEqual( + task_spec["archive_collection_inputs"], + {"tables": [containerize_path(collection_input_path)]}, + ) + self.assertEqual(result.outputs, {"visualization": str(output_path)}) + self.assertFalse(result.reused) diff --git a/tests/test_pipeline_show.py b/tests/test_pipeline_show.py index 324e899..4324f9c 100644 --- a/tests/test_pipeline_show.py +++ b/tests/test_pipeline_show.py @@ -120,6 +120,54 @@ def _sample_pipeline_dict() -> dict: } +def _collection_pipeline_dict() -> dict: + return { + "type": "pipeline", + "signature": { + "inputs": [ + { + "id": "input-table-a", + "name": "table_a", + "type": "FeatureTable[Frequency]", + "ast": AST, + "required": True, + "description": "First table.", + }, + { + "id": "input-table-b", + "name": "table_b", + "type": "FeatureTable[Frequency]", + "ast": AST, + "required": True, + "description": "Second table.", + }, + ], + "parameters": [], + "outputs": [], + }, + "graph": [ + { + "id": "task-merge", + "kind": "plugin-action", + 
"plugin": "feature_table", + "action": "merge", + "inputs": { + "tables": { + "kind": "archive-collection", + "style": "list", + "items": [ + {"key": "0", "id": "input-table-a"}, + {"key": "1", "id": "input-table-b"}, + ], + } + }, + "parameters": {}, + "outputs": {}, + } + ], + } + + def _render_plain(renderable: object) -> str: console = Console(record=True, width=160, file=io.StringIO()) console.print(renderable, soft_wrap=True) @@ -177,6 +225,17 @@ def test_pipeline_show_cli_prints_summary(self) -> None: self.assertIn('barcodes: (MetadataColumn[Categorical]) pipeline input "barcodes"', result.stdout) self.assertIn('table (FeatureTable[Frequency])', result.stdout) + def test_render_pipeline_text_displays_collection_inputs(self) -> None: + pipeline = AdagioPipeline.model_validate(_collection_pipeline_dict()) + + rendered = _render_plain(render_pipeline_text(pipeline)) + + self.assertIn("feature_table.merge", rendered) + self.assertIn( + 'tables: list [pipeline input "table_a", pipeline input "table_b"]', + rendered, + ) + if __name__ == "__main__": unittest.main() From 6a425ba48c18e66e15bb29af2bf8c0a89c6abf95 Mon Sep 17 00:00:00 2001 From: John Chase Date: Thu, 2 Apr 2026 21:57:49 -0700 Subject: [PATCH 37/44] fix flicker --- src/adagio/monitor/tty.py | 48 ++++++++++++++++++++++++--------------- 1 file changed, 30 insertions(+), 18 deletions(-) diff --git a/src/adagio/monitor/tty.py b/src/adagio/monitor/tty.py index f8c4fc1..17ddc12 100644 --- a/src/adagio/monitor/tty.py +++ b/src/adagio/monitor/tty.py @@ -13,6 +13,7 @@ BAR_WIDTH = 28 COUNTER_WIDTH = 5 ELAPSED_WIDTH = 4 +ELAPSED_REFRESH_POLL_SECONDS = 0.2 @dataclass @@ -25,6 +26,7 @@ class _TaskState: error: str | None = None started_at: float | None = None finished_at: float | None = None + last_rendered_elapsed_seconds: int | None = None class RichMonitor(Monitor): @@ -36,6 +38,7 @@ def __init__(self, *, console: Console | None = None): self._progress = Progress( TextColumn("{task.fields[row]}"), 
console=self._console, + auto_refresh=False, expand=True, transient=False, ) @@ -60,8 +63,8 @@ def start_pipeline(self, *, total_tasks: int = 0) -> None: self._pipeline_started = True self._total_tasks = total_tasks self._stop_refresh.clear() - self._progress.start() self._console.print("[bold]Task Progress[/bold]") + self._progress.start() self._refresh_thread = threading.Thread( target=self._refresh_loop, name="adagio-rich-monitor", @@ -161,23 +164,28 @@ def _refresh_row(self, task: _TaskState, *, refresh: bool = True) -> None: completed=task.completed_subtasks, row=self._render_row(task), ) + task.last_rendered_elapsed_seconds = _elapsed_seconds(task) if refresh: self._progress.refresh() def _refresh_loop(self) -> None: """Refresh running task rows so elapsed time stays current.""" - while not self._stop_refresh.wait(0.5): + while not self._stop_refresh.wait(ELAPSED_REFRESH_POLL_SECONDS): with self._lock: - running = [ - task - for task in self._task_lookup.values() - if task.status == "running" - ] - if not running: - continue - for task in running: - self._refresh_row(task, refresh=False) - self._progress.refresh() + self._refresh_running_rows() + + def _refresh_running_rows(self) -> None: + """Refresh only rows whose displayed elapsed time has changed.""" + needs_refresh = False + for task in self._task_lookup.values(): + if task.status != "running": + continue + if _elapsed_seconds(task) == task.last_rendered_elapsed_seconds: + continue + self._refresh_row(task, refresh=False) + needs_refresh = True + if needs_refresh: + self._progress.refresh() def _render_row(self, task: _TaskState) -> str: """Build a compact row for a task.""" @@ -231,12 +239,16 @@ def _bar_text(completed: int, total: int, color: str, width: int = 28) -> str: def _elapsed(task: _TaskState) -> str: """Format elapsed task time as M:SS.""" + seconds = _elapsed_seconds(task) + minutes, sec = divmod(seconds, 60) + return f"{minutes}:{sec:02d}" + + +def _elapsed_seconds(task: _TaskState) -> 
int: + """Return elapsed task time in whole seconds.""" start = task.started_at if start is None: - return "0:00" + return 0 if task.finished_at is not None: - seconds = max(0, int(task.finished_at - start)) - else: - seconds = max(0, int(time.monotonic() - start)) - minutes, sec = divmod(seconds, 60) - return f"{minutes}:{sec:02d}" + return max(0, int(task.finished_at - start)) + return max(0, int(time.monotonic() - start)) From 421eee836c1eaaa1f79aea6364df2798af813dd3 Mon Sep 17 00:00:00 2001 From: John Chase Date: Fri, 3 Apr 2026 10:26:45 -0700 Subject: [PATCH 38/44] Second attempt at fixing flicker --- src/adagio/monitor/tty.py | 161 ++++++++++++++------------------------ 1 file changed, 60 insertions(+), 101 deletions(-) diff --git a/src/adagio/monitor/tty.py b/src/adagio/monitor/tty.py index 17ddc12..33625a0 100644 --- a/src/adagio/monitor/tty.py +++ b/src/adagio/monitor/tty.py @@ -1,5 +1,4 @@ import re -import threading import time from dataclasses import dataclass @@ -13,7 +12,6 @@ BAR_WIDTH = 28 COUNTER_WIDTH = 5 ELAPSED_WIDTH = 4 -ELAPSED_REFRESH_POLL_SECONDS = 0.2 @dataclass @@ -26,7 +24,6 @@ class _TaskState: error: str | None = None started_at: float | None = None finished_at: float | None = None - last_rendered_elapsed_seconds: int | None = None class RichMonitor(Monitor): @@ -49,113 +46,93 @@ def __init__(self, *, console: Console | None = None): "failed": 0, "skipped": 0, } - self._lock = threading.RLock() - self._stop_refresh = threading.Event() - self._refresh_thread: threading.Thread | None = None self._pipeline_started = False self._total_tasks = 0 def start_pipeline(self, *, total_tasks: int = 0) -> None: """Start rendering pipeline progress.""" - with self._lock: - if self._pipeline_started: - return - self._pipeline_started = True - self._total_tasks = total_tasks - self._stop_refresh.clear() - self._console.print("[bold]Task Progress[/bold]") - self._progress.start() - self._refresh_thread = threading.Thread( - target=self._refresh_loop, - 
name="adagio-rich-monitor", - daemon=True, - ) - self._refresh_thread.start() + if self._pipeline_started: + return + self._pipeline_started = True + self._total_tasks = total_tasks + self._console.print("[bold]Task Progress[/bold]") + self._progress.start() def queue_task( self, *, task_id: str, label: str, total_subtasks: int = 1 ) -> None: """Queue a task row in the progress view.""" - with self._lock: - total = max(total_subtasks, 1) - state = _TaskState( - progress_task_id=-1, - label=label, - total_subtasks=total, - ) - row = self._render_row(state) - progress_task_id = self._progress.add_task( - description="", - total=total, - completed=0, - row=row, - ) - state.progress_task_id = progress_task_id - self._task_lookup[task_id] = state + total = max(total_subtasks, 1) + state = _TaskState( + progress_task_id=-1, + label=label, + total_subtasks=total, + ) + row = self._render_row(state) + progress_task_id = self._progress.add_task( + description="", + total=total, + completed=0, + row=row, + ) + state.progress_task_id = progress_task_id + self._task_lookup[task_id] = state def start_task(self, *, task_id: str) -> None: """Mark a task as running.""" - with self._lock: - task = self._task_lookup.get(task_id) - if task is None: - return - task.status = "running" - task.started_at = time.monotonic() - self._refresh_row(task, refresh=False) - self._progress.refresh() + task = self._task_lookup.get(task_id) + if task is None: + return + task.status = "running" + task.started_at = time.monotonic() + self._refresh_row(task, refresh=False) + self._progress.refresh() def advance_task( self, *, task_id: str, advance: int = 1, message: str | None = None ) -> None: """Advance a task's subtask progress.""" del message - with self._lock: - task = self._task_lookup.get(task_id) - if task is None: - return - task.completed_subtasks = min( - task.total_subtasks, task.completed_subtasks + max(advance, 0) - ) - self._refresh_row(task) + task = self._task_lookup.get(task_id) + if 
task is None: + return + task.completed_subtasks = min( + task.total_subtasks, task.completed_subtasks + max(advance, 0) + ) + self._refresh_row(task) def finish_task( self, *, task_id: str, status: str = "completed", error: str | None = None ) -> None: """Mark a task as finished.""" - with self._lock: - task = self._task_lookup.get(task_id) - if task is None: - return - - task.status = status - task.error = error - task.finished_at = time.monotonic() - if status in {"completed", "cached", "skipped"}: - task.completed_subtasks = task.total_subtasks - if status in self._status_counts: - self._status_counts[status] += 1 - self._refresh_row(task) + task = self._task_lookup.get(task_id) + if task is None: + return + + task.status = status + task.error = error + task.finished_at = time.monotonic() + if status in {"completed", "cached", "skipped"}: + task.completed_subtasks = task.total_subtasks + if status in self._status_counts: + self._status_counts[status] += 1 + self._refresh_row(task) def finish_pipeline(self) -> None: """Stop rendering and print a summary.""" if not self._pipeline_started: return - self._stop_refresh.set() - if self._refresh_thread is not None: - self._refresh_thread.join(timeout=1.0) - self._refresh_thread = None - with self._lock: - self._progress.stop() - pending = self._total_tasks - sum(self._status_counts.values()) - self._console.print( - "Summary: " - f"{self._status_counts['completed']} completed, " - f"{self._status_counts['cached']} cached, " - f"{self._status_counts['failed']} failed, " - f"{self._status_counts['skipped']} skipped, " - f"{max(pending, 0)} pending" - ) - self._pipeline_started = False + self._progress.stop() + pending = self._total_tasks - sum(self._status_counts.values()) + self._console.print( + "Summary: " + f"{self._status_counts['completed']} completed, " + f"{self._status_counts['cached']} cached, " + f"{self._status_counts['failed']} failed, " + f"{self._status_counts['skipped']} skipped, " + f"{max(pending, 0)} 
pending" + ) + self._pipeline_started = False def _refresh_row(self, task: _TaskState, *, refresh: bool = True) -> None: """Refresh a rendered task row.""" @@ -164,29 +141,9 @@ def _refresh_row(self, task: _TaskState, *, refresh: bool = True) -> None: completed=task.completed_subtasks, row=self._render_row(task), ) - task.last_rendered_elapsed_seconds = _elapsed_seconds(task) if refresh: self._progress.refresh() - def _refresh_loop(self) -> None: - """Refresh running task rows so elapsed time stays current.""" - while not self._stop_refresh.wait(ELAPSED_REFRESH_POLL_SECONDS): - with self._lock: - self._refresh_running_rows() - - def _refresh_running_rows(self) -> None: - """Refresh only rows whose displayed elapsed time has changed.""" - needs_refresh = False - for task in self._task_lookup.values(): - if task.status != "running": - continue - if _elapsed_seconds(task) == task.last_rendered_elapsed_seconds: - continue - self._refresh_row(task, refresh=False) - needs_refresh = True - if needs_refresh: - self._progress.refresh() - def _render_row(self, task: _TaskState) -> str: """Build a compact row for a task.""" badge_text, color = _status_style(task.status) @@ -239,6 +196,8 @@ def _bar_text(completed: int, total: int, color: str, width: int = 28) -> str: def _elapsed(task: _TaskState) -> str: """Format elapsed task time as M:SS.""" + if task.started_at is not None and task.finished_at is None: + return "..." 
seconds = _elapsed_seconds(task) minutes, sec = divmod(seconds, 60) return f"{minutes}:{sec:02d}" From 1c5d563658308e69f692fdad67d887bad8dfcfe3 Mon Sep 17 00:00:00 2001 From: John Chase Date: Fri, 3 Apr 2026 11:22:02 -0700 Subject: [PATCH 39/44] Next attempt at render --- src/adagio/executors/apptainer.py | 3 +- src/adagio/executors/docker.py | 3 +- src/adagio/monitor/tty.py | 230 ++++++++++++++++++++---------- 3 files changed, 159 insertions(+), 77 deletions(-) diff --git a/src/adagio/executors/apptainer.py b/src/adagio/executors/apptainer.py index 97f277c..b064ade 100644 --- a/src/adagio/executors/apptainer.py +++ b/src/adagio/executors/apptainer.py @@ -127,7 +127,8 @@ def launch( if console is not None: label = f"{Path(runtime_executable).name} {image_path}" - console.print(f"[dim]Task environment:[/dim] {label}") + if not getattr(console, "_adagio_inline_monitor_active", False): + console.print(f"[dim]Task environment:[/dim] {label}") try: result = subprocess.run( diff --git a/src/adagio/executors/docker.py b/src/adagio/executors/docker.py index 2d7898e..059a871 100644 --- a/src/adagio/executors/docker.py +++ b/src/adagio/executors/docker.py @@ -131,7 +131,8 @@ def launch( label = f"docker {environment.reference}" if platform: label = f"docker --platform {platform} {environment.reference}" - console.print(f"[dim]Task environment:[/dim] {label}") + if not getattr(console, "_adagio_inline_monitor_active", False): + console.print(f"[dim]Task environment:[/dim] {label}") try: result = subprocess.run( diff --git a/src/adagio/monitor/tty.py b/src/adagio/monitor/tty.py index 33625a0..85a7794 100644 --- a/src/adagio/monitor/tty.py +++ b/src/adagio/monitor/tty.py @@ -1,9 +1,11 @@ import re +import threading import time from dataclasses import dataclass from rich.console import Console -from rich.progress import Progress, TextColumn +from rich.control import Control +from rich.segment import ControlType from .api import Monitor @@ -12,11 +14,15 @@ BAR_WIDTH = 28 
COUNTER_WIDTH = 5 ELAPSED_WIDTH = 4 +ELAPSED_REFRESH_POLL_SECONDS = 0.2 +ELAPSED_COLUMN = ( + BADGE_WIDTH + 1 + LABEL_WIDTH + 1 + BAR_WIDTH + 2 + COUNTER_WIDTH + 2 +) @dataclass class _TaskState: - progress_task_id: int + task_id: str label: str total_subtasks: int completed_subtasks: int = 0 @@ -24,6 +30,7 @@ class _TaskState: error: str | None = None started_at: float | None = None finished_at: float | None = None + last_rendered_elapsed_seconds: int | None = None class RichMonitor(Monitor): @@ -32,117 +39,147 @@ class RichMonitor(Monitor): def __init__(self, *, console: Console | None = None): """Initialize the Rich monitor.""" self._console = console or Console() - self._progress = Progress( - TextColumn("{task.fields[row]}"), - console=self._console, - auto_refresh=False, - expand=True, - transient=False, + self._inline_updates = ( + self._console.is_terminal and not self._console.is_dumb_terminal ) self._task_lookup: dict[str, _TaskState] = {} + self._task_order: list[str] = [] self._status_counts: dict[str, int] = { "completed": 0, "cached": 0, "failed": 0, "skipped": 0, } + self._lock = threading.RLock() + self._stop_refresh = threading.Event() + self._refresh_thread: threading.Thread | None = None self._pipeline_started = False self._total_tasks = 0 def start_pipeline(self, *, total_tasks: int = 0) -> None: """Start rendering pipeline progress.""" - if self._pipeline_started: - return - self._pipeline_started = True - self._total_tasks = total_tasks - self._console.print("[bold]Task Progress[/bold]") - self._progress.start() + with self._lock: + if self._pipeline_started: + return + self._pipeline_started = True + self._total_tasks = total_tasks + self._stop_refresh.clear() + setattr(self._console, "_adagio_inline_monitor_active", self._inline_updates) + if self._inline_updates: + self._console.control(Control.show_cursor(False)) + self._console.print("[bold]Task Progress[/bold]") + if self._inline_updates: + self._refresh_thread = threading.Thread( + 
target=self._refresh_loop, + name="adagio-rich-monitor", + daemon=True, + ) + self._refresh_thread.start() def queue_task( self, *, task_id: str, label: str, total_subtasks: int = 1 ) -> None: """Queue a task row in the progress view.""" - total = max(total_subtasks, 1) - state = _TaskState( - progress_task_id=-1, - label=label, - total_subtasks=total, - ) - row = self._render_row(state) - progress_task_id = self._progress.add_task( - description="", - total=total, - completed=0, - row=row, - ) - state.progress_task_id = progress_task_id - self._task_lookup[task_id] = state + with self._lock: + total = max(total_subtasks, 1) + state = _TaskState( + task_id=task_id, + label=label, + total_subtasks=total, + ) + self._task_lookup[task_id] = state + self._task_order.append(task_id) + self._print_row(self._render_row(state)) def start_task(self, *, task_id: str) -> None: """Mark a task as running.""" - task = self._task_lookup.get(task_id) - if task is None: - return - task.status = "running" - task.started_at = time.monotonic() - self._refresh_row(task, refresh=False) - self._progress.refresh() + with self._lock: + task = self._task_lookup.get(task_id) + if task is None: + return + task.status = "running" + task.started_at = time.monotonic() + self._refresh_row(task) def advance_task( self, *, task_id: str, advance: int = 1, message: str | None = None ) -> None: """Advance a task's subtask progress.""" del message - task = self._task_lookup.get(task_id) - if task is None: - return - task.completed_subtasks = min( - task.total_subtasks, task.completed_subtasks + max(advance, 0) - ) - self._refresh_row(task) + with self._lock: + task = self._task_lookup.get(task_id) + if task is None: + return + task.completed_subtasks = min( + task.total_subtasks, task.completed_subtasks + max(advance, 0) + ) + self._refresh_row(task) def finish_task( self, *, task_id: str, status: str = "completed", error: str | None = None ) -> None: """Mark a task as finished.""" - task = 
self._task_lookup.get(task_id) - if task is None: - return + with self._lock: + task = self._task_lookup.get(task_id) + if task is None: + return - task.status = status - task.error = error - task.finished_at = time.monotonic() - if status in {"completed", "cached", "skipped"}: - task.completed_subtasks = task.total_subtasks - if status in self._status_counts: - self._status_counts[status] += 1 - self._refresh_row(task) + task.status = status + task.error = error + task.finished_at = time.monotonic() + if status in {"completed", "cached", "skipped"}: + task.completed_subtasks = task.total_subtasks + if status in self._status_counts: + self._status_counts[status] += 1 + self._refresh_row(task) def finish_pipeline(self) -> None: """Stop rendering and print a summary.""" if not self._pipeline_started: return - self._progress.stop() - pending = self._total_tasks - sum(self._status_counts.values()) - self._console.print( - "Summary: " - f"{self._status_counts['completed']} completed, " - f"{self._status_counts['cached']} cached, " - f"{self._status_counts['failed']} failed, " - f"{self._status_counts['skipped']} skipped, " - f"{max(pending, 0)} pending" - ) - self._pipeline_started = False + self._stop_refresh.set() + if self._refresh_thread is not None: + self._refresh_thread.join(timeout=1.0) + self._refresh_thread = None + with self._lock: + if self._inline_updates: + self._console.control(Control.show_cursor(True)) + setattr(self._console, "_adagio_inline_monitor_active", False) + pending = self._total_tasks - sum(self._status_counts.values()) + self._console.print( + "Summary: " + f"{self._status_counts['completed']} completed, " + f"{self._status_counts['cached']} cached, " + f"{self._status_counts['failed']} failed, " + f"{self._status_counts['skipped']} skipped, " + f"{max(pending, 0)} pending" + ) + self._pipeline_started = False - def _refresh_row(self, task: _TaskState, *, refresh: bool = True) -> None: + def _refresh_row(self, task: _TaskState) -> None: 
"""Refresh a rendered task row.""" - self._progress.update( - task.progress_task_id, - completed=task.completed_subtasks, - row=self._render_row(task), - ) - if refresh: - self._progress.refresh() + task.last_rendered_elapsed_seconds = _elapsed_seconds(task) + if self._inline_updates: + self._rewrite_task_row(task) + return + self._print_row(self._render_row(task)) + + def _refresh_loop(self) -> None: + """Refresh running task timers once per displayed second.""" + while not self._stop_refresh.wait(ELAPSED_REFRESH_POLL_SECONDS): + with self._lock: + self._refresh_running_timers() + + def _refresh_running_timers(self) -> None: + """Refresh only the elapsed field for running tasks that advanced.""" + for task in self._task_lookup.values(): + if task.status != "running": + continue + elapsed_seconds = _elapsed_seconds(task) + if elapsed_seconds == task.last_rendered_elapsed_seconds: + continue + self._rewrite_elapsed(task) + task.last_rendered_elapsed_seconds = elapsed_seconds def _render_row(self, task: _TaskState) -> str: """Build a compact row for a task.""" @@ -161,6 +198,51 @@ def _render_row(self, task: _TaskState) -> str: f"{counter.rjust(COUNTER_WIDTH)} {elapsed.rjust(ELAPSED_WIDTH)}{error}" ) + def _print_row(self, row: str) -> None: + """Print a single task row.""" + self._console.print( + row, + markup=True, + highlight=False, + no_wrap=True, + overflow="crop", + ) + + def _rewrite_task_row(self, task: _TaskState) -> None: + """Rewrite a task row in place without repainting the whole table.""" + distance = self._distance_from_bottom(task) + self._console.control( + Control.move_to_column(0, y=-distance), + Control((ControlType.ERASE_IN_LINE, 2)), + ) + self._console.print( + self._render_row(task), + markup=True, + highlight=False, + no_wrap=True, + overflow="crop", + end="", + ) + self._restore_cursor(distance) + + def _rewrite_elapsed(self, task: _TaskState) -> None: + """Rewrite only the elapsed field for a running task.""" + distance = 
self._distance_from_bottom(task) + elapsed = _elapsed(task) + padded = elapsed.rjust(max(ELAPSED_WIDTH, len(elapsed))) + self._console.control(Control.move_to_column(ELAPSED_COLUMN, y=-distance)) + self._console.out(padded, end="") + self._restore_cursor(distance) + + def _restore_cursor(self, distance: int) -> None: + """Return the cursor to the stable line below the task list.""" + self._console.control(Control.move_to_column(0, y=distance)) + + def _distance_from_bottom(self, task: _TaskState) -> int: + """Return the cursor distance from the footer line to a task row.""" + row_index = self._task_order.index(task.task_id) + return len(self._task_order) - row_index + def _status_style(status: str) -> tuple[str, str]: """Map task state to badge text and color.""" @@ -196,8 +278,6 @@ def _bar_text(completed: int, total: int, color: str, width: int = 28) -> str: def _elapsed(task: _TaskState) -> str: """Format elapsed task time as M:SS.""" - if task.started_at is not None and task.finished_at is None: - return "..." 
seconds = _elapsed_seconds(task) minutes, sec = divmod(seconds, 60) return f"{minutes}:{sec:02d}" From 05d7bb8f4fc39f587d8c27980ecaf125b82eb6e7 Mon Sep 17 00:00:00 2001 From: John Chase Date: Tue, 7 Apr 2026 20:19:40 -0700 Subject: [PATCH 40/44] Fix flickering tty --- src/adagio/executors/cache_support.py | 2 - tests/test_tty_monitor.py | 75 +++++++++++++++++++++++++++ 2 files changed, 75 insertions(+), 2 deletions(-) create mode 100644 tests/test_tty_monitor.py diff --git a/src/adagio/executors/cache_support.py b/src/adagio/executors/cache_support.py index 4d84c97..02d967c 100644 --- a/src/adagio/executors/cache_support.py +++ b/src/adagio/executors/cache_support.py @@ -1,5 +1,3 @@ -from __future__ import annotations - from dataclasses import dataclass from pathlib import Path diff --git a/tests/test_tty_monitor.py b/tests/test_tty_monitor.py new file mode 100644 index 0000000..896e0f3 --- /dev/null +++ b/tests/test_tty_monitor.py @@ -0,0 +1,75 @@ +import io +import unittest +from unittest.mock import patch + +from rich.console import Console + +from adagio.monitor.tty import RichMonitor, _TaskState, _elapsed + + +class RichMonitorTests(unittest.TestCase): + def test_progress_auto_refresh_is_disabled(self) -> None: + monitor = RichMonitor(console=Console(file=io.StringIO())) + + self.assertFalse(monitor._inline_updates) + + def test_running_task_elapsed_uses_current_second(self) -> None: + task = _TaskState( + task_id="task-1", + label="demo", + total_subtasks=1, + status="running", + started_at=10.0, + ) + + with patch("adagio.monitor.tty.time.monotonic", return_value=18.9): + self.assertEqual(_elapsed(task), "0:08") + + def test_finished_task_elapsed_uses_duration(self) -> None: + task = _TaskState( + task_id="task-1", + label="demo", + total_subtasks=1, + status="completed", + started_at=10.0, + finished_at=75.0, + ) + + self.assertEqual(_elapsed(task), "1:05") + + def test_refresh_running_rows_skips_same_elapsed_bucket(self) -> None: + monitor = 
RichMonitor(console=Console(file=io.StringIO())) + monitor._task_lookup["task-1"] = _TaskState( + task_id="task-1", + label="demo", + total_subtasks=1, + status="running", + started_at=10.0, + last_rendered_elapsed_seconds=9, + ) + monitor._task_order.append("task-1") + + with patch.object(monitor, "_rewrite_elapsed") as rewrite_elapsed: + with patch("adagio.monitor.tty.time.monotonic", return_value=19.9): + monitor._refresh_running_timers() + + rewrite_elapsed.assert_not_called() + + def test_refresh_running_rows_updates_on_new_elapsed_second(self) -> None: + monitor = RichMonitor(console=Console(file=io.StringIO())) + task = _TaskState( + task_id="task-1", + label="demo", + total_subtasks=1, + status="running", + started_at=10.0, + last_rendered_elapsed_seconds=9, + ) + monitor._task_lookup["task-1"] = task + monitor._task_order.append("task-1") + + with patch.object(monitor, "_rewrite_elapsed") as rewrite_elapsed: + with patch("adagio.monitor.tty.time.monotonic", return_value=20.0): + monitor._refresh_running_timers() + + rewrite_elapsed.assert_called_once_with(task) From d004981bb9edb7ac0476f6080d32e0f59206df8b Mon Sep 17 00:00:00 2001 From: John Chase Date: Tue, 7 Apr 2026 21:49:14 -0700 Subject: [PATCH 41/44] Adds qapi token submission --- README.md | 11 +++++ src/adagio/cli/qapi.py | 11 +++++ src/adagio/qapi/client.py | 7 +++- tests/test_qapi_submit.py | 88 +++++++++++++++++++++++++++++++++++++++ 4 files changed, 116 insertions(+), 1 deletion(-) create mode 100644 tests/test_qapi_submit.py diff --git a/README.md b/README.md index ddc895b..e483edc 100644 --- a/README.md +++ b/README.md @@ -184,6 +184,17 @@ Generate and submit plugin metadata from the active QIIME environment: adagio qapi build --action-url http://localhost:81/api/v1 ``` +Submit to a protected deployment such as `adagiodata.com` with a scoped submission token: + +```bash +export ACTION_URL=https://adagiodata.com/api/v1 +export QAPI_SUBMISSION_TOKEN= +uv run adagio qapi build +``` + +You can 
also pass `--submission-token`, but the environment variable is safer because it does +not end up in shell history. + Write payload to disk without submitting: ```bash diff --git a/src/adagio/cli/qapi.py b/src/adagio/cli/qapi.py index 716dab1..a42657c 100644 --- a/src/adagio/cli/qapi.py +++ b/src/adagio/cli/qapi.py @@ -99,6 +99,16 @@ def build_qapi( help="Optional path to write the generated request JSON.", ), ] = None, + submission_token: Annotated[ + str | None, + Parameter( + name=("--submission-token",), + help=( + "Bearer token for protected QAPI submission routes. Defaults to " + "QAPI_SUBMISSION_TOKEN env var; prefer the env var to avoid shell history leaks." + ), + ), + ] = None, timeout: Annotated[ int, Parameter( @@ -151,6 +161,7 @@ def build_qapi( url, status, response_body = submit_qapi_payload( request_body, action_url=action_url, + submission_token=submission_token, timeout=timeout, dry_run=dry_run, force_overwrite=force_overwrite, diff --git a/src/adagio/qapi/client.py b/src/adagio/qapi/client.py index c72a342..e1a9907 100644 --- a/src/adagio/qapi/client.py +++ b/src/adagio/qapi/client.py @@ -9,6 +9,7 @@ def submit_qapi_payload( payload: dict[str, Any], *, action_url: str | None = None, + submission_token: str | None = None, timeout: int = 60, dry_run: bool = False, force_overwrite: bool = False, @@ -20,15 +21,19 @@ def submit_qapi_payload( ) url = resolved_action_url.rstrip("/") + "/qapi/" + resolved_submission_token = submission_token or os.getenv("QAPI_SUBMISSION_TOKEN") request_body = { **payload, "dry_run": dry_run, "force_overwrite": force_overwrite, } + headers = {"Content-Type": "application/json"} + if resolved_submission_token: + headers["Authorization"] = f"Bearer {resolved_submission_token}" req = Request( url=url, data=json.dumps(request_body).encode("utf-8"), - headers={"Content-Type": "application/json"}, + headers=headers, method="POST", ) diff --git a/tests/test_qapi_submit.py b/tests/test_qapi_submit.py new file mode 100644 index 
0000000..1135716 --- /dev/null +++ b/tests/test_qapi_submit.py @@ -0,0 +1,88 @@ +import json +import os +import unittest +from unittest.mock import patch + +from adagio.cli import qapi as qapi_cli +from adagio.qapi.client import submit_qapi_payload + + +class _FakeResponse: + def __init__(self, body: object = None, status: int = 200) -> None: + self.status = status + self._body = "" if body is None else json.dumps(body) + + def __enter__(self) -> "_FakeResponse": + return self + + def __exit__(self, exc_type, exc, tb) -> None: + return None + + def read(self) -> bytes: + return self._body.encode("utf-8") + + +class QapiSubmitTests(unittest.TestCase): + def test_submit_qapi_payload_adds_bearer_token_header(self) -> None: + seen_headers: dict[str, str | None] = {} + + def fake_urlopen(request, timeout=60): + seen_headers["authorization"] = request.get_header("Authorization") + seen_headers["content_type"] = request.get_header("Content-type") + return _FakeResponse({"message": "ok"}) + + with patch("adagio.qapi.client.urlopen", side_effect=fake_urlopen): + url, status, response = submit_qapi_payload( + {"qiime_version": "2024.10.0", "schema_version": "0.1.0", "data": {"dada2": {"methods": {}}}}, + action_url="https://adagiodata.com/api/v1", + submission_token="token-123", + ) + + self.assertEqual(url, "https://adagiodata.com/api/v1/qapi/") + self.assertEqual(status, 200) + self.assertEqual(response, {"message": "ok"}) + self.assertEqual(seen_headers["authorization"], "Bearer token-123") + self.assertEqual(seen_headers["content_type"], "application/json") + + def test_submit_qapi_payload_reads_submission_token_from_env(self) -> None: + seen_authorization: dict[str, str | None] = {} + + def fake_urlopen(request, timeout=60): + seen_authorization["value"] = request.get_header("Authorization") + return _FakeResponse({"message": "ok"}) + + with patch.dict(os.environ, {"QAPI_SUBMISSION_TOKEN": "env-token"}, clear=False): + with patch("adagio.qapi.client.urlopen", 
side_effect=fake_urlopen): + submit_qapi_payload( + { + "qiime_version": "2024.10.0", + "schema_version": "0.1.0", + "data": {"feature-table": {"methods": {}}}, + }, + action_url="https://adagiodata.com/api/v1", + ) + + self.assertEqual(seen_authorization["value"], "Bearer env-token") + + def test_build_qapi_passes_submission_token_to_client(self) -> None: + with patch( + "adagio.cli.qapi.generate_qapi_payload", + return_value={ + "qiime_version": "2024.10.0", + "schema_version": "0.1.0", + "data": {"dada2": {"methods": {}}}, + }, + ), patch("adagio.cli.qapi.submit_qapi_payload") as submit_mock: + submit_mock.return_value = ( + "https://adagiodata.com/api/v1/qapi/", + 200, + {"message": "ok"}, + ) + + qapi_cli.build_qapi( + action_url="https://adagiodata.com/api/v1", + submission_token="token-456", + ) + + submit_mock.assert_called_once() + self.assertEqual(submit_mock.call_args.kwargs["submission_token"], "token-456") From 14c30dbc1a1c5d56c1be52df9aa1354020dfe0cd Mon Sep 17 00:00:00 2001 From: John Chase Date: Mon, 13 Apr 2026 22:53:43 -0700 Subject: [PATCH 42/44] Adds community pipeline support --- README.md | 11 ++ src/adagio/cli/dynamic.py | 16 +- src/adagio/cli/main.py | 105 ++++++++----- src/adagio/cli/pipeline.py | 16 +- src/adagio/cli/pipeline_sources.py | 206 +++++++++++++++++++++++++ src/adagio/cli/runner.py | 231 +++++++++++++++-------------- tests/test_pipeline_sources.py | 200 +++++++++++++++++++++++++ 7 files changed, 633 insertions(+), 152 deletions(-) create mode 100644 src/adagio/cli/pipeline_sources.py create mode 100644 tests/test_pipeline_sources.py diff --git a/README.md b/README.md index e483edc..1974f88 100644 --- a/README.md +++ b/README.md @@ -59,6 +59,17 @@ Equivalent positional form: adagio run path/to/pipeline.json --cache-dir /path/to/cache ``` +Run a pipeline from a configured source reference: + +```bash +adagio run adagio-playbook/dada2 --cache-dir /path/to/cache +``` + +By default, `adagio-playbook/` resolves against the sibling 
+`adagio-pipelines` checkout when one is available in the surrounding workspace. +If no local catalog repo is found, Adagio falls back to the public +`cymis/adagio-pipelines` GitHub repository. + Use an arguments file: ```bash diff --git a/src/adagio/cli/dynamic.py b/src/adagio/cli/dynamic.py index 3cc81b2..6e41b98 100644 --- a/src/adagio/cli/dynamic.py +++ b/src/adagio/cli/dynamic.py @@ -73,7 +73,9 @@ def _pipeline_type_label(type_hint: Any) -> str: return "TEXT" -def _display_type_label(*, spec_type: str | None, type_hint: Any, is_input: bool) -> str: +def _display_type_label( + *, spec_type: str | None, type_hint: Any, is_input: bool +) -> str: if is_input: return "PATH" @@ -332,7 +334,7 @@ def build_dynamic_run( CliParameter( name=("--pipeline", "-p"), group=command_group, - help="Path to the pipeline JSON file.", + help="Path to the pipeline file or a pipeline source reference.", ), ] } @@ -466,7 +468,9 @@ def add_dynamic_option( required_input_specs = [spec for spec in input_specs if spec.required] optional_input_specs = [spec for spec in input_specs if not spec.required] required_param_specs = [spec for spec in param_specs if _is_required_param(spec)] - optional_param_specs = [spec for spec in param_specs if not _is_required_param(spec)] + optional_param_specs = [ + spec for spec in param_specs if not _is_required_param(spec) + ] def add_input_spec(spec: InputSpec) -> None: original = spec.name @@ -518,7 +522,9 @@ def add_param_spec(spec: ParamSpec) -> None: argument_value = argument_params.get(original) has_argument_default = not _is_missing(argument_value) display_default = ( - default if default is not None else (argument_value if has_argument_default else None) + default + if default is not None + else (argument_value if has_argument_default else None) ) display_required = is_required and display_default is None param_default = None @@ -609,7 +615,7 @@ def run( run.__doc__ = ( "Run an Adagio pipeline.\n\n" "Dynamic inputs, parameters, and outputs are 
loaded from the pipeline file and exposed as CLI options.\n" - "Use: adagio run --pipeline PATH --help" + "Use: adagio run --pipeline PATH-OR-REFERENCE --help" ) return run diff --git a/src/adagio/cli/main.py b/src/adagio/cli/main.py index 4d29c20..2a630be 100644 --- a/src/adagio/cli/main.py +++ b/src/adagio/cli/main.py @@ -1,5 +1,6 @@ import json import sys +from contextlib import ExitStack from functools import partial from pathlib import Path from typing import Annotated, Any @@ -17,6 +18,7 @@ from .config import load_run_config from .dynamic import build_dynamic_run from .pipeline import run_pipeline_cli +from .pipeline_sources import PipelineResolutionError, resolve_pipeline_reference from .qapi import run_qapi from .runner import run_pipeline_from_kwargs @@ -61,7 +63,11 @@ def main(argv: list[str] | None = None) -> None: ShowParamsMode(show_mode_str) if show_mode_str else ShowParamsMode.REQUIRED ) except ValueError: - console.print(CycloptsPanel("Invalid --show-params value. Use one of: all, missing, required.")) + console.print( + CycloptsPanel( + "Invalid --show-params value. Use one of: all, missing, required." + ) + ) sys.exit(1) if pipeline_str is None: pipeline_str = positional_pipeline @@ -71,6 +77,7 @@ def main(argv: list[str] | None = None) -> None: help="Adagio command line tool for processing pipelines created with the Adagio GUI.", help_format="rich", ) + @app.command def cache() -> None: """Manage the shared QIIME cache directory.""" @@ -106,7 +113,7 @@ def run( Parameter( name=("--pipeline", "-p"), group=command_group, - help="Path to the pipeline JSON file.", + help="Path to the pipeline file or a pipeline source reference.", ), ], arguments: Annotated[ @@ -153,42 +160,57 @@ def run( ): """Run a pipeline (requires --pipeline; dynamic options come from that file).""" _ = (config, show_params, cache_dir, reuse) - console.print(CycloptsPanel("Missing --pipeline. 
Try:\n adagio run --pipeline pipeline.json --help")) + console.print( + CycloptsPanel( + "Missing --pipeline. Try:\n adagio run --pipeline pipeline.adg --help" + ) + ) sys.exit(1) app(argv) return - pipeline_path = Path(pipeline_str) - data = json.loads(pipeline_path.read_text(encoding="utf-8")) - input_specs = parse_inputs(data) - param_specs = parse_parameters(data) - output_specs = parse_outputs(data) - arguments_path_str = extract_flag_value(argv, "--arguments") - config_path_str = extract_flag_value(argv, "--config") - arguments_data = ( - _load_arguments_data(Path(arguments_path_str), console) if arguments_path_str else None - ) - if config_path_str: - load_run_config(Path(config_path_str)) - visible_inputs, visible_params, visible_outputs = _filter_visible_specs( - input_specs=input_specs, - param_specs=param_specs, - output_specs=output_specs, - show_mode=show_mode, - arguments_data=arguments_data, - ) + with ExitStack() as exit_stack: + pipeline_path = _resolve_pipeline_path( + pipeline_str, + console=console, + exit_stack=exit_stack, + ) + data = json.loads(pipeline_path.read_text(encoding="utf-8")) + input_specs = parse_inputs(data) + param_specs = parse_parameters(data) + output_specs = parse_outputs(data) + arguments_path_str = extract_flag_value(argv, "--arguments") + config_path_str = extract_flag_value(argv, "--config") + arguments_data = ( + _load_arguments_data(Path(arguments_path_str), console) + if arguments_path_str + else None + ) + if config_path_str: + load_run_config(Path(config_path_str)) + visible_inputs, visible_params, visible_outputs = _filter_visible_specs( + input_specs=input_specs, + param_specs=param_specs, + output_specs=output_specs, + show_mode=show_mode, + arguments_data=arguments_data, + ) - dynamic_run = build_dynamic_run( - input_specs=visible_inputs, - param_specs=visible_params, - output_specs=visible_outputs, - argument_inputs=arguments_data.get("inputs", {}) if arguments_data else None, - 
argument_params=arguments_data.get("parameters", {}) if arguments_data else None, - run_handler=partial(run_pipeline_from_kwargs, console=console), - ) - app.command(dynamic_run, name="run") - app(argv) + dynamic_run = build_dynamic_run( + input_specs=visible_inputs, + param_specs=visible_params, + output_specs=visible_outputs, + argument_inputs=arguments_data.get("inputs", {}) + if arguments_data + else None, + argument_params=arguments_data.get("parameters", {}) + if arguments_data + else None, + run_handler=partial(run_pipeline_from_kwargs, console=console), + ) + app.command(dynamic_run, name="run") + app(argv) def _filter_visible_specs( @@ -248,7 +270,11 @@ def _load_arguments_data(path: Path, _console: Console | None = None) -> dict[st if not isinstance(data.get("inputs"), dict) or not isinstance( data.get("parameters"), dict ): - _con.print(CycloptsPanel("Invalid arguments file: 'inputs' and 'parameters' must be objects.")) + _con.print( + CycloptsPanel( + "Invalid arguments file: 'inputs' and 'parameters' must be objects." 
+ ) + ) sys.exit(1) return data @@ -257,5 +283,18 @@ def _is_missing(value: Any) -> bool: return value is None or value == "" +def _resolve_pipeline_path( + reference: str, + *, + console: Console, + exit_stack: ExitStack, +) -> Path: + try: + return resolve_pipeline_reference(reference, exit_stack=exit_stack) + except PipelineResolutionError as error: + console.print(CycloptsPanel(str(error))) + sys.exit(1) + + if __name__ == "__main__": main() diff --git a/src/adagio/cli/pipeline.py b/src/adagio/cli/pipeline.py index 0d7168c..5b54713 100644 --- a/src/adagio/cli/pipeline.py +++ b/src/adagio/cli/pipeline.py @@ -1,4 +1,5 @@ import json +from contextlib import ExitStack from pathlib import Path from cyclopts import App @@ -6,6 +7,7 @@ from ..describe import render_pipeline_text from ..model.pipeline import AdagioPipeline +from .pipeline_sources import PipelineResolutionError, resolve_pipeline_reference console = Console() @@ -21,7 +23,13 @@ def run_pipeline_cli(argv: list[str]) -> None: def show_pipeline(pipeline: Path) -> None: """Print a pipeline summary to the terminal.""" - data = json.loads(pipeline.read_text(encoding="utf-8")) - pipeline_data = data.get("spec", data) if isinstance(data, dict) else data - parsed_pipeline = AdagioPipeline.model_validate(pipeline_data) - console.print(render_pipeline_text(parsed_pipeline), soft_wrap=True) + with ExitStack() as exit_stack: + try: + pipeline_path = resolve_pipeline_reference(pipeline, exit_stack=exit_stack) + except PipelineResolutionError as error: + raise SystemExit(str(error)) from error + + data = json.loads(pipeline_path.read_text(encoding="utf-8")) + pipeline_data = data.get("spec", data) if isinstance(data, dict) else data + parsed_pipeline = AdagioPipeline.model_validate(pipeline_data) + console.print(render_pipeline_text(parsed_pipeline), soft_wrap=True) diff --git a/src/adagio/cli/pipeline_sources.py b/src/adagio/cli/pipeline_sources.py new file mode 100644 index 0000000..f39fc2b --- /dev/null +++ 
b/src/adagio/cli/pipeline_sources.py @@ -0,0 +1,206 @@ +from __future__ import annotations + +from contextlib import ExitStack +from dataclasses import dataclass +from pathlib import Path +from tempfile import TemporaryDirectory +from urllib.error import HTTPError, URLError +from urllib.parse import quote +from urllib.request import Request, urlopen + + +CATALOG_TIERS = ("community", "official") +DEFAULT_PIPELINE_SOURCE = "adagio-playbook" + + +class PipelineResolutionError(RuntimeError): + """Raised when a pipeline reference cannot be resolved.""" + + +@dataclass(frozen=True) +class LocalCatalogLocation: + root: Path + + def candidate_paths(self, slug: str) -> tuple[Path, ...]: + return tuple( + (self.root / "pipelines" / tier / slug / "pipeline.adg").resolve() + for tier in CATALOG_TIERS + ) + + +@dataclass(frozen=True) +class GitHubCatalogLocation: + owner: str + repo: str + ref: str = "main" + + def candidate_urls(self, slug: str) -> tuple[str, ...]: + quoted_slug = _quote_slug(slug) + return tuple( + f"https://raw.githubusercontent.com/{self.owner}/{self.repo}/{self.ref}/" + f"pipelines/{tier}/{quoted_slug}/pipeline.adg" + for tier in CATALOG_TIERS + ) + + +@dataclass(frozen=True) +class PipelineSource: + name: str + locations: tuple[LocalCatalogLocation | GitHubCatalogLocation, ...] + + +def parse_pipeline_source_reference(reference: str) -> tuple[str, str] | None: + raw = reference.strip() + if not raw: + return None + if raw.startswith(("/", "./", "../", "~")): + return None + if "://" in raw: + return None + if Path(raw).suffix in {".adg", ".json"}: + return None + + source_name, separator, slug = raw.partition("/") + if not separator or not source_name or not slug: + return None + return source_name, slug + + +def discover_workspace_catalog_roots( + *, search_roots: tuple[Path, ...] 
| None = None +) -> tuple[Path, ...]: + seen: set[Path] = set() + discovered: list[Path] = [] + anchors = list(search_roots or ()) + anchors.extend([Path.cwd(), Path(__file__).resolve()]) + + for anchor in anchors: + current = anchor.resolve() + if current.is_file(): + current = current.parent + + for parent in (current, *current.parents): + for candidate in _catalog_candidates(parent): + resolved = candidate.resolve() + if resolved in seen: + continue + seen.add(resolved) + discovered.append(resolved) + + return tuple(discovered) + + +def default_pipeline_sources( + *, search_roots: tuple[Path, ...] | None = None +) -> tuple[PipelineSource, ...]: + local_locations = tuple( + LocalCatalogLocation(root=root) + for root in discover_workspace_catalog_roots(search_roots=search_roots) + ) + github_fallback = GitHubCatalogLocation(owner="cymis", repo="adagio-pipelines") + return ( + PipelineSource( + name=DEFAULT_PIPELINE_SOURCE, + locations=(*local_locations, github_fallback), + ), + ) + + +def resolve_pipeline_reference( + reference: str | Path, + *, + exit_stack: ExitStack, + sources: tuple[PipelineSource, ...] | None = None, +) -> Path: + raw = str(reference).strip() + if not raw: + raise PipelineResolutionError("Pipeline reference is empty.") + + candidate_path = Path(raw).expanduser() + if candidate_path.exists(): + return candidate_path.resolve() + + parsed_reference = parse_pipeline_source_reference(raw) + if parsed_reference is None: + raise PipelineResolutionError(f"Pipeline file does not exist: {raw}") + + source_name, slug = parsed_reference + source_registry = { + source.name: source for source in (sources or default_pipeline_sources()) + } + source = source_registry.get(source_name) + if source is None: + available = ", ".join(sorted(source_registry)) or "none" + raise PipelineResolutionError( + f"Unknown pipeline source '{source_name}'. Available sources: {available}." 
+ ) + + attempted_candidates: list[str] = [] + access_errors: list[str] = [] + + for location in source.locations: + if isinstance(location, LocalCatalogLocation): + for path in location.candidate_paths(slug): + attempted_candidates.append(str(path)) + if path.exists(): + return path + continue + + for url in location.candidate_urls(slug): + attempted_candidates.append(url) + try: + return _download_remote_pipeline(url=url, exit_stack=exit_stack) + except FileNotFoundError: + continue + except PipelineResolutionError as error: + access_errors.append(str(error)) + break + + message = [f"Pipeline reference '{raw}' was not found."] + if attempted_candidates: + message.append("Looked in:") + message.extend(f" - {candidate}" for candidate in attempted_candidates) + if access_errors: + message.append("Errors:") + message.extend(f" - {error}" for error in access_errors) + raise PipelineResolutionError("\n".join(message)) + + +def _catalog_candidates(parent: Path) -> tuple[Path, ...]: + candidates: list[Path] = [] + if parent.name == "adagio-pipelines" and (parent / "pipelines").is_dir(): + candidates.append(parent) + + sibling = parent / "adagio-pipelines" + if sibling.is_dir() and (sibling / "pipelines").is_dir(): + candidates.append(sibling) + + return tuple(candidates) + + +def _download_remote_pipeline(*, url: str, exit_stack: ExitStack) -> Path: + request = Request(url, headers={"User-Agent": "adagio-cli"}) + try: + with urlopen(request, timeout=10) as response: + payload = response.read() + except HTTPError as error: + if error.code == 404: + raise FileNotFoundError(url) from error + raise PipelineResolutionError( + f"Failed to fetch pipeline from {url}: HTTP {error.code}" + ) from error + except URLError as error: + raise PipelineResolutionError( + f"Failed to fetch pipeline from {url}: {error.reason}" + ) from error + + tempdir = Path( + exit_stack.enter_context(TemporaryDirectory(prefix="adagio-pipeline-")) + ) + pipeline_path = tempdir / "pipeline.adg" + 
pipeline_path.write_bytes(payload) + return pipeline_path + + +def _quote_slug(slug: str) -> str: + return "/".join(quote(part) for part in Path(slug).parts) diff --git a/src/adagio/cli/runner.py b/src/adagio/cli/runner.py index 6783355..1efb4d6 100644 --- a/src/adagio/cli/runner.py +++ b/src/adagio/cli/runner.py @@ -1,6 +1,7 @@ import json import os import sys +from contextlib import ExitStack from pathlib import Path from typing import Any @@ -10,6 +11,7 @@ from rich.text import Text from .config import load_run_config +from .pipeline_sources import PipelineResolutionError, resolve_pipeline_reference from ..executors.base import TaskEnvironmentOverride from ..executors.cache_support import ( describe_cache_config, @@ -54,125 +56,134 @@ def run_pipeline_from_kwargs( cache_dir = kwargs.pop("cache_dir", None) reuse = bool(kwargs.pop("reuse", True)) - data = json.loads(pipeline.read_text(encoding="utf-8")) - pipeline_data = data.get("spec", data) if isinstance(data, dict) else data - parsed_pipeline = AdagioPipeline.model_validate(pipeline_data) - arguments = parsed_pipeline.signature.to_default_arguments() - run_config = load_run_config(config_file) - output_names = [output.name for output in parsed_pipeline.signature.outputs] - - input_names = {name for _, name in input_bindings} - param_names = {name for _, name in param_bindings} - output_name_set = set(output_names) - - if arguments_file is not None: - file_data = json.loads(arguments_file.read_text(encoding="utf-8")) - arguments_data = AdagioArgumentsFile.model_validate(file_data) - - unknown_inputs = sorted(set(arguments_data.inputs) - input_names) - if unknown_inputs: - _error_exit( - console, - "Unknown inputs in arguments file: " + ", ".join(unknown_inputs), - ) - - unknown_params = sorted(set(arguments_data.parameters) - param_names) - if unknown_params: - _error_exit( - console, - "Unknown parameters in arguments file: " + ", ".join(unknown_params), - ) - - unknown_outputs: list[str] = [] - if 
isinstance(arguments_data.outputs, dict): - unknown_outputs = sorted(set(arguments_data.outputs) - output_name_set) - if unknown_outputs: - _error_exit( - console, - "Unknown outputs in arguments file: " + ", ".join(unknown_outputs), - ) - - arguments.inputs.update(arguments_data.inputs) - arguments.parameters.update(arguments_data.parameters) - if arguments_data.outputs is not None: - arguments.outputs = arguments_data.outputs - - for ident, original in input_bindings: - value = kwargs.get(ident) - if value is not None: - arguments.inputs[original] = str(value) - - for ident, original in param_bindings: - value = kwargs.get(ident) - if value is not None: - arguments.parameters[original] = value - - cli_output_dir = kwargs.get(output_dir_ident) - cli_output_overrides = { - original: str(value) - for ident, original in output_bindings - if (value := kwargs.get(ident)) is not None - } - arguments.outputs = _apply_output_overrides( - outputs=arguments.outputs, - output_names=output_names, - output_dir=str(cli_output_dir) if cli_output_dir is not None else None, - output_overrides=cli_output_overrides, - ) + with ExitStack() as exit_stack: + try: + pipeline_path = resolve_pipeline_reference(pipeline, exit_stack=exit_stack) + except PipelineResolutionError as error: + _error_exit(console, str(error)) + + data = json.loads(pipeline_path.read_text(encoding="utf-8")) + pipeline_data = data.get("spec", data) if isinstance(data, dict) else data + parsed_pipeline = AdagioPipeline.model_validate(pipeline_data) + arguments = parsed_pipeline.signature.to_default_arguments() + run_config = load_run_config(config_file) + output_names = [output.name for output in parsed_pipeline.signature.outputs] + + input_names = {name for _, name in input_bindings} + param_names = {name for _, name in param_bindings} + output_name_set = set(output_names) + + if arguments_file is not None: + file_data = json.loads(arguments_file.read_text(encoding="utf-8")) + arguments_data = 
AdagioArgumentsFile.model_validate(file_data) + + unknown_inputs = sorted(set(arguments_data.inputs) - input_names) + if unknown_inputs: + _error_exit( + console, + "Unknown inputs in arguments file: " + ", ".join(unknown_inputs), + ) + + unknown_params = sorted(set(arguments_data.parameters) - param_names) + if unknown_params: + _error_exit( + console, + "Unknown parameters in arguments file: " + + ", ".join(unknown_params), + ) + + unknown_outputs: list[str] = [] + if isinstance(arguments_data.outputs, dict): + unknown_outputs = sorted(set(arguments_data.outputs) - output_name_set) + if unknown_outputs: + _error_exit( + console, + "Unknown outputs in arguments file: " + ", ".join(unknown_outputs), + ) + + arguments.inputs.update(arguments_data.inputs) + arguments.parameters.update(arguments_data.parameters) + if arguments_data.outputs is not None: + arguments.outputs = arguments_data.outputs + + for ident, original in input_bindings: + value = kwargs.get(ident) + if value is not None: + arguments.inputs[original] = str(value) + + for ident, original in param_bindings: + value = kwargs.get(ident) + if value is not None: + arguments.parameters[original] = value + + cli_output_dir = kwargs.get(output_dir_ident) + cli_output_overrides = { + original: str(value) + for ident, original in output_bindings + if (value := kwargs.get(ident)) is not None + } + arguments.outputs = _apply_output_overrides( + outputs=arguments.outputs, + output_names=output_names, + output_dir=str(cli_output_dir) if cli_output_dir is not None else None, + output_overrides=cli_output_overrides, + ) - missing_inputs = [ - name for name in required_inputs if _is_missing(arguments.inputs.get(name)) - ] - missing_params = [ - name for name in required_params if _is_missing(arguments.parameters.get(name)) - ] - if missing_inputs or missing_params: - missing_opts = [f"--input-{n.replace('_', '-')}" for n in missing_inputs] + [ - f"--param-{n.replace('_', '-')}" for n in missing_params + missing_inputs 
= [ + name for name in required_inputs if _is_missing(arguments.inputs.get(name)) ] - formatted = ", ".join(f"[cyan]{opt}[/cyan]" for opt in missing_opts) - _error_exit(console, f"Missing required arguments: {formatted}") - - arguments.outputs = _resolve_output_destinations( - outputs=arguments.outputs, - output_names=output_names, - cwd=Path.cwd().resolve(), - ) + missing_params = [ + name + for name in required_params + if _is_missing(arguments.parameters.get(name)) + ] + if missing_inputs or missing_params: + missing_opts = [ + f"--input-{n.replace('_', '-')}" for n in missing_inputs + ] + [f"--param-{n.replace('_', '-')}" for n in missing_params] + formatted = ", ".join(f"[cyan]{opt}[/cyan]" for opt in missing_opts) + _error_exit(console, f"Missing required arguments: {formatted}") + + arguments.outputs = _resolve_output_destinations( + outputs=arguments.outputs, + output_names=output_names, + cwd=Path.cwd().resolve(), + ) - suppress_header = _is_truthy(os.getenv("ADAGIO_SUPPRESS_RUN_HEADER")) - if not suppress_header: - console.print(f"[bold]Pipeline:[/bold] {pipeline}") + suppress_header = _is_truthy(os.getenv("ADAGIO_SUPPRESS_RUN_HEADER")) + if not suppress_header: + console.print(f"[bold]Pipeline:[/bold] {pipeline}") - cache_config = resolve_cache_config( - cwd=Path.cwd().resolve(), - cache_dir=cache_dir, - reuse=reuse, - ) + cache_config = resolve_cache_config( + cwd=Path.cwd().resolve(), + cache_dir=cache_dir, + reuse=reuse, + ) - if not suppress_header: - console.print(f"[bold]Cache:[/bold] {describe_cache_config(cache_config)}") + if not suppress_header: + console.print(f"[bold]Cache:[/bold] {describe_cache_config(cache_config)}") - from ..executors import select_default_executor + from ..executors import select_default_executor - executor = select_default_executor( - default_override=_config_default_override(run_config), - plugin_overrides=_config_named_overrides( - run_config.plugins if run_config is not None else {} - ), - 
task_overrides=_config_named_overrides( - run_config.tasks if run_config is not None else {} - ), - ) + executor = select_default_executor( + default_override=_config_default_override(run_config), + plugin_overrides=_config_named_overrides( + run_config.plugins if run_config is not None else {} + ), + task_overrides=_config_named_overrides( + run_config.tasks if run_config is not None else {} + ), + ) - if not suppress_header: - console.print(f"[bold]Executing pipeline[/bold] ({executor.mode_label})") + if not suppress_header: + console.print(f"[bold]Executing pipeline[/bold] ({executor.mode_label})") - executor.execute( - pipeline=parsed_pipeline, - arguments=arguments, - console=console, - cache_config=cache_config, - ) + executor.execute( + pipeline=parsed_pipeline, + arguments=arguments, + console=console, + cache_config=cache_config, + ) def _is_missing(value: Any) -> bool: diff --git a/tests/test_pipeline_sources.py b/tests/test_pipeline_sources.py new file mode 100644 index 0000000..c0f39dc --- /dev/null +++ b/tests/test_pipeline_sources.py @@ -0,0 +1,200 @@ +import io +import json +import tempfile +import unittest +from contextlib import ExitStack +from pathlib import Path +from unittest.mock import patch + +from rich.console import Console + +from adagio.cli.pipeline import show_pipeline +from adagio.cli.pipeline_sources import ( + DEFAULT_PIPELINE_SOURCE, + GitHubCatalogLocation, + LocalCatalogLocation, + PipelineResolutionError, + PipelineSource, + discover_workspace_catalog_roots, + parse_pipeline_source_reference, + resolve_pipeline_reference, +) + + +def _sample_pipeline_payload() -> dict: + return { + "spec": { + "type": "pipeline", + "signature": { + "inputs": [], + "parameters": [], + "outputs": [], + }, + "graph": [ + { + "id": "task-dada2", + "kind": "plugin-action", + "plugin": "dada2", + "action": "denoise_single", + "inputs": {}, + "parameters": {}, + "outputs": {}, + } + ], + } + } + + +class _FakeResponse: + def __init__(self, payload: 
bytes) -> None: + self._payload = payload + + def __enter__(self) -> "_FakeResponse": + return self + + def __exit__(self, exc_type, exc, tb) -> bool: + return False + + def read(self) -> bytes: + return self._payload + + +class PipelineSourceTests(unittest.TestCase): + def test_parse_pipeline_source_reference_recognizes_source_slug_syntax( + self, + ) -> None: + self.assertEqual( + parse_pipeline_source_reference("adagio-playbook/denoise"), + ("adagio-playbook", "denoise"), + ) + self.assertIsNone(parse_pipeline_source_reference("./pipeline.adg")) + self.assertIsNone(parse_pipeline_source_reference("pipeline.adg")) + + def test_discover_workspace_catalog_roots_finds_sibling_repo_from_worktree( + self, + ) -> None: + with tempfile.TemporaryDirectory() as tmpdir: + workspace = Path(tmpdir) + catalog_root = workspace / "adagio-pipelines" + (catalog_root / "pipelines" / "community").mkdir(parents=True) + worktree_root = workspace / ".worktrees" / "adagio-cli-community-pipelines" + worktree_root.mkdir(parents=True) + + discovered = discover_workspace_catalog_roots(search_roots=(worktree_root,)) + + self.assertIn(catalog_root.resolve(), discovered) + + def test_existing_local_path_takes_precedence_over_source_resolution(self) -> None: + with tempfile.TemporaryDirectory() as tmpdir: + pipeline_path = Path(tmpdir) / DEFAULT_PIPELINE_SOURCE / "denoise" + pipeline_path.parent.mkdir(parents=True) + pipeline_path.write_text("{}", encoding="utf-8") + + with ExitStack() as exit_stack: + resolved = resolve_pipeline_reference( + pipeline_path, exit_stack=exit_stack + ) + + self.assertEqual(resolved, pipeline_path.resolve()) + + def test_source_reference_resolves_from_local_catalog(self) -> None: + with tempfile.TemporaryDirectory() as tmpdir: + catalog_root = Path(tmpdir) / "adagio-pipelines" + pipeline_path = ( + catalog_root / "pipelines" / "community" / "denoise" / "pipeline.adg" + ) + pipeline_path.parent.mkdir(parents=True) + pipeline_path.write_text("{}", encoding="utf-8") 
+ source = PipelineSource( + name=DEFAULT_PIPELINE_SOURCE, + locations=(LocalCatalogLocation(root=catalog_root),), + ) + + with ExitStack() as exit_stack: + resolved = resolve_pipeline_reference( + f"{DEFAULT_PIPELINE_SOURCE}/denoise", + exit_stack=exit_stack, + sources=(source,), + ) + + self.assertEqual(resolved, pipeline_path.resolve()) + + def test_source_reference_falls_back_to_github_when_needed(self) -> None: + source = PipelineSource( + name=DEFAULT_PIPELINE_SOURCE, + locations=(GitHubCatalogLocation(owner="cymis", repo="adagio-pipelines"),), + ) + + with patch( + "adagio.cli.pipeline_sources.urlopen", + return_value=_FakeResponse( + b'{"spec": {"type": "pipeline", "signature": {"inputs": [], "parameters": [], "outputs": []}, "graph": []}}' + ), + ) as mock_urlopen: + with ExitStack() as exit_stack: + resolved = resolve_pipeline_reference( + f"{DEFAULT_PIPELINE_SOURCE}/denoise", + exit_stack=exit_stack, + sources=(source,), + ) + payload = json.loads(resolved.read_text(encoding="utf-8")) + + request = mock_urlopen.call_args.args[0] + self.assertIn("/pipelines/community/denoise/pipeline.adg", request.full_url) + self.assertEqual(payload["spec"]["type"], "pipeline") + + def test_missing_source_reference_reports_attempted_locations(self) -> None: + with tempfile.TemporaryDirectory() as tmpdir: + source = PipelineSource( + name=DEFAULT_PIPELINE_SOURCE, + locations=( + LocalCatalogLocation(root=Path(tmpdir) / "adagio-pipelines"), + ), + ) + + with ExitStack() as exit_stack: + with self.assertRaises(PipelineResolutionError) as error: + resolve_pipeline_reference( + f"{DEFAULT_PIPELINE_SOURCE}/missing", + exit_stack=exit_stack, + sources=(source,), + ) + + message = str(error.exception) + self.assertIn( + "Pipeline reference 'adagio-playbook/missing' was not found.", message + ) + self.assertIn("pipelines/community/missing/pipeline.adg", message) + + +class PipelineSourceIntegrationTests(unittest.TestCase): + def test_pipeline_show_accepts_source_reference(self) 
-> None: + with tempfile.TemporaryDirectory() as tmpdir: + catalog_root = Path(tmpdir) / "adagio-pipelines" + pipeline_path = ( + catalog_root / "pipelines" / "community" / "denoise" / "pipeline.adg" + ) + pipeline_path.parent.mkdir(parents=True) + pipeline_path.write_text( + json.dumps(_sample_pipeline_payload()), + encoding="utf-8", + ) + source = PipelineSource( + name=DEFAULT_PIPELINE_SOURCE, + locations=(LocalCatalogLocation(root=catalog_root),), + ) + output = io.StringIO() + console = Console(file=output, width=120, record=True) + + with patch( + "adagio.cli.pipeline_sources.default_pipeline_sources", + return_value=(source,), + ): + with patch("adagio.cli.pipeline.console", console): + show_pipeline(Path(f"{DEFAULT_PIPELINE_SOURCE}/denoise")) + + self.assertIn("dada2.denoise_single", output.getvalue()) + + +if __name__ == "__main__": + unittest.main() From eab81703dab6797e41fba499faa506c84abd4621 Mon Sep 17 00:00:00 2001 From: John Chase Date: Mon, 4 May 2026 21:41:12 -0700 Subject: [PATCH 43/44] Removes user configuration --- README.md | 15 +- src/adagio/cli/dynamic.py | 4 +- src/adagio/cli/main.py | 43 ++++-- src/adagio/cli/pipeline_sources.py | 111 ++++++++++++-- src/adagio/cli/runner.py | 25 ++- tests/test_pipeline_sources.py | 236 +++++++++++++++++++++++++++-- 6 files changed, 390 insertions(+), 44 deletions(-) diff --git a/README.md b/README.md index 1974f88..94f5e98 100644 --- a/README.md +++ b/README.md @@ -59,16 +59,23 @@ Equivalent positional form: adagio run path/to/pipeline.json --cache-dir /path/to/cache ``` -Run a pipeline from a configured source reference: +Run a pipeline from the Adagio pipeline catalog: ```bash -adagio run adagio-playbook/dada2 --cache-dir /path/to/cache +adagio run @adagio/microbial-diversity --cache-dir /path/to/cache ``` -By default, `adagio-playbook/` resolves against the sibling +By default, `@adagio/` resolves against the sibling `adagio-pipelines` checkout when one is available in the surrounding workspace. 
If no local catalog repo is found, Adagio falls back to the public -`cymis/adagio-pipelines` GitHub repository. +`cymis/adagio-pipelines` GitHub repository. Within a catalog, `official` is +checked before `community`. + +During `adagio run`, remote catalog pipelines are downloaded under the selected +`--cache-dir` and reused by source name and slug on later runs. `adagio pipeline +show` uses a temporary download when it needs to fetch from GitHub because it +does not take a cache directory. Run output includes the resolved local path or +remote URL so you can see where the pipeline came from. Use an arguments file: diff --git a/src/adagio/cli/dynamic.py b/src/adagio/cli/dynamic.py index 6e41b98..668a01a 100644 --- a/src/adagio/cli/dynamic.py +++ b/src/adagio/cli/dynamic.py @@ -334,7 +334,7 @@ def build_dynamic_run( CliParameter( name=("--pipeline", "-p"), group=command_group, - help="Path to the pipeline file or a pipeline source reference.", + help="Path to the pipeline file or a catalog reference like @adagio/slug.", ), ] } @@ -615,7 +615,7 @@ def run( run.__doc__ = ( "Run an Adagio pipeline.\n\n" "Dynamic inputs, parameters, and outputs are loaded from the pipeline file and exposed as CLI options.\n" - "Use: adagio run --pipeline PATH-OR-REFERENCE --help" + "Use: adagio run --pipeline PATH-OR-@SOURCE/SLUG --help" ) return run diff --git a/src/adagio/cli/main.py b/src/adagio/cli/main.py index 2a630be..087a62c 100644 --- a/src/adagio/cli/main.py +++ b/src/adagio/cli/main.py @@ -13,12 +13,16 @@ from ..app.parsers.pipeline import Output as OutputSpec from ..app.parsers.pipeline import Parameter as ParamSpec from ..app.parsers.pipeline import parse_inputs, parse_outputs, parse_parameters -from ..executors.cache_support import CACHE_DIR_HELP, REUSE_HELP +from ..executors.cache_support import CACHE_DIR_HELP, REUSE_HELP, resolve_cache_dir_path from .args import ShowParamsMode, extract_flag_value, promote_positional_pipeline from .config import load_run_config from 
.dynamic import build_dynamic_run from .pipeline import run_pipeline_cli -from .pipeline_sources import PipelineResolutionError, resolve_pipeline_reference +from .pipeline_sources import ( + PipelineResolution, + PipelineResolutionError, + resolve_pipeline_reference_details, +) from .qapi import run_qapi from .runner import run_pipeline_from_kwargs @@ -57,6 +61,7 @@ def main(argv: list[str] | None = None) -> None: argv, positional_pipeline = promote_positional_pipeline(argv) pipeline_str = extract_flag_value(argv, "--pipeline", "-p") + cache_dir_str = extract_flag_value(argv, "--cache-dir") show_mode_str = extract_flag_value(argv, "--show-params") try: show_mode = ( @@ -113,7 +118,7 @@ def run( Parameter( name=("--pipeline", "-p"), group=command_group, - help="Path to the pipeline file or a pipeline source reference.", + help="Path to the pipeline file or a catalog reference like @adagio/slug.", ), ], arguments: Annotated[ @@ -162,7 +167,9 @@ def run( _ = (config, show_params, cache_dir, reuse) console.print( CycloptsPanel( - "Missing --pipeline. Try:\n adagio run --pipeline pipeline.adg --help" + "Missing --pipeline. 
Try:\n" + " adagio run --pipeline pipeline.adg --help\n" + " adagio run @adagio/microbial-diversity --help" ) ) sys.exit(1) @@ -171,12 +178,13 @@ def run( return with ExitStack() as exit_stack: - pipeline_path = _resolve_pipeline_path( + pipeline_resolution = _resolve_pipeline( pipeline_str, console=console, exit_stack=exit_stack, + download_cache_dir=_resolve_download_cache_dir(cache_dir_str), ) - data = json.loads(pipeline_path.read_text(encoding="utf-8")) + data = json.loads(pipeline_resolution.path.read_text(encoding="utf-8")) input_specs = parse_inputs(data) param_specs = parse_parameters(data) output_specs = parse_outputs(data) @@ -207,7 +215,11 @@ def run( argument_params=arguments_data.get("parameters", {}) if arguments_data else None, - run_handler=partial(run_pipeline_from_kwargs, console=console), + run_handler=partial( + run_pipeline_from_kwargs, + console=console, + resolved_pipeline=pipeline_resolution, + ), ) app.command(dynamic_run, name="run") app(argv) @@ -283,18 +295,29 @@ def _is_missing(value: Any) -> bool: return value is None or value == "" -def _resolve_pipeline_path( +def _resolve_pipeline( reference: str, *, console: Console, exit_stack: ExitStack, -) -> Path: + download_cache_dir: Path | None = None, +) -> PipelineResolution: try: - return resolve_pipeline_reference(reference, exit_stack=exit_stack) + return resolve_pipeline_reference_details( + reference, + exit_stack=exit_stack, + download_cache_dir=download_cache_dir, + ) except PipelineResolutionError as error: console.print(CycloptsPanel(str(error))) sys.exit(1) +def _resolve_download_cache_dir(raw_value: str | None) -> Path | None: + if raw_value is None: + return None + return resolve_cache_dir_path(cwd=Path.cwd().resolve(), raw_value=raw_value) + + if __name__ == "__main__": main() diff --git a/src/adagio/cli/pipeline_sources.py b/src/adagio/cli/pipeline_sources.py index f39fc2b..1cddb4e 100644 --- a/src/adagio/cli/pipeline_sources.py +++ b/src/adagio/cli/pipeline_sources.py @@ 
-1,16 +1,19 @@ from __future__ import annotations +import re from contextlib import ExitStack -from dataclasses import dataclass from pathlib import Path +from dataclasses import dataclass from tempfile import TemporaryDirectory from urllib.error import HTTPError, URLError from urllib.parse import quote from urllib.request import Request, urlopen -CATALOG_TIERS = ("community", "official") -DEFAULT_PIPELINE_SOURCE = "adagio-playbook" +CATALOG_TIERS = ("official", "community") +DEFAULT_PIPELINE_SOURCE = "adagio" +SOURCE_NAME_RE = re.compile(r"[A-Za-z0-9][A-Za-z0-9._-]*") +SLUG_RE = re.compile(r"[a-z0-9][a-z0-9-]*") class PipelineResolutionError(RuntimeError): @@ -49,6 +52,13 @@ class PipelineSource: locations: tuple[LocalCatalogLocation | GitHubCatalogLocation, ...] +@dataclass(frozen=True) +class PipelineResolution: + path: Path + origin: str + is_remote: bool = False + + def parse_pipeline_source_reference(reference: str) -> tuple[str, str] | None: raw = reference.strip() if not raw: @@ -59,10 +69,18 @@ def parse_pipeline_source_reference(reference: str) -> tuple[str, str] | None: return None if Path(raw).suffix in {".adg", ".json"}: return None + if not raw.startswith("@"): + return None - source_name, separator, slug = raw.partition("/") + source_name, separator, slug = raw[1:].partition("/") if not separator or not source_name or not slug: return None + if not SOURCE_NAME_RE.fullmatch(source_name): + return None + if source_name != DEFAULT_PIPELINE_SOURCE: + return None + if not SLUG_RE.fullmatch(slug): + return None return source_name, slug @@ -98,10 +116,11 @@ def default_pipeline_sources( for root in discover_workspace_catalog_roots(search_roots=search_roots) ) github_fallback = GitHubCatalogLocation(owner="cymis", repo="adagio-pipelines") + built_in_locations = (*local_locations, github_fallback) return ( PipelineSource( name=DEFAULT_PIPELINE_SOURCE, - locations=(*local_locations, github_fallback), + locations=built_in_locations, ), ) @@ -111,23 +130,44 @@ 
def resolve_pipeline_reference( *, exit_stack: ExitStack, sources: tuple[PipelineSource, ...] | None = None, + download_cache_dir: Path | None = None, ) -> Path: + return resolve_pipeline_reference_details( + reference, + exit_stack=exit_stack, + sources=sources, + download_cache_dir=download_cache_dir, + ).path + + +def resolve_pipeline_reference_details( + reference: str | Path, + *, + exit_stack: ExitStack, + sources: tuple[PipelineSource, ...] | None = None, + download_cache_dir: Path | None = None, +) -> PipelineResolution: raw = str(reference).strip() if not raw: raise PipelineResolutionError("Pipeline reference is empty.") candidate_path = Path(raw).expanduser() if candidate_path.exists(): - return candidate_path.resolve() + resolved_path = candidate_path.resolve() + return PipelineResolution(path=resolved_path, origin=str(resolved_path)) parsed_reference = parse_pipeline_source_reference(raw) if parsed_reference is None: + if raw.startswith("@"): + raise PipelineResolutionError( + f"Invalid pipeline reference '{raw}'. Expected @adagio/slug, " + "where slug uses lowercase letters, digits, and hyphens." 
+ ) raise PipelineResolutionError(f"Pipeline file does not exist: {raw}") source_name, slug = parsed_reference - source_registry = { - source.name: source for source in (sources or default_pipeline_sources()) - } + registered_sources = default_pipeline_sources() if sources is None else sources + source_registry = {source.name: source for source in registered_sources} source = source_registry.get(source_name) if source is None: available = ", ".join(sorted(source_registry)) or "none" @@ -143,13 +183,31 @@ def resolve_pipeline_reference( for path in location.candidate_paths(slug): attempted_candidates.append(str(path)) if path.exists(): - return path + return PipelineResolution(path=path, origin=str(path)) continue + cached_path = _cached_remote_pipeline_path( + cache_dir=download_cache_dir, + source_name=source_name, + slug=slug, + ) + if cached_path is not None: + attempted_candidates.append(str(cached_path)) + if cached_path.exists(): + return PipelineResolution(path=cached_path, origin=str(cached_path)) + for url in location.candidate_urls(slug): attempted_candidates.append(url) try: - return _download_remote_pipeline(url=url, exit_stack=exit_stack) + return PipelineResolution( + path=_download_remote_pipeline( + url=url, + exit_stack=exit_stack, + cache_path=cached_path, + ), + origin=url, + is_remote=True, + ) except FileNotFoundError: continue except PipelineResolutionError as error: @@ -178,7 +236,12 @@ def _catalog_candidates(parent: Path) -> tuple[Path, ...]: return tuple(candidates) -def _download_remote_pipeline(*, url: str, exit_stack: ExitStack) -> Path: +def _download_remote_pipeline( + *, + url: str, + exit_stack: ExitStack, + cache_path: Path | None = None, +) -> Path: request = Request(url, headers={"User-Agent": "adagio-cli"}) try: with urlopen(request, timeout=10) as response: @@ -194,13 +257,29 @@ def _download_remote_pipeline(*, url: str, exit_stack: ExitStack) -> Path: f"Failed to fetch pipeline from {url}: {error.reason}" ) from error - 
tempdir = Path( - exit_stack.enter_context(TemporaryDirectory(prefix="adagio-pipeline-")) - ) - pipeline_path = tempdir / "pipeline.adg" + if cache_path is None: + tempdir = Path( + exit_stack.enter_context(TemporaryDirectory(prefix="adagio-pipeline-")) + ) + pipeline_path = tempdir / "pipeline.adg" + else: + pipeline_path = cache_path + pipeline_path.parent.mkdir(parents=True, exist_ok=True) + pipeline_path.write_bytes(payload) return pipeline_path def _quote_slug(slug: str) -> str: return "/".join(quote(part) for part in Path(slug).parts) + + +def _cached_remote_pipeline_path( + *, + source_name: str, + slug: str, + cache_dir: Path | None, +) -> Path | None: + if cache_dir is None: + return None + return cache_dir / "adagio-pipelines" / source_name / slug / "pipeline.adg" diff --git a/src/adagio/cli/runner.py b/src/adagio/cli/runner.py index 1efb4d6..f85c323 100644 --- a/src/adagio/cli/runner.py +++ b/src/adagio/cli/runner.py @@ -11,10 +11,15 @@ from rich.text import Text from .config import load_run_config -from .pipeline_sources import PipelineResolutionError, resolve_pipeline_reference +from .pipeline_sources import ( + PipelineResolution, + PipelineResolutionError, + resolve_pipeline_reference_details, +) from ..executors.base import TaskEnvironmentOverride from ..executors.cache_support import ( describe_cache_config, + resolve_cache_dir_path, resolve_cache_config, ) @@ -48,6 +53,7 @@ def run_pipeline_from_kwargs( required_params: list[str], *, console: Console, + resolved_pipeline: PipelineResolution | None = None, ) -> None: """Run a pipeline from resolved CLI keyword arguments.""" from ..model.arguments import AdagioArgumentsFile @@ -58,10 +64,18 @@ def run_pipeline_from_kwargs( with ExitStack() as exit_stack: try: - pipeline_path = resolve_pipeline_reference(pipeline, exit_stack=exit_stack) + pipeline_resolution = ( + resolved_pipeline + or resolve_pipeline_reference_details( + pipeline, + exit_stack=exit_stack, + 
download_cache_dir=_resolve_download_cache_dir(cache_dir), + ) + ) except PipelineResolutionError as error: _error_exit(console, str(error)) + pipeline_path = pipeline_resolution.path data = json.loads(pipeline_path.read_text(encoding="utf-8")) pipeline_data = data.get("spec", data) if isinstance(data, dict) else data parsed_pipeline = AdagioPipeline.model_validate(pipeline_data) @@ -153,6 +167,7 @@ def run_pipeline_from_kwargs( suppress_header = _is_truthy(os.getenv("ADAGIO_SUPPRESS_RUN_HEADER")) if not suppress_header: console.print(f"[bold]Pipeline:[/bold] {pipeline}") + console.print(f"[bold]Resolved from:[/bold] {pipeline_resolution.origin}") cache_config = resolve_cache_config( cwd=Path.cwd().resolve(), @@ -191,6 +206,12 @@ def _is_missing(value: Any) -> bool: return value is None or value == "" +def _resolve_download_cache_dir(raw_value: str | Path | None) -> Path | None: + if raw_value is None: + return None + return resolve_cache_dir_path(cwd=Path.cwd().resolve(), raw_value=raw_value) + + def _is_missing_output(value: Any) -> bool: if not isinstance(value, str): return True diff --git a/tests/test_pipeline_sources.py b/tests/test_pipeline_sources.py index c0f39dc..111058d 100644 --- a/tests/test_pipeline_sources.py +++ b/tests/test_pipeline_sources.py @@ -5,6 +5,7 @@ from contextlib import ExitStack from pathlib import Path from unittest.mock import patch +from urllib.error import HTTPError, URLError from rich.console import Console @@ -18,6 +19,7 @@ discover_workspace_catalog_roots, parse_pipeline_source_reference, resolve_pipeline_reference, + resolve_pipeline_reference_details, ) @@ -64,11 +66,18 @@ def test_parse_pipeline_source_reference_recognizes_source_slug_syntax( self, ) -> None: self.assertEqual( - parse_pipeline_source_reference("adagio-playbook/denoise"), - ("adagio-playbook", "denoise"), + parse_pipeline_source_reference("@adagio/microbial-diversity"), + ("adagio", "microbial-diversity"), ) + self.assertIsNone( + 
parse_pipeline_source_reference("@my-personal-channel/denoise") + ) + self.assertIsNone(parse_pipeline_source_reference("adagio/denoise")) self.assertIsNone(parse_pipeline_source_reference("./pipeline.adg")) self.assertIsNone(parse_pipeline_source_reference("pipeline.adg")) + self.assertIsNone(parse_pipeline_source_reference("@adagio/../denoise")) + self.assertIsNone(parse_pipeline_source_reference("@adagio/denoise/extra")) + self.assertIsNone(parse_pipeline_source_reference("@adagio/Denoise")) def test_discover_workspace_catalog_roots_finds_sibling_repo_from_worktree( self, @@ -112,13 +121,67 @@ def test_source_reference_resolves_from_local_catalog(self) -> None: with ExitStack() as exit_stack: resolved = resolve_pipeline_reference( - f"{DEFAULT_PIPELINE_SOURCE}/denoise", + f"@{DEFAULT_PIPELINE_SOURCE}/denoise", exit_stack=exit_stack, sources=(source,), ) self.assertEqual(resolved, pipeline_path.resolve()) + def test_local_catalog_prefers_official_over_community(self) -> None: + with tempfile.TemporaryDirectory() as tmpdir: + catalog_root = Path(tmpdir) / "adagio-pipelines" + official_path = ( + catalog_root / "pipelines" / "official" / "denoise" / "pipeline.adg" + ) + community_path = ( + catalog_root / "pipelines" / "community" / "denoise" / "pipeline.adg" + ) + official_path.parent.mkdir(parents=True) + community_path.parent.mkdir(parents=True) + official_path.write_text('{"source": "official"}', encoding="utf-8") + community_path.write_text('{"source": "community"}', encoding="utf-8") + source = PipelineSource( + name=DEFAULT_PIPELINE_SOURCE, + locations=(LocalCatalogLocation(root=catalog_root),), + ) + + with ExitStack() as exit_stack: + resolved = resolve_pipeline_reference( + f"@{DEFAULT_PIPELINE_SOURCE}/denoise", + exit_stack=exit_stack, + sources=(source,), + ) + + self.assertEqual(resolved, official_path.resolve()) + + def test_local_catalog_hit_skips_network_fallback(self) -> None: + with tempfile.TemporaryDirectory() as tmpdir: + catalog_root = 
Path(tmpdir) / "adagio-pipelines" + pipeline_path = ( + catalog_root / "pipelines" / "official" / "denoise" / "pipeline.adg" + ) + pipeline_path.parent.mkdir(parents=True) + pipeline_path.write_text("{}", encoding="utf-8") + source = PipelineSource( + name=DEFAULT_PIPELINE_SOURCE, + locations=( + LocalCatalogLocation(root=catalog_root), + GitHubCatalogLocation(owner="cymis", repo="adagio-pipelines"), + ), + ) + + with patch("adagio.cli.pipeline_sources.urlopen") as mock_urlopen: + with ExitStack() as exit_stack: + resolved = resolve_pipeline_reference( + f"@{DEFAULT_PIPELINE_SOURCE}/denoise", + exit_stack=exit_stack, + sources=(source,), + ) + + self.assertEqual(resolved, pipeline_path.resolve()) + mock_urlopen.assert_not_called() + def test_source_reference_falls_back_to_github_when_needed(self) -> None: source = PipelineSource( name=DEFAULT_PIPELINE_SOURCE, @@ -133,16 +196,171 @@ def test_source_reference_falls_back_to_github_when_needed(self) -> None: ) as mock_urlopen: with ExitStack() as exit_stack: resolved = resolve_pipeline_reference( - f"{DEFAULT_PIPELINE_SOURCE}/denoise", + f"@{DEFAULT_PIPELINE_SOURCE}/denoise", exit_stack=exit_stack, sources=(source,), ) payload = json.loads(resolved.read_text(encoding="utf-8")) request = mock_urlopen.call_args.args[0] - self.assertIn("/pipelines/community/denoise/pipeline.adg", request.full_url) + self.assertIn("/pipelines/official/denoise/pipeline.adg", request.full_url) self.assertEqual(payload["spec"]["type"], "pipeline") + def test_source_reference_reports_remote_origin(self) -> None: + source = PipelineSource( + name=DEFAULT_PIPELINE_SOURCE, + locations=(GitHubCatalogLocation(owner="cymis", repo="adagio-pipelines"),), + ) + + with patch( + "adagio.cli.pipeline_sources.urlopen", + return_value=_FakeResponse(b"{}"), + ): + with ExitStack() as exit_stack: + resolved = resolve_pipeline_reference_details( + f"@{DEFAULT_PIPELINE_SOURCE}/denoise", + exit_stack=exit_stack, + sources=(source,), + ) + + 
self.assertTrue(resolved.is_remote) + self.assertIn("/pipelines/official/denoise/pipeline.adg", resolved.origin) + + def test_remote_http_error_is_reported(self) -> None: + source = PipelineSource( + name=DEFAULT_PIPELINE_SOURCE, + locations=(GitHubCatalogLocation(owner="cymis", repo="adagio-pipelines"),), + ) + + with patch( + "adagio.cli.pipeline_sources.urlopen", + side_effect=HTTPError( + url="https://example.invalid/pipeline.adg", + code=500, + msg="server error", + hdrs=None, + fp=None, + ), + ): + with ExitStack() as exit_stack: + with self.assertRaises(PipelineResolutionError) as error: + resolve_pipeline_reference( + f"@{DEFAULT_PIPELINE_SOURCE}/denoise", + exit_stack=exit_stack, + sources=(source,), + ) + + self.assertIn("HTTP 500", str(error.exception)) + + def test_remote_url_error_is_reported(self) -> None: + source = PipelineSource( + name=DEFAULT_PIPELINE_SOURCE, + locations=(GitHubCatalogLocation(owner="cymis", repo="adagio-pipelines"),), + ) + + with patch( + "adagio.cli.pipeline_sources.urlopen", + side_effect=URLError("network unavailable"), + ): + with ExitStack() as exit_stack: + with self.assertRaises(PipelineResolutionError) as error: + resolve_pipeline_reference( + f"@{DEFAULT_PIPELINE_SOURCE}/denoise", + exit_stack=exit_stack, + sources=(source,), + ) + + self.assertIn("network unavailable", str(error.exception)) + + def test_remote_pipeline_downloads_to_cache_dir_when_provided(self) -> None: + source = PipelineSource( + name=DEFAULT_PIPELINE_SOURCE, + locations=(GitHubCatalogLocation(owner="cymis", repo="adagio-pipelines"),), + ) + with tempfile.TemporaryDirectory() as tmpdir: + cache_dir = Path(tmpdir) / "cache" + expected_path = ( + cache_dir + / "adagio-pipelines" + / DEFAULT_PIPELINE_SOURCE + / "denoise" + / "pipeline.adg" + ) + + with patch( + "adagio.cli.pipeline_sources.urlopen", + return_value=_FakeResponse( + b'{"spec": {"type": "pipeline", "signature": {"inputs": [], "parameters": [], "outputs": []}, "graph": []}}' + ), + ) as 
mock_urlopen: + with ExitStack() as exit_stack: + resolved = resolve_pipeline_reference_details( + f"@{DEFAULT_PIPELINE_SOURCE}/denoise", + exit_stack=exit_stack, + sources=(source,), + download_cache_dir=cache_dir, + ) + payload = json.loads(resolved.path.read_text(encoding="utf-8")) + + request = mock_urlopen.call_args.args[0] + self.assertIn("/pipelines/official/denoise/pipeline.adg", request.full_url) + self.assertEqual(resolved.path, expected_path) + self.assertEqual(payload["spec"]["type"], "pipeline") + self.assertTrue(resolved.is_remote) + + def test_cached_remote_pipeline_short_circuits_network(self) -> None: + source = PipelineSource( + name=DEFAULT_PIPELINE_SOURCE, + locations=(GitHubCatalogLocation(owner="cymis", repo="adagio-pipelines"),), + ) + with tempfile.TemporaryDirectory() as tmpdir: + cache_dir = Path(tmpdir) / "cache" + cached_path = ( + cache_dir + / "adagio-pipelines" + / DEFAULT_PIPELINE_SOURCE + / "denoise" + / "pipeline.adg" + ) + cached_path.parent.mkdir(parents=True) + cached_path.write_text('{"cached": true}', encoding="utf-8") + + with patch("adagio.cli.pipeline_sources.urlopen") as mock_urlopen: + with ExitStack() as exit_stack: + resolved = resolve_pipeline_reference_details( + f"@{DEFAULT_PIPELINE_SOURCE}/denoise", + exit_stack=exit_stack, + sources=(source,), + download_cache_dir=cache_dir, + ) + + self.assertEqual(resolved.path, cached_path) + self.assertEqual(resolved.origin, str(cached_path)) + self.assertFalse(resolved.is_remote) + mock_urlopen.assert_not_called() + + def test_non_adagio_source_is_rejected(self) -> None: + with ExitStack() as exit_stack: + with self.assertRaises(PipelineResolutionError) as error: + resolve_pipeline_reference( + "@my-personal-channel/denoise", + exit_stack=exit_stack, + sources=(), + ) + + self.assertIn("Expected @adagio/slug", str(error.exception)) + + def test_invalid_at_reference_reports_reference_shape(self) -> None: + with ExitStack() as exit_stack: + with 
self.assertRaises(PipelineResolutionError) as error: + resolve_pipeline_reference( + "@adagio/../secret", + exit_stack=exit_stack, + sources=(), + ) + + self.assertIn("Expected @adagio/slug", str(error.exception)) + def test_missing_source_reference_reports_attempted_locations(self) -> None: with tempfile.TemporaryDirectory() as tmpdir: source = PipelineSource( @@ -155,15 +373,13 @@ def test_missing_source_reference_reports_attempted_locations(self) -> None: with ExitStack() as exit_stack: with self.assertRaises(PipelineResolutionError) as error: resolve_pipeline_reference( - f"{DEFAULT_PIPELINE_SOURCE}/missing", + f"@{DEFAULT_PIPELINE_SOURCE}/missing", exit_stack=exit_stack, sources=(source,), ) message = str(error.exception) - self.assertIn( - "Pipeline reference 'adagio-playbook/missing' was not found.", message - ) + self.assertIn("Pipeline reference '@adagio/missing' was not found.", message) self.assertIn("pipelines/community/missing/pipeline.adg", message) @@ -191,7 +407,7 @@ def test_pipeline_show_accepts_source_reference(self) -> None: return_value=(source,), ): with patch("adagio.cli.pipeline.console", console): - show_pipeline(Path(f"{DEFAULT_PIPELINE_SOURCE}/denoise")) + show_pipeline(Path(f"@{DEFAULT_PIPELINE_SOURCE}/denoise")) self.assertIn("dada2.denoise_single", output.getvalue()) From d147a1897a52994ff39a0caec9af22b68ba7edac Mon Sep 17 00:00:00 2001 From: John Chase Date: Mon, 4 May 2026 21:59:58 -0700 Subject: [PATCH 44/44] remove formatting --- src/adagio/cli/dynamic.py | 24 +++++------------------- src/adagio/cli/main.py | 33 ++++++--------------------------- src/adagio/cli/runner.py | 12 ++---------- src/adagio/io.py | 4 ++-- src/adagio/model/arguments.py | 18 ++++++++---------- 5 files changed, 23 insertions(+), 68 deletions(-) diff --git a/src/adagio/cli/dynamic.py b/src/adagio/cli/dynamic.py index 0bf1a16..c53cc5e 100644 --- a/src/adagio/cli/dynamic.py +++ b/src/adagio/cli/dynamic.py @@ -79,9 +79,7 @@ def _pipeline_type_label(type_hint: Any) 
-> str: return "TEXT" -def _display_type_label( - *, spec_type: str | None, type_hint: Any, is_input: bool -) -> str: +def _display_type_label(*, spec_type: str | None, type_hint: Any, is_input: bool) -> str: if is_input: return path_type_label(spec_type) @@ -269,9 +267,7 @@ def build_dynamic_run( visible_input_names = ( set(visible_input_names) if visible_input_names is not None else None ) - visible_param_names = ( - set(visible_param_names) if visible_param_names is not None else None - ) + visible_param_names = set(visible_param_names) if visible_param_names is not None else None visible_output_names = ( set(visible_output_names) if visible_output_names is not None else None ) @@ -444,9 +440,7 @@ def add_dynamic_option( required_input_specs = [spec for spec in input_specs if spec.required] optional_input_specs = [spec for spec in input_specs if not spec.required] required_param_specs = [spec for spec in param_specs if _is_required_param(spec)] - optional_param_specs = [ - spec for spec in param_specs if not _is_required_param(spec) - ] + optional_param_specs = [spec for spec in param_specs if not _is_required_param(spec)] def add_input_spec(spec: InputSpec) -> None: original = spec.name @@ -500,9 +494,7 @@ def add_param_spec(spec: ParamSpec) -> None: argument_value = argument_params.get(original) has_argument_default = not _is_missing(argument_value) display_default = ( - default - if default is not None - else (argument_value if has_argument_default else None) + default if default is not None else (argument_value if has_argument_default else None) ) display_required = is_required and display_default is None param_default = None @@ -603,10 +595,4 @@ def run( def _is_missing(value: Any) -> bool: - return ( - value is None - or value == "" - or value == "" - or value == [] - or value == {} - ) + return value is None or value == "" or value == "" or value == [] or value == {} diff --git a/src/adagio/cli/main.py b/src/adagio/cli/main.py index 400bf94..93f6e52 
100644 --- a/src/adagio/cli/main.py +++ b/src/adagio/cli/main.py @@ -69,11 +69,7 @@ def main(argv: list[str] | None = None) -> None: ShowParamsMode(show_mode_str) if show_mode_str else ShowParamsMode.REQUIRED ) except ValueError: - console.print( - CycloptsPanel( - "Invalid --show-params value. Use one of: all, missing, required." - ) - ) + console.print(CycloptsPanel("Invalid --show-params value. Use one of: all, missing, required.")) sys.exit(1) if pipeline_str is None: pipeline_str = positional_pipeline @@ -84,7 +80,6 @@ def main(argv: list[str] | None = None) -> None: help_format="rich", version=__version__, ) - @app.command def cache() -> None: """Manage the shared QIIME cache directory.""" @@ -193,9 +188,7 @@ def run( arguments_path_str = extract_flag_value(argv, "--arguments") config_path_str = extract_flag_value(argv, "--config") arguments_data = ( - _load_arguments_data(Path(arguments_path_str), console) - if arguments_path_str - else None + _load_arguments_data(Path(arguments_path_str), console) if arguments_path_str else None ) if config_path_str: load_run_config(Path(config_path_str)) @@ -214,12 +207,8 @@ def run( visible_input_names={spec.name for spec in visible_inputs}, visible_param_names={spec.name for spec in visible_params}, visible_output_names={spec.name for spec in visible_outputs}, - argument_inputs=arguments_data.get("inputs", {}) - if arguments_data - else None, - argument_params=arguments_data.get("parameters", {}) - if arguments_data - else None, + argument_inputs=arguments_data.get("inputs", {}) if arguments_data else None, + argument_params=arguments_data.get("parameters", {}) if arguments_data else None, run_handler=partial( run_pipeline_from_kwargs, console=console, @@ -287,23 +276,13 @@ def _load_arguments_data(path: Path, _console: Console | None = None) -> dict[st if not isinstance(data.get("inputs"), dict) or not isinstance( data.get("parameters"), dict ): - _con.print( - CycloptsPanel( - "Invalid arguments file: 'inputs' and 
'parameters' must be objects." - ) - ) + _con.print(CycloptsPanel("Invalid arguments file: 'inputs' and 'parameters' must be objects.")) sys.exit(1) return data def _is_missing(value: Any) -> bool: - return ( - value is None - or value == "" - or value == "" - or value == [] - or value == {} - ) + return value is None or value == "" or value == "" or value == [] or value == {} def _resolve_pipeline( diff --git a/src/adagio/cli/runner.py b/src/adagio/cli/runner.py index 597ec2a..0f8901d 100644 --- a/src/adagio/cli/runner.py +++ b/src/adagio/cli/runner.py @@ -125,9 +125,7 @@ def run_pipeline_from_kwargs( if isinstance(value, list): arguments.inputs[original] = [str(item) for item in value] elif isinstance(value, dict): - arguments.inputs[original] = { - str(key): str(item) for key, item in value.items() - } + arguments.inputs[original] = {str(key): str(item) for key, item in value.items()} else: arguments.inputs[original] = str(value) @@ -207,13 +205,7 @@ def run_pipeline_from_kwargs( def _is_missing(value: Any) -> bool: """Treat placeholders and null values as missing.""" - return ( - value is None - or value == "" - or value == "" - or value == [] - or value == {} - ) + return value is None or value == "" or value == "" or value == [] or value == {} def _resolve_download_cache_dir(raw_value: str | Path | None) -> Path | None: diff --git a/src/adagio/io.py b/src/adagio/io.py index 4d8cdb6..e43f74e 100644 --- a/src/adagio/io.py +++ b/src/adagio/io.py @@ -1,3 +1,4 @@ + from adagio.execution.proxy import ProxyMetadata, lift_parsl, IndexedProxyArtifact @@ -5,7 +6,6 @@ def load_input(*, ctx, source: str): from qiime2.sdk import Artifact from qiime2.sdk import PluginManager - PluginManager() with ctx.cache: @@ -34,7 +34,6 @@ def load_input_collection(*, ctx, sources): def load_metadata(*, ctx, source: str): from qiime2 import Artifact, Metadata import zipfile - if zipfile.is_zipfile(source): metadata = Artifact.load(source).view(Metadata) else: @@ -43,6 +42,7 @@ def 
load_metadata(*, ctx, source: str): return metadata + @lift_parsl(lambda fut: fut) def save_output(*, ctx, output, destination): output.save(destination) diff --git a/src/adagio/model/arguments.py b/src/adagio/model/arguments.py index b991815..8982ff5 100644 --- a/src/adagio/model/arguments.py +++ b/src/adagio/model/arguments.py @@ -12,23 +12,21 @@ class AdagioArguments(BaseModel): def __repr__(self): """Format arguments for display.""" - return "\n".join( - [ - *self._format_repr_sect(self.inputs, "inputs"), - *self._format_repr_sect(self.parameters, "parameters"), - *self._format_repr_sect(self.outputs, "outputs"), - ] - ) + return '\n'.join([ + *self._format_repr_sect(self.inputs, 'inputs'), + *self._format_repr_sect(self.parameters, 'parameters'), + *self._format_repr_sect(self.outputs, 'outputs'), + ]) def _format_repr_sect(self, section, name): """Format a single argument section.""" lines = [] if not section: - lines.append(f"{name}: {{}}") + lines.append(f'{name}: {{}}') else: - lines.append(f"{name}:") + lines.append(f'{name}:') for name, value in section.items(): - lines.append(f" {name}: {value!r}") + lines.append(f' {name}: {value!r}') return lines