Merge PR #1 from RJS

2026-06-14 10:08:37 +00:00 · 2026-05-24 00:55:02 -07:00 · 2026-05-24 00:55:02 -07:00 · 03438e1e8a
commit 03438e1e8a
parent 74d613de34 a0327956dc
8 changed files with 310 additions and 87 deletions
--- a/docs/config-reference.md
+++ b/docs/config-reference.md
@ -122,6 +122,24 @@ Semantic context stage:

 This stage builds a lightweight repository index of files, Python symbols, imports, and tests, then writes compact relevant snippets for the current task. It is keyword based with symbol-aware scoring, so it works without a vector database or network dependency.

+### `on_status` Stage Routing
+
+Instead of a single `on_fail` catch-all, use `on_status` to route each review status to a different stage:
+
+```yaml
+- id: review
+  type: agent_review
+  agent: reviewer
+  output: review.md
+  on_status:
+    pass: summarize
+    retry: implement
+    fail: plan
+    escalate: human
+```
+
+`on_status` supports `pass`, `fail`, `retry`, and `escalate` keys. For `pass`, it overrides sequential progression and any agent-supplied `next_stage`. For non-pass statuses, the lookup order is: `on_status[status]` → `on_fail` → `next_stage` (agent output).
+
 ## Failure, Retry, and Resource Artifacts

 Failed command and validation stages write deterministic diagnostics under the task artifact directory:
--- a/nightshift/config.py
+++ b/nightshift/config.py
@ -33,6 +33,7 @@ class SafetyConfig:
    allowed_commands: tuple[str, ...]
    forbidden_commands: tuple[str, ...]
    allowed_env: tuple[str, ...] = ()
+    skip_repo_parts: tuple[str, ...] = ()


@dataclass(frozen=True)
@ -61,7 +62,7 @@ class StageConfig:
    commands: tuple[str, ...] = ()
    output: str | None = None
    on_fail: str | None = None
-    on_pass: str | None = None
+    on_status: dict[str, str] | None = None
    shell: bool = True
    timeout_seconds: int | None = None
    working_dir: Path | None = None
@ -184,6 +185,9 @@ def parse_config(raw: dict[str, Any], config_path: Path) -> NightShiftConfig:
    )

    safety_raw = _require_mapping(raw["safety"], "safety")
+    skip_repo_parts = _string_tuple(
+        safety_raw.get("skip_repo_parts", []), "safety.skip_repo_parts"
+    )
    safety = SafetyConfig(
        require_clean_worktree=_optional_bool(
            safety_raw.get("require_clean_worktree", False),
@ -195,6 +199,7 @@ def parse_config(raw: dict[str, Any], config_path: Path) -> NightShiftConfig:
            safety_raw.get("forbidden_commands", []), "safety.forbidden_commands"
        ),
        allowed_env=_string_tuple(safety_raw.get("allowed_env", []), "safety.allowed_env"),
+        skip_repo_parts=skip_repo_parts,
    )

    agents_raw = _require_mapping(raw["agents"], "agents")
@ -393,7 +398,7 @@ def parse_config(raw: dict[str, Any], config_path: Path) -> NightShiftConfig:
                commands=commands,
                output=_optional_string(stage_raw.get("output"), f"{stage_context}.output"),
                on_fail=_optional_string(stage_raw.get("on_fail"), f"{stage_context}.on_fail"),
-                on_pass=_optional_string(stage_raw.get("on_pass"), f"{stage_context}.on_pass"),
+                on_status=_parse_on_status(stage_raw, stage_context),
                shell=_optional_bool(stage_raw.get("shell", True), f"{stage_context}.shell"),
                timeout_seconds=timeout_seconds,
                working_dir=Path(working_dir_raw) if working_dir_raw else None,
@ -418,10 +423,13 @@ def parse_config(raw: dict[str, Any], config_path: Path) -> NightShiftConfig:
            raise ConfigError(
                f"Config error: stage '{stage.id}' on_fail references unknown stage '{stage.on_fail}'."
            )
-        if stage.on_pass and stage.on_pass not in stage_ids:
-            raise ConfigError(
-                f"Config error: stage '{stage.id}' on_pass references unknown stage '{stage.on_pass}'."
-            )
+        if stage.on_status:
+            for status_key, target in stage.on_status.items():
+                if target not in stage_ids:
+                    raise ConfigError(
+                        f"Config error: stage '{stage.id}' on_status.{status_key} "
+                        f"references unknown stage '{target}'."
+                    )

    return NightShiftConfig(
        path=config_path,
@ -635,3 +643,27 @@ def _string_tuple(value: Any, context: str) -> tuple[str, ...]:
    if not isinstance(value, list) or not all(isinstance(item, str) and item for item in value):
        raise ConfigError(f"Config error: '{context}' must be a list of non-empty strings.")
    return tuple(value)
+
+
+VALID_STATUS_KEYS = frozenset({"pass", "fail", "retry", "escalate"})
+
+
+def _parse_on_status(raw: dict[str, Any], context: str) -> dict[str, str] | None:
+    on_status_raw = raw.get("on_status")
+    if on_status_raw is None:
+        return None
+    if not isinstance(on_status_raw, dict):
+        raise ConfigError(f"Config error: {context}.on_status must be a mapping.")
+    on_status: dict[str, str] = {}
+    for key, value in on_status_raw.items():
+        if key not in VALID_STATUS_KEYS:
+            raise ConfigError(
+                f"Config error: {context}.on_status invalid key '{key}'. "
+                f"Valid keys: {', '.join(sorted(VALID_STATUS_KEYS))}."
+            )
+        if not isinstance(value, str) or not value:
+            raise ConfigError(
+                f"Config error: {context}.on_status.{key} must be a non-empty string."
+            )
+        on_status[key] = value
+    return on_status
--- a/nightshift/patches.py
+++ b/nightshift/patches.py
@ -106,32 +106,16 @@ def parse_file_updates(text: str) -> tuple[FileUpdate, ...]:
        updates.append(FileUpdate(path=path, content=content))
    if not updates:
        raise PipelineError(
-            "File writer error: no file blocks found. Expected FILE: path with ---CONTENT---/---END--- or fenced blocks like ```file:path.py."
+            "File writer error: no file blocks found. Expected fenced blocks like ```file:path.to."
        )
    return tuple(updates)


 def _parse_delimited_file_updates(text: str) -> list[FileUpdate]:
-    updates: list[FileUpdate] = []
-    header_pattern = re.compile(r"(?m)^FILE:\s*(?P<path>[^\n]+)\n---CONTENT---\n")
-    matches = list(header_pattern.finditer(text))
-    for index, match in enumerate(matches):
-        path = match.group("path").strip().strip("`")
-        content_start = match.end()
-        next_file_start = matches[index + 1].start() if index + 1 < len(matches) else len(text)
-        raw_content = text[content_start:next_file_start]
-        end_match = re.search(r"(?m)^---END---\s*$", raw_content)
-        if end_match:
-            raw_content = raw_content[: end_match.start()]
-        content = raw_content.rstrip("\r\n") + "\n"
-        if path:
-            updates.append(FileUpdate(path=path, content=content))
-    if updates:
-        return updates
-
    pattern = re.compile(
        r"(?ms)^FILE:\s*(?P<path>[^\n]+)\n---CONTENT---\n(?P<content>.*?)\n---END---\s*$"
    )
+    updates: list[FileUpdate] = []
    for match in pattern.finditer(text):
        path = match.group("path").strip().strip("`")
        content = match.group("content")
--- a/nightshift/pipeline.py
+++ b/nightshift/pipeline.py
@ -200,22 +200,41 @@ class PipelineRunner:
                retry_notes.append(f"Context update from '{stage.id}': {result.context_update}")

            if result.status == "pass":
-                pass_target_stage = result.next_stage or stage.on_pass
-                if stage.type in {"agent_review", "review"} and result.next_stage:
+                if stage.on_status and "pass" in stage.on_status:
+                    target = stage.on_status["pass"]
+                    if target not in stage_indexes:
+                        final_status = "failed"
+                        final_reason = (
+                            f"Stage '{stage.id}' on_status.pass references unknown stage '{target}'."
+                        )
+                        break
                    self.logger.event(
-                        "stage.next_ignored",
-                        "Ignoring next_stage from passing review",
+                        "stage.next",
+                        "Jumping via on_status.pass",
                        run_id=self.artifacts.run_id,
                        task_id=task.id,
                        stage_id=stage.id,
-                        requested_next_stage=result.next_stage,
+                        next_stage=target,
                    )
-                    pass_target_stage = stage.on_pass
-                if pass_target_stage:
-                    if pass_target_stage not in stage_indexes:
+                    index = stage_indexes[target]
+                    continue
+                if stage.type in {"agent_review", "review"}:
+                    if result.next_stage:
+                        self.logger.event(
+                            "stage.next_ignored",
+                            "Ignoring next_stage from passing review",
+                            run_id=self.artifacts.run_id,
+                            task_id=task.id,
+                            stage_id=stage.id,
+                            requested_next_stage=result.next_stage,
+                        )
+                    index += 1
+                    continue
+                if result.next_stage:
+                    if result.next_stage not in stage_indexes:
                        final_status = "failed"
                        final_reason = (
-                            f"Stage '{stage.id}' requested unknown next stage '{pass_target_stage}'."
+                            f"Stage '{stage.id}' requested unknown next stage '{result.next_stage}'."
                        )
                        break
                    self.logger.event(
@ -224,14 +243,14 @@ class PipelineRunner:
                        run_id=self.artifacts.run_id,
                        task_id=task.id,
                        stage_id=stage.id,
-                        next_stage=pass_target_stage,
+                        next_stage=result.next_stage,
                    )
-                    index = stage_indexes[pass_target_stage]
+                    index = stage_indexes[result.next_stage]
                    continue
                index += 1
                continue

-            target_stage = _failure_target_stage(stage, result)
+            target_stage = _resolve_retry_target_stage(stage, result)
            analysis_note = self._write_failure_diagnostics(stage, task, result, retry_count)
            if analysis_note:
                retry_notes.append(analysis_note)
@ -1840,14 +1859,10 @@ def _is_malformed_review_result(result: StageResult) -> bool:
    )


-def _failure_target_stage(stage: StageConfig, result: StageResult) -> str | None:
-    if stage.type not in {"agent_review", "review"}:
-        return result.next_stage or stage.on_fail
-    if _is_malformed_review_result(result):
+def _resolve_retry_target_stage(stage: StageConfig, result: StageResult) -> str | None:
+    if stage.type in {"agent_review", "review"} and _is_malformed_review_result(result):
        return None
-    if result.next_stage and result.next_stage != stage.id:
-        return result.next_stage
-    return stage.on_fail
+    return (stage.on_status or {}).get(result.status) or stage.on_fail or result.next_stage


 def _previous_continuity_review_passed(previous_outputs: dict[str, str]) -> bool:
--- a/nightshift/repo_tools.py
+++ b/nightshift/repo_tools.py
@ -17,7 +17,7 @@ from .safety import resolve_inside_root, resolve_project_root, validate_scoped_p
 DEFAULT_MAX_BYTES = 20_000
 DEFAULT_MAX_MATCHES = 100
 DEFAULT_MAX_LOOKUP_REQUESTS = 8
-SKIPPED_REPO_PARTS = {".git", ".nightshift", "__pycache__", ".venv", "venv"}
+DEFAULT_SKIPPED_REPO_PARTS = {".git", ".nightshift", "__pycache__", ".venv", "venv"}


@dataclass(frozen=True)
@ -45,6 +45,7 @@ class RepoTools:
            self.project_root,
            safety.scoped_paths or (".",),
        )
+        self.skipped_parts = DEFAULT_SKIPPED_REPO_PARTS | set(safety.skip_repo_parts)

    def list_files(self, path: str = ".", pattern: str = "*", max_files: int = 200) -> str:
        root = self._resolve_scoped(path, "list_files path")
@ -57,7 +58,7 @@ class RepoTools:
        relative_files = [
            _relative(item, self.project_root)
            for item in sorted(candidates)
-            if fnmatch.fnmatch(item.name, pattern) and not _is_skipped_repo_path(item, self.project_root)
+            if fnmatch.fnmatch(item.name, pattern) and not _is_skipped_repo_path(item, self.project_root, self.skipped_parts)
        ]
        lines = relative_files[:max_files]
        if len(relative_files) > max_files:
@ -66,7 +67,7 @@ class RepoTools:

    def read_file(self, path: str, max_bytes: int = DEFAULT_MAX_BYTES) -> str:
        file_path = self._resolve_scoped(path, "read_file path")
-        if _is_skipped_repo_path(file_path, self.project_root):
+        if _is_skipped_repo_path(file_path, self.project_root, self.skipped_parts):
            return f"Path is skipped for repository lookup: {path}"
        if not file_path.exists() or not file_path.is_file():
            return f"File not found: {path}"
@ -89,7 +90,7 @@ class RepoTools:
        files = [root] if root.is_file() else [item for item in root.rglob("*") if item.is_file()]
        matches: list[str] = []
        for file_path in sorted(files):
-            if _is_skipped_repo_path(file_path, self.project_root):
+            if _is_skipped_repo_path(file_path, self.project_root, self.skipped_parts):
                continue
            try:
                text = file_path.read_text(encoding="utf-8", errors="replace")
@ -270,9 +271,9 @@ def _relative(path: Path, root: Path) -> str:
        return path.as_posix()


-def _is_skipped_repo_path(path: Path, root: Path) -> bool:
+def _is_skipped_repo_path(path: Path, root: Path, skipped_parts: set[str]) -> bool:
    try:
        parts = set(path.relative_to(root).parts)
    except ValueError:
        parts = set(path.parts)
-    return bool(parts & SKIPPED_REPO_PARTS)
+    return bool(parts & skipped_parts)
--- a/nightshift/templates.py
+++ b/nightshift/templates.py
@ -68,6 +68,12 @@ pipeline:
    - id: review
      type: agent_review
      agent: reviewer
+      # on_fail: implement        # catch-all for any non-pass status
+      # on_status:                # per-status routing (takes priority over on_fail)
+      #   pass: summarize
+      #   retry: implement
+      #   fail: plan
+      #   escalate: human
      on_fail: implement
      output: review.md

@ -333,7 +339,7 @@ If you need repository context before planning, output lookup requests exactly l

 lookup_requests:
 - tool: read_file
-  path: relative/path.py
+  path: relative/path.to
 - tool: grep
  path: .
  pattern: search_regex
@ -350,7 +356,7 @@ REAL_MODEL_IMPLEMENTER_PROMPT = """You are the implementation agent for NightShi

 Output only complete file content blocks.
 Use one fenced block per file with this exact opening form:
-```file:relative/path.py
+```file:relative/path.to
 <complete file content>
 ```
 Do not include explanations before or after the file blocks.
--- a/tests/test_config.py
+++ b/tests/test_config.py
@ -55,39 +55,57 @@ class ConfigTests(unittest.TestCase):
            with self.assertRaisesRegex(ConfigError, "on_fail references unknown stage"):
                load_config(config_path)

-    def test_on_pass_must_reference_existing_stage(self) -> None:
+    def test_on_status_parses_correctly(self) -> None:
        with tempfile.TemporaryDirectory() as directory:
            root = Path(directory)
            init_project(root)
            config_path = root / "nightshift.yaml"
-            config_path.write_text(
-                config_path.read_text(encoding="utf-8").replace(
-                    "on_fail: plan", "on_pass: missing_stage", 1
-                ),
-                encoding="utf-8",
-            )
-
-            with self.assertRaisesRegex(ConfigError, "on_pass references unknown stage"):
-                load_config(config_path)
-
-    def test_on_pass_loads(self) -> None:
-        with tempfile.TemporaryDirectory() as directory:
-            root = Path(directory)
-            init_project(root)
-            config_path = root / "nightshift.yaml"
-            config_path.write_text(
-                config_path.read_text(encoding="utf-8").replace(
-                    "      output: plan.md",
-                    "      output: plan.md\n      on_pass: summarize",
-                    1,
-                ),
-                encoding="utf-8",
+            text = config_path.read_text(encoding="utf-8")
+            text = text.replace(
+                "      on_fail: implement\n      output: review.md",
+                "      output: review.md\n      on_status:\n        pass: summarize\n        retry: implement\n        fail: plan",
            )
+            config_path.write_text(text, encoding="utf-8")

            config = load_config(config_path)
-            plan_stage = next(stage for stage in config.pipeline.stages if stage.id == "plan")
+            review_stage = next(s for s in config.pipeline.stages if s.id == "review")

-            self.assertEqual(plan_stage.on_pass, "summarize")
+            self.assertEqual(review_stage.on_status, {
+                "pass": "summarize",
+                "retry": "implement",
+                "fail": "plan",
+            })
+            self.assertIsNone(review_stage.on_fail)
+
+    def test_on_status_rejects_invalid_key(self) -> None:
+        with tempfile.TemporaryDirectory() as directory:
+            root = Path(directory)
+            init_project(root)
+            config_path = root / "nightshift.yaml"
+            text = config_path.read_text(encoding="utf-8")
+            text = text.replace(
+                "      on_fail: implement\n      output: review.md",
+                "      output: review.md\n      on_status:\n        wat: broken",
+            )
+            config_path.write_text(text, encoding="utf-8")
+
+            with self.assertRaisesRegex(ConfigError, "on_status invalid key"):
+                load_config(config_path)
+
+    def test_on_status_references_unknown_stage(self) -> None:
+        with tempfile.TemporaryDirectory() as directory:
+            root = Path(directory)
+            init_project(root)
+            config_path = root / "nightshift.yaml"
+            text = config_path.read_text(encoding="utf-8")
+            text = text.replace(
+                "      on_fail: implement\n      output: review.md",
+                "      output: review.md\n      on_status:\n        fail: missing_stage",
+            )
+            config_path.write_text(text, encoding="utf-8")
+
+            with self.assertRaisesRegex(ConfigError, "on_status.fail references unknown stage"):
+                load_config(config_path)

    def test_validate_requires_prompt_files(self) -> None:
        with tempfile.TemporaryDirectory() as directory:
@ -371,6 +389,39 @@ class ConfigTests(unittest.TestCase):
            with self.assertRaisesRegex(ConfigError, "non-command stage 'plan'"):
                load_config(config_path)

+    def test_on_status_empty_key_fails(self) -> None:
+        with tempfile.TemporaryDirectory() as directory:
+            root = Path(directory)
+            init_project(root)
+            config_path = root / "nightshift.yaml"
+            text = config_path.read_text(encoding="utf-8")
+            text = text.replace(
+                "      on_fail: implement\n      output: review.md",
+                "      output: review.md\n      on_status:\n        pass: ",
+            )
+            config_path.write_text(text, encoding="utf-8")
+
+            with self.assertRaisesRegex(ConfigError, "must be a non-empty string"):
+                load_config(config_path)
+
+    def test_on_fail_fallback_when_on_status_does_not_cover_status(self) -> None:
+        with tempfile.TemporaryDirectory() as directory:
+            root = Path(directory)
+            init_project(root)
+            config_path = root / "nightshift.yaml"
+            text = config_path.read_text(encoding="utf-8")
+            text = text.replace(
+                "      on_fail: implement\n      output: review.md",
+                "      output: review.md\n      on_status:\n        pass: summarize\n      on_fail: implement",
+            )
+            config_path.write_text(text, encoding="utf-8")
+
+            config = load_config(config_path)
+            review_stage = next(s for s in config.pipeline.stages if s.id == "review")
+
+            self.assertEqual(review_stage.on_status, {"pass": "summarize"})
+            self.assertEqual(review_stage.on_fail, "implement")
+

 if __name__ == "__main__":
    unittest.main()
--- a/tests/test_pipeline.py
+++ b/tests/test_pipeline.py
@ -105,29 +105,145 @@ class PipelineRunnerTests(unittest.TestCase):
            )
            self.assertIn("Modified Files", (root / ".nightshift" / "runs" / "test-run" / "run-summary.md").read_text(encoding="utf-8"))

-    def test_on_pass_jumps_to_configured_stage(self) -> None:
+    def test_on_status_routes_pass_to_target(self) -> None:
        with tempfile.TemporaryDirectory() as directory:
            root = Path(directory)
            _write_common_files(root)
            stages = (
-                StageConfig(id="first", type="agent", agent="planner", output="first.md", on_pass="third"),
+                StageConfig(id="plan", type="agent", agent="planner", output="plan.md"),
                StageConfig(
-                    id="second",
-                    type="command",
-                    commands=('python -c "print(\'should not run\')"',),
-                    output="second-output.txt",
+                    id="review",
+                    type="agent_review",
+                    agent="reviewer",
+                    on_status={"pass": "summarize"},
+                    output="review.md",
                ),
-                StageConfig(id="third", type="summarize", output="final-notes.md"),
+                StageConfig(id="implement", type="agent", agent="planner", output="impl.md"),
+                StageConfig(id="summarize", type="summarize", output="final-notes.md"),
            )
            config = make_config(root, stages)
            runner = PipelineRunner(config, ArtifactStore(root, ".nightshift", run_id="test-run"))
+            task = parse_tasks(TASK_MD)[0]

-            result = runner.run_task(parse_tasks(TASK_MD)[0])
+            result = runner.run_task(task)

-            task_dir = root / ".nightshift" / "runs" / "test-run" / "tasks" / "TASK-001"
            self.assertEqual(result.status, "complete")
-            self.assertEqual([item.stage_id for item in result.stage_results], ["first", "third"])
-            self.assertFalse((task_dir / "second-output.txt").exists())
+            self.assertEqual(result.retry_count, 0)
+            self.assertEqual(
+                [r.stage_id for r in result.stage_results],
+                ["plan", "review", "summarize"],
+            )
+
+    def test_on_status_routes_fail_to_target(self) -> None:
+        with tempfile.TemporaryDirectory() as directory:
+            root = Path(directory)
+            _write_common_files(root)
+            fail_reviewer = 'python -c "print(\'status: fail\\nreason: bad plan\')"'
+            stages = (
+                StageConfig(id="plan", type="agent", agent="planner", output="plan.md"),
+                StageConfig(
+                    id="review",
+                    type="agent_review",
+                    agent="reviewer",
+                    on_status={"fail": "plan"},
+                    output="review.md",
+                ),
+                StageConfig(id="summarize", type="summarize", output="final-notes.md"),
+            )
+            config = make_config(root, stages)
+            config.agents["reviewer"] = AgentConfig(
+                id="reviewer",
+                backend="command",
+                command=fail_reviewer,
+                system_prompt=Path("reviewer.md"),
+            )
+            runner = PipelineRunner(config, ArtifactStore(root, ".nightshift", run_id="test-run"))
+            task = parse_tasks(TASK_MD)[0]
+
+            result = runner.run_task(task)
+
+            self.assertEqual(result.status, "failed")
+            self.assertEqual(result.retry_count, 2)
+            self.assertEqual(
+                [r.stage_id for r in result.stage_results],
+                ["plan", "review", "plan", "review", "plan", "review"],
+            )
+
+    def test_on_status_escalate_routes_to_human_not_on_fail(self) -> None:
+        with tempfile.TemporaryDirectory() as directory:
+            root = Path(directory)
+            _write_common_files(root)
+            escalate_reviewer = 'python -c "print(\'status: escalate\\nreason: need human\')"'
+            stages = (
+                StageConfig(id="plan", type="agent", agent="planner", output="plan.md"),
+                StageConfig(
+                    id="review",
+                    type="agent_review",
+                    agent="reviewer",
+                    on_status={
+                        "retry": "plan",
+                        "escalate": "human",
+                    },
+                    on_fail="plan",
+                    output="review.md",
+                ),
+                StageConfig(id="human", type="summarize", output="human-notes.md"),
+                StageConfig(id="summarize", type="summarize", output="final-notes.md"),
+            )
+            config = make_config(root, stages)
+            config.agents["reviewer"] = AgentConfig(
+                id="reviewer",
+                backend="command",
+                command=escalate_reviewer,
+                system_prompt=Path("reviewer.md"),
+            )
+            runner = PipelineRunner(config, ArtifactStore(root, ".nightshift", run_id="test-run"))
+            task = parse_tasks(TASK_MD)[0]
+
+            result = runner.run_task(task)
+
+            self.assertEqual(result.status, "complete")
+            self.assertEqual(result.retry_count, 1)
+            self.assertEqual(
+                [r.stage_id for r in result.stage_results],
+                ["plan", "review", "human", "summarize"],
+            )
+
+    def test_on_fail_fallback_when_status_not_in_on_status(self) -> None:
+        with tempfile.TemporaryDirectory() as directory:
+            root = Path(directory)
+            _write_common_files(root)
+            fail_reviewer = 'python -c "print(\'status: fail\\nreason: bad\')"'
+            stages = (
+                StageConfig(id="plan", type="agent", agent="planner", output="plan.md"),
+                StageConfig(
+                    id="review",
+                    type="agent_review",
+                    agent="reviewer",
+                    on_status={"retry": "plan"},
+                    on_fail="implement",
+                    output="review.md",
+                ),
+                StageConfig(id="implement", type="agent", agent="planner", output="impl.md"),
+            )
+            config = make_config(root, stages)
+            config.agents["reviewer"] = AgentConfig(
+                id="reviewer",
+                backend="command",
+                command=fail_reviewer,
+                system_prompt=Path("reviewer.md"),
+            )
+            runner = PipelineRunner(config, ArtifactStore(root, ".nightshift", run_id="test-run"))
+            task = parse_tasks(TASK_MD)[0]
+
+            result = runner.run_task(task)
+
+            self.assertEqual(result.status, "failed")
+            self.assertEqual(result.retry_count, 2)
+            self.assertEqual(
+                [r.stage_id for r in result.stage_results],
+                ["plan", "review", "implement", "review", "implement", "review"],
+            )

    def test_task_preflight_fails_when_task_specific_test_file_is_missing(self) -> None:
        with tempfile.TemporaryDirectory() as directory: