Recover usable drafts from failed runs

2026-06-14 18:18:36 +00:00 · 2026-05-22 02:55:26 -07:00 · 2026-05-22 02:55:26 -07:00 · 6e03430a33
commit 6e03430a33
parent dee87434e7
7 changed files with 307 additions and 5 deletions
--- a/docs/ideas.md
+++ b/docs/ideas.md
@ -35,6 +35,83 @@ Deterministic checks:
 Then optional model reviewer checks acceptance-criteria alignment.
 ## P0: Preserve Good Drafts During Repair
 When a generated file block contains useful allowed content plus disallowed or invalid extra content, avoid redrafting from scratch.
 Possible behavior:
 - keep the allowed candidate file artifact
 - strip disallowed file blocks only when configured as safe for that stage
 - continue with validation for the allowed content
 - or ask the model for a minimal correction that preserves the accepted candidate
 For writing workflows, preserving a good scene is more valuable than forcing a full retry.
 ## P0: Remove Runtime Overrides For Custom Ollama Models
 If a model is a tuned local Ollama model such as `nightshift-writer` or `nightshift-base`, prefer the Modelfile parameters unless the stage has a specific reason to override them.
 Candidate config cleanup:
 - remove `temperature`
 - remove `num_ctx`
 - remove `num_predict`
 - remove `stop` if present
 This avoids NightShift accidentally overriding tuned custom-model behavior.
 ## P1: Improve `what-happened` For Model Runs
 The report should identify usable intermediate work, not only final failure state.
 Examples:
 - model produced a valid scene candidate
 - validation rejected extra state files
 - recover candidate from `candidate-files/<stage>/index.md`
 - retry output was invalid or too short
 - next recommended action
 This should make failed creative-writing runs reviewable without manually reading every artifact.
 ## P1: Add Stage-Specific Task Views
 The same task may say both "write scene" and "update state", but those responsibilities belong to different stages.
 Stage prompts should receive a filtered task view:
 - drafter sees only scene-writing criteria
 - state updater sees only durable state update criteria
 - reviewers see criteria relevant to their review role
 This reduces prompt contradiction and makes deterministic stage rules easier for models to follow.
 ## P1: Preserve Intra-Attempt Rerun Artifacts
 When NightShift re-runs an agent inside the same stage attempt, do not overwrite the previous artifact.
 Examples:
 - `draft_scene-agent-output.md`
 - `draft_scene-agent-output-invalid-rerun-1.md`
 - `draft_scene-agent-output-1.md`
 This keeps the initial useful output visible even when strict rerun output is worse.
 ## P1: Add A Writing-Mode Validator
 Add deterministic checks for prose workflows:
 - scene file exists at requested path
 - scene word count is within configured range
 - drafter did not touch state files
 - state updater did not touch chapter prose
 - no TODOs, author notes, or bracket placeholders
 - optional checks for repeated headings or accidental prompt leakage
 This should run before model review stages.
 ## P2: Add A Test Analyzer Agent For TDD
 Defer until generated tests are stable.
--- a/nightshift/agents.py
+++ b/nightshift/agents.py
@ -519,8 +519,24 @@ def output_contract_for(stage: StageConfig) -> str:
 def _format_allowed_file_writer_paths(allowed_paths: tuple[str, ...]) -> str:
    if not allowed_paths:
        return "Use real project-relative paths, not placeholder paths."
    normalized = tuple(path.replace("\\", "/").rstrip("/") for path in allowed_paths)
    paths = ", ".join(f"`{path}`" for path in allowed_paths)
-    return f"Use only paths under these project-relative targets: {paths}."
+    guidance = f"Use only paths under these project-relative targets: {paths}."
    if normalized == ("story/chapters",):
        return (
            guidance
            + " This is the drafting stage: write only scene prose; do not update plot state, "
            "characters, timeline, unresolved threads, or other story state files."
        )
    state_paths = {
        "story/plot-state.md",
        "story/characters.md",
        "story/timeline.md",
        "story/unresolved-threads.md",
    }
    if set(normalized).issubset(state_paths):
        return guidance + " This is the state update stage: do not write or rewrite chapter prose."
    return guidance
 def parse_review_output(output: str) -> tuple[StageStatus, str, str | None, str | None]:
--- a/nightshift/patches.py
+++ b/nightshift/patches.py
@ -114,6 +114,7 @@ def generate_patch_from_file_updates(
    updates: tuple[FileUpdate, ...],
    project_root: str | Path,
    safety: SafetyConfig,
    allowed_paths: tuple[str, ...] = (),
    forbidden_paths: tuple[str, ...] = DEFAULT_FORBIDDEN_PATHS,
 ) -> str:
    root = resolve_project_root(project_root)
@ -128,6 +129,7 @@ def generate_patch_from_file_updates(
            raise PipelineError(f"File writer error: duplicate file block `{normalized_path}`.")
        seen[normalized_path] = update.content
        _validate_patch_path(normalized_path, root, scoped_roots, forbidden_paths)
        _validate_allowed_patch_path(normalized_path, root, allowed_paths)
        file_path = resolve_inside_root(root, normalized_path, f"file update '{normalized_path}'")
        old_text = file_path.read_text(encoding="utf-8", errors="replace") if file_path.exists() else ""
        if old_text == update.content:
--- a/nightshift/pipeline.py
+++ b/nightshift/pipeline.py
@ -22,6 +22,7 @@ from .patches import (
    DEFAULT_FORBIDDEN_PATHS,
    DEFAULT_MAX_CHANGED_LINES,
    DEFAULT_MAX_FILES,
    FileUpdate,
    apply_patch_with_git,
    extract_unified_diff,
    format_patch_apply_result,
@ -693,7 +694,7 @@ class PipelineRunner:
    ) -> StageResult:
        if stage.agent is None:
            raise PipelineError(f"Pipeline error: file_writer stage '{stage.id}' must reference an agent.")
-        enriched_outputs = dict(previous_outputs)
+        enriched_outputs = _file_writer_previous_outputs(previous_outputs, retry_count)
        context_pack_path = self._latest_task_artifact(task.id, "context-pack.md")
        if context_pack_path is not None:
            enriched_outputs["context-pack.md"] = context_pack_path.read_text(encoding="utf-8", errors="replace")
@ -745,19 +746,28 @@ class PipelineRunner:
            raw_output = self._read_output(result.output_path)
            stdout = extract_agent_stdout(raw_output)
        invalid_rerun_done = False
        candidate_index_path: Path | None = None
        while True:
            try:
                updates = parse_file_updates(stdout)
                candidate_index_path = self._write_file_writer_candidates(
                    task.id,
                    stage,
                    updates,
                    retry_count,
                )
                patch = generate_patch_from_file_updates(
                    updates,
                    self.config.project.root,
                    self.config.safety,
                    allowed_paths=stage.allowed_paths,
                    forbidden_paths=stage.forbidden_paths or DEFAULT_FORBIDDEN_PATHS,
                )
                patch_reason = "Deterministic patch written from file blocks."
                log_message = "Wrote deterministic patch from file blocks"
                break
            except PipelineError as exc:
                reason = _file_writer_error_reason(stage, str(exc))
                if (
                    "no file blocks found" in str(exc)
                    and "diff --git " not in stdout
@ -773,7 +783,7 @@ class PipelineRunner:
                    rerun_outputs = dict(enriched_outputs)
                    rerun_outputs["invalid_file_writer_output_summary"] = _invalid_file_writer_output_summary(
                        stdout,
-                        str(exc),
+                        reason,
                    )
                    strict_notes = [
                        *retry_notes,
@ -796,7 +806,6 @@ class PipelineRunner:
                    patch = normalize_patch_text(stdout)
                except PipelineError:
                    summary_filename = _writer_summary_filename(stage, retry_count)
                    reason = str(exc)
                    if "generated patch has no changes" in reason:
                        next_stage = self._stage_after_patch_flow(stage.id)
                        reason = self._no_changes_reason(retry_count)
@ -836,6 +845,11 @@ class PipelineRunner:
                proposed_path.relative_to(self.config.project.root).as_posix(),
                retry_count=retry_count,
                retry_notes=retry_notes,
                candidate_index_path=(
                    candidate_index_path.relative_to(self.config.project.root).as_posix()
                    if candidate_index_path
                    else None
                ),
            ),
        )
        self.logger.event(
@ -853,6 +867,47 @@ class PipelineRunner:
            context_update=f"Implementation summary: {summary_path.relative_to(self.config.project.root).as_posix()}",
        )
    def _write_file_writer_candidates(
        self,
        task_id: str,
        stage: StageConfig,
        updates: tuple[FileUpdate, ...],
        retry_count: int,
    ) -> Path:
        if not updates:
            raise PipelineError("File writer error: no candidate file blocks found.")
        base = f"candidate-files/{stage.id}"
        if retry_count:
            base += f"-retry-{retry_count}"
        lines = [
            "# Candidate Files",
            "",
            f"Stage: `{stage.id}`",
            f"Retry: {retry_count}",
            "",
            "These are raw file blocks extracted before patch validation or apply.",
            "",
            "## Files",
            "",
        ]
        seen: set[str] = set()
        for index, update in enumerate(updates, start=1):
            filename = f"{index:03d}-{_candidate_artifact_name(update.path)}"
            while filename in seen:
                filename = f"{index:03d}-{len(seen):03d}-{_candidate_artifact_name(update.path)}"
            seen.add(filename)
            artifact_name = f"{base}/{filename}"
            artifact_path = self.artifacts.write_stage_output(task_id, artifact_name, update.content)
            relative = artifact_path.relative_to(self.config.project.root).as_posix()
            lines.extend(
                [
                    f"- Source path: `{update.path}`",
                    f"  Artifact: `{relative}`",
                ]
            )
        lines.append("")
        return self.artifacts.write_stage_output(task_id, f"{base}/index.md", "\n".join(lines))
    def _writer_agent_stage(self, stage: StageConfig, retry_count: int) -> StageConfig:
        suffix = f"-{retry_count}" if retry_count else ""
        return replace(
@ -1426,6 +1481,7 @@ def format_implementation_summary(
    patch_path: str,
    retry_count: int = 0,
    retry_notes: list[str] | None = None,
    candidate_index_path: str | None = None,
 ) -> str:
    notes = retry_notes or []
    lines = [
@ -1435,6 +1491,7 @@ def format_implementation_summary(
        "Status: pass",
        f"Repair attempt: {retry_count}",
        f"Patch: `{patch_path}`",
        f"Candidate files: `{candidate_index_path}`" if candidate_index_path else "Candidate files: <none>",
        "",
        "## Retry Feedback",
        "",
@ -1551,6 +1608,68 @@ def _invalid_file_writer_output_summary(output: str, reason: str, max_chars: int
    return "\n".join(lines)
 def _file_writer_error_reason(stage: StageConfig, reason: str) -> str:
    guidance = _file_writer_stage_guidance(stage)
    if not guidance or "not allowed for this stage" not in reason:
        return reason
    return f"{reason} {guidance}"
 def _file_writer_stage_guidance(stage: StageConfig) -> str:
    allowed = tuple(path.replace("\\", "/").rstrip("/") for path in stage.allowed_paths)
    if allowed == ("story/chapters",):
        return (
            "This is the drafting stage: write only scene prose under `story/chapters/`. "
            "Do not update plot state, characters, timeline, unresolved threads, or other story state files."
        )
    state_paths = {
        "story/plot-state.md",
        "story/characters.md",
        "story/timeline.md",
        "story/unresolved-threads.md",
    }
    if set(allowed).issubset(state_paths) and allowed:
        return (
            "This is the state update stage: update only durable story state files. "
            "Do not rewrite scene prose or chapter files."
        )
    if allowed:
        return "Return file blocks only for the allowed paths configured on this stage."
    return ""
 def _candidate_artifact_name(path_text: str) -> str:
    name = path_text.replace("\\", "/").strip().strip("/")
    name = re.sub(r"[^A-Za-z0-9_.-]+", "_", name)
    name = name.strip("._-")
    return name or "candidate.txt"
 def _file_writer_previous_outputs(
    previous_outputs: dict[str, str],
    retry_count: int,
    max_chars: int = 1200,
 ) -> dict[str, str]:
    if retry_count <= 0:
        return dict(previous_outputs)
    compacted: dict[str, str] = {}
    for name, output in previous_outputs.items():
        compacted[name] = _compact_previous_output(output, max_chars=max_chars)
    return compacted
 def _compact_previous_output(output: str, max_chars: int = 1200) -> str:
    if len(output) <= max_chars:
        return output
    head_chars = max_chars // 2
    tail_chars = max_chars - head_chars
    return (
        output[:head_chars].rstrip()
        + "\n\n... <previous output truncated for retry prompt> ...\n\n"
        + output[-tail_chars:].lstrip()
    )
 def _repeated_protected_path_violation(entries: tuple[RetryMemoryEntry, ...]) -> bool:
    recent = entries[-2:]
    if len(recent) < 2:
--- a/tests/test_agents.py
+++ b/tests/test_agents.py
@ -92,6 +92,7 @@ class AgentExecutorTests(unittest.TestCase):
        )
        self.assertIn("Use only paths under these project-relative targets: `story/chapters`.", prompt)
        self.assertIn("This is the drafting stage", prompt)
    def test_command_agent_writes_output_and_returns_pass(self) -> None:
        with tempfile.TemporaryDirectory() as directory:
--- a/tests/test_patches.py
+++ b/tests/test_patches.py
@ -269,6 +269,34 @@ two
            with self.assertRaisesRegex(PipelineError, "duplicate file block"):
                generate_patch_from_file_updates(updates, root, safety)
    def test_file_updates_enforce_stage_allowed_paths(self) -> None:
        with tempfile.TemporaryDirectory() as directory:
            root = Path(directory)
            (root / "story" / "chapters").mkdir(parents=True)
            safety = SafetyConfig(
                require_clean_worktree=False,
                scoped_paths=("story",),
                allowed_commands=(),
                forbidden_commands=(),
            )
            updates = parse_file_updates(
                """```file:story/chapters/scene.md
 scene
 ```
 ```file:story/plot-state.md
 state
 ```
 """
            )
            with self.assertRaisesRegex(PipelineError, "not allowed for this stage"):
                generate_patch_from_file_updates(
                    updates,
                    root,
                    safety,
                    allowed_paths=("story/chapters",),
                )
    def test_file_updates_allow_identical_duplicate_blocks(self) -> None:
        with tempfile.TemporaryDirectory() as directory:
            root = Path(directory)
--- a/tests/test_pipeline.py
+++ b/tests/test_pipeline.py
@ -12,7 +12,7 @@ from nightshift.config import (
    SafetyConfig,
    StageConfig,
 )
-from nightshift.pipeline import PipelineRunner
+from nightshift.pipeline import PipelineRunner, _file_writer_previous_outputs
 from nightshift.stages import StageResult
 from nightshift.tasks import parse_tasks
@ -589,6 +589,53 @@ Acceptance Criteria:
            self.assertTrue(agent_output.exists())
            self.assertIn("diff --git a/app.py b/app.py", patch.read_text(encoding="utf-8"))
            self.assertIn("diff --git a/tests/test_app.py b/tests/test_app.py", patch.read_text(encoding="utf-8"))
            candidate_index = root / ".nightshift" / "runs" / "test-run" / "tasks" / "TASK-001" / "candidate-files" / "write" / "index.md"
            self.assertTrue(candidate_index.exists())
            self.assertIn("app.py", candidate_index.read_text(encoding="utf-8"))
    def test_file_writer_preserves_candidates_when_stage_paths_reject_extra_files(self) -> None:
        with tempfile.TemporaryDirectory() as directory:
            root = Path(directory)
            _write_common_files(root)
            (root / "story" / "chapters").mkdir(parents=True)
            (root / "fake_writer.py").write_text(
                "\n".join(
                    [
                        "print('```file:story/chapters/scene.md')",
                        "print('scene prose')",
                        "print('```')",
                        "print('```file:story/plot-state.md')",
                        "print('state')",
                        "print('```')",
                    ]
                ),
                encoding="utf-8",
            )
            stages = (
                StageConfig(
                    id="draft_scene",
                    type="file_writer",
                    agent="writer",
                    allowed_paths=("story/chapters",),
                ),
            )
            config = make_config(root, stages, max_retries=0)
            config.agents["writer"] = AgentConfig(
                id="writer",
                backend="command",
                command="python fake_writer.py",
                system_prompt=Path("planner.md"),
            )
            runner = PipelineRunner(config, ArtifactStore(root, ".nightshift", run_id="test-run"))
            result = runner.run_task(parse_tasks(TASK_MD)[0])
            task_dir = root / ".nightshift" / "runs" / "test-run" / "tasks" / "TASK-001"
            candidate = task_dir / "candidate-files" / "draft_scene" / "001-story_chapters_scene.md"
            self.assertEqual(result.status, "failed")
            self.assertIn("This is the drafting stage", result.reason)
            self.assertTrue(candidate.exists())
            self.assertEqual(candidate.read_text(encoding="utf-8"), "scene prose\n")
    def test_file_writer_accepts_unified_diff_fallback(self) -> None:
        with tempfile.TemporaryDirectory() as directory:
@ -758,6 +805,18 @@ Acceptance Criteria:
            self.assertIn("... <truncated>", retry_prompt)
            self.assertLess(len(retry_prompt), 9000)
    def test_file_writer_retry_compacts_large_previous_outputs(self) -> None:
        outputs = {
            "scene-draft.patch": "a" * 5000,
            "draft-validation.md": "Patch validation failed",
        }
        compacted = _file_writer_previous_outputs(outputs, retry_count=1, max_chars=100)
        self.assertIn("previous output truncated", compacted["scene-draft.patch"])
        self.assertLess(len(compacted["scene-draft.patch"]), 180)
        self.assertEqual(compacted["draft-validation.md"], "Patch validation failed")
    def test_patch_validator_rejects_unsafe_patch(self) -> None:
        with tempfile.TemporaryDirectory() as directory:
            root = Path(directory)