From 6e03430a33abc274906de16910e3ff7c64749752 Mon Sep 17 00:00:00 2001 From: "K. Hodges" Date: Fri, 22 May 2026 02:55:26 -0700 Subject: [PATCH] Recover usable drafts from failed runs --- docs/ideas.md | 77 +++++++++++++++++++++++++ nightshift/agents.py | 18 +++++- nightshift/patches.py | 2 + nightshift/pipeline.py | 125 ++++++++++++++++++++++++++++++++++++++++- tests/test_agents.py | 1 + tests/test_patches.py | 28 +++++++++ tests/test_pipeline.py | 61 +++++++++++++++++++- 7 files changed, 307 insertions(+), 5 deletions(-) diff --git a/docs/ideas.md b/docs/ideas.md index 8661d2b..825d770 100644 --- a/docs/ideas.md +++ b/docs/ideas.md @@ -35,6 +35,83 @@ Deterministic checks: Then optional model reviewer checks acceptance-criteria alignment. +## P0: Preserve Good Drafts During Repair + +When a generated file block contains useful allowed content plus disallowed or invalid extra content, avoid redrafting from scratch. + +Possible behavior: + +- keep the allowed candidate file artifact +- strip disallowed file blocks only when configured as safe for that stage +- continue with validation for the allowed content +- or ask the model for a minimal correction that preserves the accepted candidate + +For writing workflows, preserving a good scene is more valuable than forcing a full retry. + +## P0: Remove Runtime Overrides For Custom Ollama Models + +If a model is a tuned local Ollama model such as `nightshift-writer` or `nightshift-base`, prefer the Modelfile parameters unless the stage has a specific reason to override them. + +Candidate config cleanup: + +- remove `temperature` +- remove `num_ctx` +- remove `num_predict` +- remove `stop` if present + +This avoids NightShift accidentally overriding tuned custom-model behavior. + +## P1: Improve `what-happened` For Model Runs + +The report should identify usable intermediate work, not only final failure state. + +Examples: + +- model produced a valid scene candidate +- validation rejected extra state files +- recover candidate from `candidate-files//index.md` +- retry output was invalid or too short +- next recommended action + +This should make failed creative-writing runs reviewable without manually reading every artifact. + +## P1: Add Stage-Specific Task Views + +The same task may say both "write scene" and "update state", but those responsibilities belong to different stages. + +Stage prompts should receive a filtered task view: + +- drafter sees only scene-writing criteria +- state updater sees only durable state update criteria +- reviewers see criteria relevant to their review role + +This reduces prompt contradiction and makes deterministic stage rules easier for models to follow. + +## P1: Preserve Intra-Attempt Rerun Artifacts + +When NightShift re-runs an agent inside the same stage attempt, do not overwrite the previous artifact. + +Examples: + +- `draft_scene-agent-output.md` +- `draft_scene-agent-output-invalid-rerun-1.md` +- `draft_scene-agent-output-1.md` + +This keeps the initial useful output visible even when strict rerun output is worse. + +## P1: Add A Writing-Mode Validator + +Add deterministic checks for prose workflows: + +- scene file exists at requested path +- scene word count is within configured range +- drafter did not touch state files +- state updater did not touch chapter prose +- no TODOs, author notes, or bracket placeholders +- optional checks for repeated headings or accidental prompt leakage + +This should run before model review stages. + ## P2: Add A Test Analyzer Agent For TDD Defer until generated tests are stable. diff --git a/nightshift/agents.py b/nightshift/agents.py index f78c6c0..8cd0b15 100644 --- a/nightshift/agents.py +++ b/nightshift/agents.py @@ -519,8 +519,24 @@ def output_contract_for(stage: StageConfig) -> str: def _format_allowed_file_writer_paths(allowed_paths: tuple[str, ...]) -> str: if not allowed_paths: return "Use real project-relative paths, not placeholder paths." + normalized = tuple(path.replace("\\", "/").rstrip("/") for path in allowed_paths) paths = ", ".join(f"`{path}`" for path in allowed_paths) - return f"Use only paths under these project-relative targets: {paths}." + guidance = f"Use only paths under these project-relative targets: {paths}." + if normalized == ("story/chapters",): + return ( + guidance + + " This is the drafting stage: write only scene prose; do not update plot state, " + "characters, timeline, unresolved threads, or other story state files." + ) + state_paths = { + "story/plot-state.md", + "story/characters.md", + "story/timeline.md", + "story/unresolved-threads.md", + } + if set(normalized).issubset(state_paths): + return guidance + " This is the state update stage: do not write or rewrite chapter prose." + return guidance def parse_review_output(output: str) -> tuple[StageStatus, str, str | None, str | None]: diff --git a/nightshift/patches.py b/nightshift/patches.py index 3f8a2d1..051f5d1 100644 --- a/nightshift/patches.py +++ b/nightshift/patches.py @@ -114,6 +114,7 @@ def generate_patch_from_file_updates( updates: tuple[FileUpdate, ...], project_root: str | Path, safety: SafetyConfig, + allowed_paths: tuple[str, ...] = (), forbidden_paths: tuple[str, ...] = DEFAULT_FORBIDDEN_PATHS, ) -> str: root = resolve_project_root(project_root) @@ -128,6 +129,7 @@ def generate_patch_from_file_updates( raise PipelineError(f"File writer error: duplicate file block `{normalized_path}`.") seen[normalized_path] = update.content _validate_patch_path(normalized_path, root, scoped_roots, forbidden_paths) + _validate_allowed_patch_path(normalized_path, root, allowed_paths) file_path = resolve_inside_root(root, normalized_path, f"file update '{normalized_path}'") old_text = file_path.read_text(encoding="utf-8", errors="replace") if file_path.exists() else "" if old_text == update.content: diff --git a/nightshift/pipeline.py b/nightshift/pipeline.py index 7aab9e3..534b86a 100644 --- a/nightshift/pipeline.py +++ b/nightshift/pipeline.py @@ -22,6 +22,7 @@ from .patches import ( DEFAULT_FORBIDDEN_PATHS, DEFAULT_MAX_CHANGED_LINES, DEFAULT_MAX_FILES, + FileUpdate, apply_patch_with_git, extract_unified_diff, format_patch_apply_result, @@ -693,7 +694,7 @@ class PipelineRunner: ) -> StageResult: if stage.agent is None: raise PipelineError(f"Pipeline error: file_writer stage '{stage.id}' must reference an agent.") - enriched_outputs = dict(previous_outputs) + enriched_outputs = _file_writer_previous_outputs(previous_outputs, retry_count) context_pack_path = self._latest_task_artifact(task.id, "context-pack.md") if context_pack_path is not None: enriched_outputs["context-pack.md"] = context_pack_path.read_text(encoding="utf-8", errors="replace") @@ -745,19 +746,28 @@ class PipelineRunner: raw_output = self._read_output(result.output_path) stdout = extract_agent_stdout(raw_output) invalid_rerun_done = False + candidate_index_path: Path | None = None while True: try: updates = parse_file_updates(stdout) + candidate_index_path = self._write_file_writer_candidates( + task.id, + stage, + updates, + retry_count, + ) patch = generate_patch_from_file_updates( updates, self.config.project.root, self.config.safety, + allowed_paths=stage.allowed_paths, forbidden_paths=stage.forbidden_paths or DEFAULT_FORBIDDEN_PATHS, ) patch_reason = "Deterministic patch written from file blocks." log_message = "Wrote deterministic patch from file blocks" break except PipelineError as exc: + reason = _file_writer_error_reason(stage, str(exc)) if ( "no file blocks found" in str(exc) and "diff --git " not in stdout @@ -773,7 +783,7 @@ class PipelineRunner: rerun_outputs = dict(enriched_outputs) rerun_outputs["invalid_file_writer_output_summary"] = _invalid_file_writer_output_summary( stdout, - str(exc), + reason, ) strict_notes = [ *retry_notes, @@ -796,7 +806,6 @@ class PipelineRunner: patch = normalize_patch_text(stdout) except PipelineError: summary_filename = _writer_summary_filename(stage, retry_count) - reason = str(exc) if "generated patch has no changes" in reason: next_stage = self._stage_after_patch_flow(stage.id) reason = self._no_changes_reason(retry_count) @@ -836,6 +845,11 @@ class PipelineRunner: proposed_path.relative_to(self.config.project.root).as_posix(), retry_count=retry_count, retry_notes=retry_notes, + candidate_index_path=( + candidate_index_path.relative_to(self.config.project.root).as_posix() + if candidate_index_path + else None + ), ), ) self.logger.event( @@ -853,6 +867,47 @@ class PipelineRunner: context_update=f"Implementation summary: {summary_path.relative_to(self.config.project.root).as_posix()}", ) + def _write_file_writer_candidates( + self, + task_id: str, + stage: StageConfig, + updates: tuple[FileUpdate, ...], + retry_count: int, + ) -> Path: + if not updates: + raise PipelineError("File writer error: no candidate file blocks found.") + base = f"candidate-files/{stage.id}" + if retry_count: + base += f"-retry-{retry_count}" + lines = [ + "# Candidate Files", + "", + f"Stage: `{stage.id}`", + f"Retry: {retry_count}", + "", + "These are raw file blocks extracted before patch validation or apply.", + "", + "## Files", + "", + ] + seen: set[str] = set() + for index, update in enumerate(updates, start=1): + filename = f"{index:03d}-{_candidate_artifact_name(update.path)}" + while filename in seen: + filename = f"{index:03d}-{len(seen):03d}-{_candidate_artifact_name(update.path)}" + seen.add(filename) + artifact_name = f"{base}/{filename}" + artifact_path = self.artifacts.write_stage_output(task_id, artifact_name, update.content) + relative = artifact_path.relative_to(self.config.project.root).as_posix() + lines.extend( + [ + f"- Source path: `{update.path}`", + f" Artifact: `{relative}`", + ] + ) + lines.append("") + return self.artifacts.write_stage_output(task_id, f"{base}/index.md", "\n".join(lines)) + def _writer_agent_stage(self, stage: StageConfig, retry_count: int) -> StageConfig: suffix = f"-{retry_count}" if retry_count else "" return replace( @@ -1426,6 +1481,7 @@ def format_implementation_summary( patch_path: str, retry_count: int = 0, retry_notes: list[str] | None = None, + candidate_index_path: str | None = None, ) -> str: notes = retry_notes or [] lines = [ @@ -1435,6 +1491,7 @@ def format_implementation_summary( "Status: pass", f"Repair attempt: {retry_count}", f"Patch: `{patch_path}`", + f"Candidate files: `{candidate_index_path}`" if candidate_index_path else "Candidate files: ", "", "## Retry Feedback", "", @@ -1551,6 +1608,68 @@ def _invalid_file_writer_output_summary(output: str, reason: str, max_chars: int return "\n".join(lines) +def _file_writer_error_reason(stage: StageConfig, reason: str) -> str: + guidance = _file_writer_stage_guidance(stage) + if not guidance or "not allowed for this stage" not in reason: + return reason + return f"{reason} {guidance}" + + +def _file_writer_stage_guidance(stage: StageConfig) -> str: + allowed = tuple(path.replace("\\", "/").rstrip("/") for path in stage.allowed_paths) + if allowed == ("story/chapters",): + return ( + "This is the drafting stage: write only scene prose under `story/chapters/`. " + "Do not update plot state, characters, timeline, unresolved threads, or other story state files." + ) + state_paths = { + "story/plot-state.md", + "story/characters.md", + "story/timeline.md", + "story/unresolved-threads.md", + } + if set(allowed).issubset(state_paths) and allowed: + return ( + "This is the state update stage: update only durable story state files. " + "Do not rewrite scene prose or chapter files." + ) + if allowed: + return "Return file blocks only for the allowed paths configured on this stage." + return "" + + +def _candidate_artifact_name(path_text: str) -> str: + name = path_text.replace("\\", "/").strip().strip("/") + name = re.sub(r"[^A-Za-z0-9_.-]+", "_", name) + name = name.strip("._-") + return name or "candidate.txt" + + +def _file_writer_previous_outputs( + previous_outputs: dict[str, str], + retry_count: int, + max_chars: int = 1200, +) -> dict[str, str]: + if retry_count <= 0: + return dict(previous_outputs) + compacted: dict[str, str] = {} + for name, output in previous_outputs.items(): + compacted[name] = _compact_previous_output(output, max_chars=max_chars) + return compacted + + +def _compact_previous_output(output: str, max_chars: int = 1200) -> str: + if len(output) <= max_chars: + return output + head_chars = max_chars // 2 + tail_chars = max_chars - head_chars + return ( + output[:head_chars].rstrip() + + "\n\n... ...\n\n" + + output[-tail_chars:].lstrip() + ) + + def _repeated_protected_path_violation(entries: tuple[RetryMemoryEntry, ...]) -> bool: recent = entries[-2:] if len(recent) < 2: diff --git a/tests/test_agents.py b/tests/test_agents.py index 611fd8e..9401164 100644 --- a/tests/test_agents.py +++ b/tests/test_agents.py @@ -92,6 +92,7 @@ class AgentExecutorTests(unittest.TestCase): ) self.assertIn("Use only paths under these project-relative targets: `story/chapters`.", prompt) + self.assertIn("This is the drafting stage", prompt) def test_command_agent_writes_output_and_returns_pass(self) -> None: with tempfile.TemporaryDirectory() as directory: diff --git a/tests/test_patches.py b/tests/test_patches.py index 8e6f852..3846583 100644 --- a/tests/test_patches.py +++ b/tests/test_patches.py @@ -269,6 +269,34 @@ two with self.assertRaisesRegex(PipelineError, "duplicate file block"): generate_patch_from_file_updates(updates, root, safety) + def test_file_updates_enforce_stage_allowed_paths(self) -> None: + with tempfile.TemporaryDirectory() as directory: + root = Path(directory) + (root / "story" / "chapters").mkdir(parents=True) + safety = SafetyConfig( + require_clean_worktree=False, + scoped_paths=("story",), + allowed_commands=(), + forbidden_commands=(), + ) + updates = parse_file_updates( + """```file:story/chapters/scene.md +scene +``` +```file:story/plot-state.md +state +``` +""" + ) + + with self.assertRaisesRegex(PipelineError, "not allowed for this stage"): + generate_patch_from_file_updates( + updates, + root, + safety, + allowed_paths=("story/chapters",), + ) + def test_file_updates_allow_identical_duplicate_blocks(self) -> None: with tempfile.TemporaryDirectory() as directory: root = Path(directory) diff --git a/tests/test_pipeline.py b/tests/test_pipeline.py index e78ccfa..ba85cee 100644 --- a/tests/test_pipeline.py +++ b/tests/test_pipeline.py @@ -12,7 +12,7 @@ from nightshift.config import ( SafetyConfig, StageConfig, ) -from nightshift.pipeline import PipelineRunner +from nightshift.pipeline import PipelineRunner, _file_writer_previous_outputs from nightshift.stages import StageResult from nightshift.tasks import parse_tasks @@ -589,6 +589,53 @@ Acceptance Criteria: self.assertTrue(agent_output.exists()) self.assertIn("diff --git a/app.py b/app.py", patch.read_text(encoding="utf-8")) self.assertIn("diff --git a/tests/test_app.py b/tests/test_app.py", patch.read_text(encoding="utf-8")) + candidate_index = root / ".nightshift" / "runs" / "test-run" / "tasks" / "TASK-001" / "candidate-files" / "write" / "index.md" + self.assertTrue(candidate_index.exists()) + self.assertIn("app.py", candidate_index.read_text(encoding="utf-8")) + + def test_file_writer_preserves_candidates_when_stage_paths_reject_extra_files(self) -> None: + with tempfile.TemporaryDirectory() as directory: + root = Path(directory) + _write_common_files(root) + (root / "story" / "chapters").mkdir(parents=True) + (root / "fake_writer.py").write_text( + "\n".join( + [ + "print('```file:story/chapters/scene.md')", + "print('scene prose')", + "print('```')", + "print('```file:story/plot-state.md')", + "print('state')", + "print('```')", + ] + ), + encoding="utf-8", + ) + stages = ( + StageConfig( + id="draft_scene", + type="file_writer", + agent="writer", + allowed_paths=("story/chapters",), + ), + ) + config = make_config(root, stages, max_retries=0) + config.agents["writer"] = AgentConfig( + id="writer", + backend="command", + command="python fake_writer.py", + system_prompt=Path("planner.md"), + ) + runner = PipelineRunner(config, ArtifactStore(root, ".nightshift", run_id="test-run")) + + result = runner.run_task(parse_tasks(TASK_MD)[0]) + + task_dir = root / ".nightshift" / "runs" / "test-run" / "tasks" / "TASK-001" + candidate = task_dir / "candidate-files" / "draft_scene" / "001-story_chapters_scene.md" + self.assertEqual(result.status, "failed") + self.assertIn("This is the drafting stage", result.reason) + self.assertTrue(candidate.exists()) + self.assertEqual(candidate.read_text(encoding="utf-8"), "scene prose\n") def test_file_writer_accepts_unified_diff_fallback(self) -> None: with tempfile.TemporaryDirectory() as directory: @@ -758,6 +805,18 @@ Acceptance Criteria: self.assertIn("... ", retry_prompt) self.assertLess(len(retry_prompt), 9000) + def test_file_writer_retry_compacts_large_previous_outputs(self) -> None: + outputs = { + "scene-draft.patch": "a" * 5000, + "draft-validation.md": "Patch validation failed", + } + + compacted = _file_writer_previous_outputs(outputs, retry_count=1, max_chars=100) + + self.assertIn("previous output truncated", compacted["scene-draft.patch"]) + self.assertLess(len(compacted["scene-draft.patch"]), 180) + self.assertEqual(compacted["draft-validation.md"], "Patch validation failed") + def test_patch_validator_rejects_unsafe_patch(self) -> None: with tempfile.TemporaryDirectory() as directory: root = Path(directory)