Recover usable drafts from failed runs

This commit is contained in:
K. Hodges 2026-05-22 02:55:26 -07:00
parent dee87434e7
commit 6e03430a33
7 changed files with 307 additions and 5 deletions

View File

@ -35,6 +35,83 @@ Deterministic checks:
Then optional model reviewer checks acceptance-criteria alignment. Then optional model reviewer checks acceptance-criteria alignment.
## P0: Preserve Good Drafts During Repair
When a generated file block contains useful allowed content plus disallowed or invalid extra content, avoid redrafting from scratch.
Possible behavior:
- keep the allowed candidate file artifact
- strip disallowed file blocks only when configured as safe for that stage
- continue with validation for the allowed content
- or ask the model for a minimal correction that preserves the accepted candidate
For writing workflows, preserving a good scene is more valuable than forcing a full retry.
## P0: Remove Runtime Overrides For Custom Ollama Models
If a model is a tuned local Ollama model such as `nightshift-writer` or `nightshift-base`, prefer the Modelfile parameters unless the stage has a specific reason to override them.
Candidate config cleanup:
- remove `temperature`
- remove `num_ctx`
- remove `num_predict`
- remove `stop` if present
This avoids NightShift accidentally overriding tuned custom-model behavior.
## P1: Improve `what-happened` For Model Runs
The report should identify usable intermediate work, not only final failure state.
Examples:
- model produced a valid scene candidate
- validation rejected extra state files
- recover candidate from `candidate-files/<stage>/index.md`
- retry output was invalid or too short
- next recommended action
This should make failed creative-writing runs reviewable without manually reading every artifact.
## P1: Add Stage-Specific Task Views
The same task may say both "write scene" and "update state", but those responsibilities belong to different stages.
Stage prompts should receive a filtered task view:
- drafter sees only scene-writing criteria
- state updater sees only durable state update criteria
- reviewers see criteria relevant to their review role
This reduces prompt contradiction and makes deterministic stage rules easier for models to follow.
## P1: Preserve Intra-Attempt Rerun Artifacts
When NightShift re-runs an agent inside the same stage attempt, do not overwrite the previous artifact.
Examples:
- `draft_scene-agent-output.md`
- `draft_scene-agent-output-invalid-rerun-1.md`
- `draft_scene-agent-output-1.md`
This keeps the initial useful output visible even when strict rerun output is worse.
## P1: Add A Writing-Mode Validator
Add deterministic checks for prose workflows:
- scene file exists at requested path
- scene word count is within configured range
- drafter did not touch state files
- state updater did not touch chapter prose
- no TODOs, author notes, or bracket placeholders
- optional checks for repeated headings or accidental prompt leakage
This should run before model review stages.
## P2: Add A Test Analyzer Agent For TDD ## P2: Add A Test Analyzer Agent For TDD
Defer until generated tests are stable. Defer until generated tests are stable.

View File

@ -519,8 +519,24 @@ def output_contract_for(stage: StageConfig) -> str:
def _format_allowed_file_writer_paths(allowed_paths: tuple[str, ...]) -> str: def _format_allowed_file_writer_paths(allowed_paths: tuple[str, ...]) -> str:
if not allowed_paths: if not allowed_paths:
return "Use real project-relative paths, not placeholder paths." return "Use real project-relative paths, not placeholder paths."
normalized = tuple(path.replace("\\", "/").rstrip("/") for path in allowed_paths)
paths = ", ".join(f"`{path}`" for path in allowed_paths) paths = ", ".join(f"`{path}`" for path in allowed_paths)
return f"Use only paths under these project-relative targets: {paths}." guidance = f"Use only paths under these project-relative targets: {paths}."
if normalized == ("story/chapters",):
return (
guidance
+ " This is the drafting stage: write only scene prose; do not update plot state, "
"characters, timeline, unresolved threads, or other story state files."
)
state_paths = {
"story/plot-state.md",
"story/characters.md",
"story/timeline.md",
"story/unresolved-threads.md",
}
if set(normalized).issubset(state_paths):
return guidance + " This is the state update stage: do not write or rewrite chapter prose."
return guidance
def parse_review_output(output: str) -> tuple[StageStatus, str, str | None, str | None]: def parse_review_output(output: str) -> tuple[StageStatus, str, str | None, str | None]:

View File

@ -114,6 +114,7 @@ def generate_patch_from_file_updates(
updates: tuple[FileUpdate, ...], updates: tuple[FileUpdate, ...],
project_root: str | Path, project_root: str | Path,
safety: SafetyConfig, safety: SafetyConfig,
allowed_paths: tuple[str, ...] = (),
forbidden_paths: tuple[str, ...] = DEFAULT_FORBIDDEN_PATHS, forbidden_paths: tuple[str, ...] = DEFAULT_FORBIDDEN_PATHS,
) -> str: ) -> str:
root = resolve_project_root(project_root) root = resolve_project_root(project_root)
@ -128,6 +129,7 @@ def generate_patch_from_file_updates(
raise PipelineError(f"File writer error: duplicate file block `{normalized_path}`.") raise PipelineError(f"File writer error: duplicate file block `{normalized_path}`.")
seen[normalized_path] = update.content seen[normalized_path] = update.content
_validate_patch_path(normalized_path, root, scoped_roots, forbidden_paths) _validate_patch_path(normalized_path, root, scoped_roots, forbidden_paths)
_validate_allowed_patch_path(normalized_path, root, allowed_paths)
file_path = resolve_inside_root(root, normalized_path, f"file update '{normalized_path}'") file_path = resolve_inside_root(root, normalized_path, f"file update '{normalized_path}'")
old_text = file_path.read_text(encoding="utf-8", errors="replace") if file_path.exists() else "" old_text = file_path.read_text(encoding="utf-8", errors="replace") if file_path.exists() else ""
if old_text == update.content: if old_text == update.content:

View File

@ -22,6 +22,7 @@ from .patches import (
DEFAULT_FORBIDDEN_PATHS, DEFAULT_FORBIDDEN_PATHS,
DEFAULT_MAX_CHANGED_LINES, DEFAULT_MAX_CHANGED_LINES,
DEFAULT_MAX_FILES, DEFAULT_MAX_FILES,
FileUpdate,
apply_patch_with_git, apply_patch_with_git,
extract_unified_diff, extract_unified_diff,
format_patch_apply_result, format_patch_apply_result,
@ -693,7 +694,7 @@ class PipelineRunner:
) -> StageResult: ) -> StageResult:
if stage.agent is None: if stage.agent is None:
raise PipelineError(f"Pipeline error: file_writer stage '{stage.id}' must reference an agent.") raise PipelineError(f"Pipeline error: file_writer stage '{stage.id}' must reference an agent.")
enriched_outputs = dict(previous_outputs) enriched_outputs = _file_writer_previous_outputs(previous_outputs, retry_count)
context_pack_path = self._latest_task_artifact(task.id, "context-pack.md") context_pack_path = self._latest_task_artifact(task.id, "context-pack.md")
if context_pack_path is not None: if context_pack_path is not None:
enriched_outputs["context-pack.md"] = context_pack_path.read_text(encoding="utf-8", errors="replace") enriched_outputs["context-pack.md"] = context_pack_path.read_text(encoding="utf-8", errors="replace")
@ -745,19 +746,28 @@ class PipelineRunner:
raw_output = self._read_output(result.output_path) raw_output = self._read_output(result.output_path)
stdout = extract_agent_stdout(raw_output) stdout = extract_agent_stdout(raw_output)
invalid_rerun_done = False invalid_rerun_done = False
candidate_index_path: Path | None = None
while True: while True:
try: try:
updates = parse_file_updates(stdout) updates = parse_file_updates(stdout)
candidate_index_path = self._write_file_writer_candidates(
task.id,
stage,
updates,
retry_count,
)
patch = generate_patch_from_file_updates( patch = generate_patch_from_file_updates(
updates, updates,
self.config.project.root, self.config.project.root,
self.config.safety, self.config.safety,
allowed_paths=stage.allowed_paths,
forbidden_paths=stage.forbidden_paths or DEFAULT_FORBIDDEN_PATHS, forbidden_paths=stage.forbidden_paths or DEFAULT_FORBIDDEN_PATHS,
) )
patch_reason = "Deterministic patch written from file blocks." patch_reason = "Deterministic patch written from file blocks."
log_message = "Wrote deterministic patch from file blocks" log_message = "Wrote deterministic patch from file blocks"
break break
except PipelineError as exc: except PipelineError as exc:
reason = _file_writer_error_reason(stage, str(exc))
if ( if (
"no file blocks found" in str(exc) "no file blocks found" in str(exc)
and "diff --git " not in stdout and "diff --git " not in stdout
@ -773,7 +783,7 @@ class PipelineRunner:
rerun_outputs = dict(enriched_outputs) rerun_outputs = dict(enriched_outputs)
rerun_outputs["invalid_file_writer_output_summary"] = _invalid_file_writer_output_summary( rerun_outputs["invalid_file_writer_output_summary"] = _invalid_file_writer_output_summary(
stdout, stdout,
str(exc), reason,
) )
strict_notes = [ strict_notes = [
*retry_notes, *retry_notes,
@ -796,7 +806,6 @@ class PipelineRunner:
patch = normalize_patch_text(stdout) patch = normalize_patch_text(stdout)
except PipelineError: except PipelineError:
summary_filename = _writer_summary_filename(stage, retry_count) summary_filename = _writer_summary_filename(stage, retry_count)
reason = str(exc)
if "generated patch has no changes" in reason: if "generated patch has no changes" in reason:
next_stage = self._stage_after_patch_flow(stage.id) next_stage = self._stage_after_patch_flow(stage.id)
reason = self._no_changes_reason(retry_count) reason = self._no_changes_reason(retry_count)
@ -836,6 +845,11 @@ class PipelineRunner:
proposed_path.relative_to(self.config.project.root).as_posix(), proposed_path.relative_to(self.config.project.root).as_posix(),
retry_count=retry_count, retry_count=retry_count,
retry_notes=retry_notes, retry_notes=retry_notes,
candidate_index_path=(
candidate_index_path.relative_to(self.config.project.root).as_posix()
if candidate_index_path
else None
),
), ),
) )
self.logger.event( self.logger.event(
@ -853,6 +867,47 @@ class PipelineRunner:
context_update=f"Implementation summary: {summary_path.relative_to(self.config.project.root).as_posix()}", context_update=f"Implementation summary: {summary_path.relative_to(self.config.project.root).as_posix()}",
) )
def _write_file_writer_candidates(
self,
task_id: str,
stage: StageConfig,
updates: tuple[FileUpdate, ...],
retry_count: int,
) -> Path:
if not updates:
raise PipelineError("File writer error: no candidate file blocks found.")
base = f"candidate-files/{stage.id}"
if retry_count:
base += f"-retry-{retry_count}"
lines = [
"# Candidate Files",
"",
f"Stage: `{stage.id}`",
f"Retry: {retry_count}",
"",
"These are raw file blocks extracted before patch validation or apply.",
"",
"## Files",
"",
]
seen: set[str] = set()
for index, update in enumerate(updates, start=1):
filename = f"{index:03d}-{_candidate_artifact_name(update.path)}"
while filename in seen:
filename = f"{index:03d}-{len(seen):03d}-{_candidate_artifact_name(update.path)}"
seen.add(filename)
artifact_name = f"{base}/{filename}"
artifact_path = self.artifacts.write_stage_output(task_id, artifact_name, update.content)
relative = artifact_path.relative_to(self.config.project.root).as_posix()
lines.extend(
[
f"- Source path: `{update.path}`",
f" Artifact: `{relative}`",
]
)
lines.append("")
return self.artifacts.write_stage_output(task_id, f"{base}/index.md", "\n".join(lines))
def _writer_agent_stage(self, stage: StageConfig, retry_count: int) -> StageConfig: def _writer_agent_stage(self, stage: StageConfig, retry_count: int) -> StageConfig:
suffix = f"-{retry_count}" if retry_count else "" suffix = f"-{retry_count}" if retry_count else ""
return replace( return replace(
@ -1426,6 +1481,7 @@ def format_implementation_summary(
patch_path: str, patch_path: str,
retry_count: int = 0, retry_count: int = 0,
retry_notes: list[str] | None = None, retry_notes: list[str] | None = None,
candidate_index_path: str | None = None,
) -> str: ) -> str:
notes = retry_notes or [] notes = retry_notes or []
lines = [ lines = [
@ -1435,6 +1491,7 @@ def format_implementation_summary(
"Status: pass", "Status: pass",
f"Repair attempt: {retry_count}", f"Repair attempt: {retry_count}",
f"Patch: `{patch_path}`", f"Patch: `{patch_path}`",
f"Candidate files: `{candidate_index_path}`" if candidate_index_path else "Candidate files: <none>",
"", "",
"## Retry Feedback", "## Retry Feedback",
"", "",
@ -1551,6 +1608,68 @@ def _invalid_file_writer_output_summary(output: str, reason: str, max_chars: int
return "\n".join(lines) return "\n".join(lines)
def _file_writer_error_reason(stage: StageConfig, reason: str) -> str:
guidance = _file_writer_stage_guidance(stage)
if not guidance or "not allowed for this stage" not in reason:
return reason
return f"{reason} {guidance}"
def _file_writer_stage_guidance(stage: StageConfig) -> str:
allowed = tuple(path.replace("\\", "/").rstrip("/") for path in stage.allowed_paths)
if allowed == ("story/chapters",):
return (
"This is the drafting stage: write only scene prose under `story/chapters/`. "
"Do not update plot state, characters, timeline, unresolved threads, or other story state files."
)
state_paths = {
"story/plot-state.md",
"story/characters.md",
"story/timeline.md",
"story/unresolved-threads.md",
}
if set(allowed).issubset(state_paths) and allowed:
return (
"This is the state update stage: update only durable story state files. "
"Do not rewrite scene prose or chapter files."
)
if allowed:
return "Return file blocks only for the allowed paths configured on this stage."
return ""
def _candidate_artifact_name(path_text: str) -> str:
name = path_text.replace("\\", "/").strip().strip("/")
name = re.sub(r"[^A-Za-z0-9_.-]+", "_", name)
name = name.strip("._-")
return name or "candidate.txt"
def _file_writer_previous_outputs(
previous_outputs: dict[str, str],
retry_count: int,
max_chars: int = 1200,
) -> dict[str, str]:
if retry_count <= 0:
return dict(previous_outputs)
compacted: dict[str, str] = {}
for name, output in previous_outputs.items():
compacted[name] = _compact_previous_output(output, max_chars=max_chars)
return compacted
def _compact_previous_output(output: str, max_chars: int = 1200) -> str:
if len(output) <= max_chars:
return output
head_chars = max_chars // 2
tail_chars = max_chars - head_chars
return (
output[:head_chars].rstrip()
+ "\n\n... <previous output truncated for retry prompt> ...\n\n"
+ output[-tail_chars:].lstrip()
)
def _repeated_protected_path_violation(entries: tuple[RetryMemoryEntry, ...]) -> bool: def _repeated_protected_path_violation(entries: tuple[RetryMemoryEntry, ...]) -> bool:
recent = entries[-2:] recent = entries[-2:]
if len(recent) < 2: if len(recent) < 2:

View File

@ -92,6 +92,7 @@ class AgentExecutorTests(unittest.TestCase):
) )
self.assertIn("Use only paths under these project-relative targets: `story/chapters`.", prompt) self.assertIn("Use only paths under these project-relative targets: `story/chapters`.", prompt)
self.assertIn("This is the drafting stage", prompt)
def test_command_agent_writes_output_and_returns_pass(self) -> None: def test_command_agent_writes_output_and_returns_pass(self) -> None:
with tempfile.TemporaryDirectory() as directory: with tempfile.TemporaryDirectory() as directory:

View File

@ -269,6 +269,34 @@ two
with self.assertRaisesRegex(PipelineError, "duplicate file block"): with self.assertRaisesRegex(PipelineError, "duplicate file block"):
generate_patch_from_file_updates(updates, root, safety) generate_patch_from_file_updates(updates, root, safety)
def test_file_updates_enforce_stage_allowed_paths(self) -> None:
with tempfile.TemporaryDirectory() as directory:
root = Path(directory)
(root / "story" / "chapters").mkdir(parents=True)
safety = SafetyConfig(
require_clean_worktree=False,
scoped_paths=("story",),
allowed_commands=(),
forbidden_commands=(),
)
updates = parse_file_updates(
"""```file:story/chapters/scene.md
scene
```
```file:story/plot-state.md
state
```
"""
)
with self.assertRaisesRegex(PipelineError, "not allowed for this stage"):
generate_patch_from_file_updates(
updates,
root,
safety,
allowed_paths=("story/chapters",),
)
def test_file_updates_allow_identical_duplicate_blocks(self) -> None: def test_file_updates_allow_identical_duplicate_blocks(self) -> None:
with tempfile.TemporaryDirectory() as directory: with tempfile.TemporaryDirectory() as directory:
root = Path(directory) root = Path(directory)

View File

@ -12,7 +12,7 @@ from nightshift.config import (
SafetyConfig, SafetyConfig,
StageConfig, StageConfig,
) )
from nightshift.pipeline import PipelineRunner from nightshift.pipeline import PipelineRunner, _file_writer_previous_outputs
from nightshift.stages import StageResult from nightshift.stages import StageResult
from nightshift.tasks import parse_tasks from nightshift.tasks import parse_tasks
@ -589,6 +589,53 @@ Acceptance Criteria:
self.assertTrue(agent_output.exists()) self.assertTrue(agent_output.exists())
self.assertIn("diff --git a/app.py b/app.py", patch.read_text(encoding="utf-8")) self.assertIn("diff --git a/app.py b/app.py", patch.read_text(encoding="utf-8"))
self.assertIn("diff --git a/tests/test_app.py b/tests/test_app.py", patch.read_text(encoding="utf-8")) self.assertIn("diff --git a/tests/test_app.py b/tests/test_app.py", patch.read_text(encoding="utf-8"))
candidate_index = root / ".nightshift" / "runs" / "test-run" / "tasks" / "TASK-001" / "candidate-files" / "write" / "index.md"
self.assertTrue(candidate_index.exists())
self.assertIn("app.py", candidate_index.read_text(encoding="utf-8"))
def test_file_writer_preserves_candidates_when_stage_paths_reject_extra_files(self) -> None:
with tempfile.TemporaryDirectory() as directory:
root = Path(directory)
_write_common_files(root)
(root / "story" / "chapters").mkdir(parents=True)
(root / "fake_writer.py").write_text(
"\n".join(
[
"print('```file:story/chapters/scene.md')",
"print('scene prose')",
"print('```')",
"print('```file:story/plot-state.md')",
"print('state')",
"print('```')",
]
),
encoding="utf-8",
)
stages = (
StageConfig(
id="draft_scene",
type="file_writer",
agent="writer",
allowed_paths=("story/chapters",),
),
)
config = make_config(root, stages, max_retries=0)
config.agents["writer"] = AgentConfig(
id="writer",
backend="command",
command="python fake_writer.py",
system_prompt=Path("planner.md"),
)
runner = PipelineRunner(config, ArtifactStore(root, ".nightshift", run_id="test-run"))
result = runner.run_task(parse_tasks(TASK_MD)[0])
task_dir = root / ".nightshift" / "runs" / "test-run" / "tasks" / "TASK-001"
candidate = task_dir / "candidate-files" / "draft_scene" / "001-story_chapters_scene.md"
self.assertEqual(result.status, "failed")
self.assertIn("This is the drafting stage", result.reason)
self.assertTrue(candidate.exists())
self.assertEqual(candidate.read_text(encoding="utf-8"), "scene prose\n")
def test_file_writer_accepts_unified_diff_fallback(self) -> None: def test_file_writer_accepts_unified_diff_fallback(self) -> None:
with tempfile.TemporaryDirectory() as directory: with tempfile.TemporaryDirectory() as directory:
@ -758,6 +805,18 @@ Acceptance Criteria:
self.assertIn("... <truncated>", retry_prompt) self.assertIn("... <truncated>", retry_prompt)
self.assertLess(len(retry_prompt), 9000) self.assertLess(len(retry_prompt), 9000)
def test_file_writer_retry_compacts_large_previous_outputs(self) -> None:
outputs = {
"scene-draft.patch": "a" * 5000,
"draft-validation.md": "Patch validation failed",
}
compacted = _file_writer_previous_outputs(outputs, retry_count=1, max_chars=100)
self.assertIn("previous output truncated", compacted["scene-draft.patch"])
self.assertLess(len(compacted["scene-draft.patch"]), 180)
self.assertEqual(compacted["draft-validation.md"], "Patch validation failed")
def test_patch_validator_rejects_unsafe_patch(self) -> None: def test_patch_validator_rejects_unsafe_patch(self) -> None:
with tempfile.TemporaryDirectory() as directory: with tempfile.TemporaryDirectory() as directory:
root = Path(directory) root = Path(directory)