Accuracy tweaks for story writer

This commit is contained in:
K. Hodges 2026-05-22 02:22:56 -07:00
parent 9157853c10
commit dee87434e7
8 changed files with 163 additions and 20 deletions

View File

@ -460,15 +460,17 @@ def output_contract_for(stage: StageConfig) -> str:
] ]
) )
if stage.type == "file_writer": if stage.type == "file_writer":
allowed = _format_allowed_file_writer_paths(stage.allowed_paths)
return "\n".join( return "\n".join(
[ [
"Return complete file contents only.", "Return complete file contents only.",
"Use one fenced block per file with this exact opening form:", "Use one fenced block per file with this exact opening form:",
"```file:relative/path.py", "```file:path/inside/project.ext",
"<complete file content>", "<complete file content>",
"```", "```",
allowed,
"Do not include prose outside file blocks.", "Do not include prose outside file blocks.",
"Include every file needed for the task, including tests.", "Include only files required for this stage and task.",
"NightShift will generate the unified diff deterministically.", "NightShift will generate the unified diff deterministically.",
"On repair attempts, use the retry notes and failed stage output to diagnose the root cause before changing files.", "On repair attempts, use the retry notes and failed stage output to diagnose the root cause before changing files.",
"Do not repeat an unchanged solution unless the failure output shows the implementation is already correct.", "Do not repeat an unchanged solution unless the failure output shows the implementation is already correct.",
@ -514,6 +516,13 @@ def output_contract_for(stage: StageConfig) -> str:
return "Write the requested stage output in concise markdown." return "Write the requested stage output in concise markdown."
def _format_allowed_file_writer_paths(allowed_paths: tuple[str, ...]) -> str:
if not allowed_paths:
return "Use real project-relative paths, not placeholder paths."
paths = ", ".join(f"`{path}`" for path in allowed_paths)
return f"Use only paths under these project-relative targets: {paths}."
def parse_review_output(output: str) -> tuple[StageStatus, str, str | None, str | None]: def parse_review_output(output: str) -> tuple[StageStatus, str, str | None, str | None]:
values: dict[str, str] = {} values: dict[str, str] = {}
for line in output.splitlines(): for line in output.splitlines():

View File

@ -771,10 +771,14 @@ class PipelineRunner:
task_id=task.id, task_id=task.id,
) )
rerun_outputs = dict(enriched_outputs) rerun_outputs = dict(enriched_outputs)
rerun_outputs["invalid_file_writer_output"] = stdout rerun_outputs["invalid_file_writer_output_summary"] = _invalid_file_writer_output_summary(
stdout,
str(exc),
)
strict_notes = [ strict_notes = [
*retry_notes, *retry_notes,
"Previous file_writer output was invalid. Return complete file blocks now. Do not output lookup_requests, prose, or 'lookup failed'.", "Previous file_writer output was invalid. Return complete file blocks now. Do not output lookup_requests, prose, or 'lookup failed'.",
"Use complete fenced file blocks with both the opening ```file:path and closing ``` fence.",
] ]
result = self.agent_executor.run_stage( result = self.agent_executor.run_stage(
agent_stage, agent_stage,
@ -1529,6 +1533,24 @@ def _filter_future_task_test_lines(text: str, task_id: str) -> str:
return "\n".join(kept) return "\n".join(kept)
def _invalid_file_writer_output_summary(output: str, reason: str, max_chars: int = 1200) -> str:
lines = [
f"Reason: {reason}",
f"Output length: {len(output)} characters",
]
lowered = output.lower()
if "```file:" in lowered or "```path:" in lowered:
lines.append("The output started a file block, but NightShift could not parse a complete closed block.")
else:
lines.append("The output did not contain a parseable file block.")
excerpt = output.strip()
if excerpt:
if len(excerpt) > max_chars:
excerpt = excerpt[:max_chars].rstrip() + "\n... <truncated>"
lines.extend(["", "Excerpt:", "```text", excerpt, "```"])
return "\n".join(lines)
def _repeated_protected_path_violation(entries: tuple[RetryMemoryEntry, ...]) -> bool: def _repeated_protected_path_violation(entries: tuple[RetryMemoryEntry, ...]) -> bool:
recent = entries[-2:] recent = entries[-2:]
if len(recent) < 2: if len(recent) < 2:

View File

@ -23,7 +23,7 @@ experiment:
agents: agents:
planner: planner:
backend: ollama backend: ollama
model: hf.co/TheDrummer/Snowpiercer-15B-v4-GGUF:Q5_K_M model: nightshift-base
temperature: 0.4 temperature: 0.4
num_ctx: 8192 num_ctx: 8192
num_predict: 4096 num_predict: 4096
@ -31,15 +31,15 @@ agents:
drafter: drafter:
backend: ollama backend: ollama
model: hf.co/TheDrummer/Snowpiercer-15B-v4-GGUF:Q5_K_M model: nightshift-writer
temperature: 0.7 temperature: 0.7
num_ctx: 8192 num_ctx: 16384
num_predict: 4096 num_predict: 8192
system_prompt: .nightshift/agents/drafter.md system_prompt: .nightshift/agents/drafter.md
continuity_reviewer: continuity_reviewer:
backend: ollama backend: ollama
model: hf.co/TheDrummer/Snowpiercer-15B-v4-GGUF:Q5_K_M model: nightshift-base
temperature: 0.2 temperature: 0.2
num_ctx: 8192 num_ctx: 8192
num_predict: 4096 num_predict: 4096
@ -47,7 +47,7 @@ agents:
style_reviewer: style_reviewer:
backend: ollama backend: ollama
model: hf.co/TheDrummer/Snowpiercer-15B-v4-GGUF:Q5_K_M model: nightshift-base
temperature: 0.3 temperature: 0.3
num_ctx: 8192 num_ctx: 8192
num_predict: 4096 num_predict: 4096
@ -55,9 +55,9 @@ agents:
state_updater: state_updater:
backend: ollama backend: ollama
model: hf.co/TheDrummer/Snowpiercer-15B-v4-GGUF:Q5_K_M model: nightshift-writer
temperature: 0.2 temperature: 0.2
num_ctx: 8192 num_ctx: 16384
num_predict: 4096 num_predict: 4096
system_prompt: .nightshift/agents/state-updater.md system_prompt: .nightshift/agents/state-updater.md
@ -83,6 +83,8 @@ pipeline:
type: file_writer type: file_writer
agent: drafter agent: drafter
output: scene-draft.patch output: scene-draft.patch
allowed_paths:
- story/chapters
- id: normalize_draft - id: normalize_draft
type: patch_normalizer type: patch_normalizer
@ -120,6 +122,11 @@ pipeline:
type: file_writer type: file_writer
agent: state_updater agent: state_updater
output: state-update.patch output: state-update.patch
allowed_paths:
- story/plot-state.md
- story/characters.md
- story/timeline.md
- story/unresolved-threads.md
- id: normalize_state - id: normalize_state
type: patch_normalizer type: patch_normalizer

View File

@ -161,7 +161,7 @@ class TerminalAnimation:
self.message = message self.message = message
self.stream = stream or sys.stderr self.stream = stream or sys.stderr
self.interval_seconds = interval_seconds self.interval_seconds = interval_seconds
self.enabled = enabled and should_style(self.stream) self.enabled = enabled and should_animate(self.stream)
self._stop = threading.Event() self._stop = threading.Event()
self._thread: threading.Thread | None = None self._thread: threading.Thread | None = None
self._width = 0 self._width = 0
@ -177,6 +177,7 @@ class TerminalAnimation:
def start(self) -> None: def start(self) -> None:
if not self.enabled or self._thread is not None: if not self.enabled or self._thread is not None:
return return
self._render_frame(0)
self._thread = threading.Thread(target=self._run, daemon=True) self._thread = threading.Thread(target=self._run, daemon=True)
self._thread.start() self._thread.start()
@ -193,8 +194,13 @@ class TerminalAnimation:
self.message = message self.message = message
def _run(self) -> None: def _run(self) -> None:
index = 0 index = 1
while not self._stop.is_set(): while not self._stop.is_set():
self._render_frame(index)
index += 1
self._stop.wait(self.interval_seconds)
def _render_frame(self, index: int) -> None:
frame = self.frames[index % len(self.frames)] frame = self.frames[index % len(self.frames)]
with self._lock: with self._lock:
message = self.message message = self.message
@ -202,8 +208,6 @@ class TerminalAnimation:
self._width = max(self._width, len(text)) self._width = max(self._width, len(text))
self.stream.write("\r" + text.ljust(self._width)) self.stream.write("\r" + text.ljust(self._width))
self.stream.flush() self.stream.flush()
index += 1
self._stop.wait(self.interval_seconds)
def _clear(self) -> None: def _clear(self) -> None:
if not self.enabled: if not self.enabled:
@ -218,6 +222,20 @@ def animation_frames(name: str) -> tuple[str, ...]:
frames = HOTDOG_ANIMATIONS["agent_thinking"] frames = HOTDOG_ANIMATIONS["agent_thinking"]
return tuple(frames) return tuple(frames)
def should_animate(stream: TextIO | None = None) -> bool:
stream = stream or sys.stderr
if os.environ.get("TERM") == "dumb":
return False
if bool(getattr(stream, "isatty", lambda: False)()):
return True
if stream is sys.stderr and bool(getattr(sys.stdout, "isatty", lambda: False)()):
return True
if stream is sys.stdout and bool(getattr(sys.stderr, "isatty", lambda: False)()):
return True
return False
def should_style(stream: TextIO | None = None) -> bool: def should_style(stream: TextIO | None = None) -> bool:
stream = stream or sys.stdout stream = stream or sys.stdout
if os.environ.get("NO_COLOR"): if os.environ.get("NO_COLOR"):

View File

@ -71,6 +71,27 @@ class AgentExecutorTests(unittest.TestCase):
self.assertIn("On repair attempts", prompt) self.assertIn("On repair attempts", prompt)
self.assertIn("failed stage output", prompt) self.assertIn("failed stage output", prompt)
self.assertIn("Use real project-relative paths", prompt)
self.assertNotIn("relative/path.py", prompt)
self.assertNotIn("including tests", prompt)
def test_file_writer_contract_includes_stage_allowed_paths(self) -> None:
task = parse_tasks(TASK_MD)[0]
prompt = build_prompt_bundle(
system_prompt="System rules",
stage=StageConfig(
id="write",
type="file_writer",
agent="writer",
allowed_paths=("story/chapters",),
),
project_context="Project context",
task=task,
previous_outputs={},
retry_notes=[],
)
self.assertIn("Use only paths under these project-relative targets: `story/chapters`.", prompt)
def test_command_agent_writes_output_and_returns_pass(self) -> None: def test_command_agent_writes_output_and_returns_pass(self) -> None:
with tempfile.TemporaryDirectory() as directory: with tempfile.TemporaryDirectory() as directory:

View File

@ -119,6 +119,8 @@ class InitProjectTests(unittest.TestCase):
self.assertTrue((root / "story" / "plot-state.md").exists()) self.assertTrue((root / "story" / "plot-state.md").exists())
self.assertTrue((root / "story" / "chapters" / ".gitkeep").exists()) self.assertTrue((root / "story" / "chapters" / ".gitkeep").exists())
self.assertIn("type: file_writer", config) self.assertIn("type: file_writer", config)
self.assertIn("model: nightshift-writer", config)
self.assertIn("model: nightshift-base", config)
self.assertIn("story/chapters", config) self.assertIn("story/chapters", config)
self.assertIn("story/worldbuilding.md", gitignore) self.assertIn("story/worldbuilding.md", gitignore)
self.assertIn("story/chapters/**/*.md", gitignore) self.assertIn("story/chapters/**/*.md", gitignore)

View File

@ -715,6 +715,49 @@ Acceptance Criteria:
self.assertEqual(result.status, "complete") self.assertEqual(result.status, "complete")
self.assertIn("+new", patch.read_text(encoding="utf-8")) self.assertIn("+new", patch.read_text(encoding="utf-8"))
def test_file_writer_invalid_output_retry_uses_compact_summary(self) -> None:
with tempfile.TemporaryDirectory() as directory:
root = Path(directory)
_write_common_files(root)
(root / "app.py").write_text("old\n", encoding="utf-8")
(root / "fake_writer.py").write_text(
"\n".join(
[
"import sys",
"prompt = sys.stdin.read()",
"if 'Previous file_writer output was invalid' not in prompt:",
" print('```file:app.py')",
" print('x' * 5000)",
"else:",
" (open('retry-prompt.txt', 'w', encoding='utf-8').write(prompt))",
" print('```file:app.py')",
" print('new')",
" print('```')",
]
),
encoding="utf-8",
)
stages = (
StageConfig(id="write", type="file_writer", agent="writer"),
StageConfig(id="validate", type="patch_validator"),
)
config = make_config(root, stages)
config.agents["writer"] = AgentConfig(
id="writer",
backend="command",
command="python fake_writer.py",
system_prompt=Path("planner.md"),
)
runner = PipelineRunner(config, ArtifactStore(root, ".nightshift", run_id="test-run"))
result = runner.run_task(parse_tasks(TASK_MD)[0])
retry_prompt = (root / "retry-prompt.txt").read_text(encoding="utf-8")
self.assertEqual(result.status, "complete")
self.assertIn("invalid_file_writer_output_summary", retry_prompt)
self.assertIn("... <truncated>", retry_prompt)
self.assertLess(len(retry_prompt), 9000)
def test_patch_validator_rejects_unsafe_patch(self) -> None: def test_patch_validator_rejects_unsafe_patch(self) -> None:
with tempfile.TemporaryDirectory() as directory: with tempfile.TemporaryDirectory() as directory:
root = Path(directory) root = Path(directory)

View File

@ -12,6 +12,7 @@ from nightshift.terminal import (
animation_frames, animation_frames,
format_banner, format_banner,
format_console_event_line, format_console_event_line,
should_animate,
) )
@ -45,6 +46,26 @@ class TerminalStylingTests(unittest.TestCase):
self.assertEqual(stream.getvalue(), "") self.assertEqual(stream.getvalue(), "")
def test_terminal_animation_renders_immediately_when_started(self) -> None:
stream = FakeTTY()
animation = TerminalAnimation(
name="status_dots",
message="Starting",
stream=stream,
interval_seconds=60,
)
animation.start()
output = stream.getvalue()
animation.stop()
self.assertIn("[. ] | Starting", output)
def test_terminal_animation_does_not_depend_on_color_output(self) -> None:
stream = FakeTTY()
with patch.dict("os.environ", {"NO_COLOR": "1"}):
self.assertTrue(should_animate(stream))
def test_console_event_line_colors_success_and_failure(self) -> None: def test_console_event_line_colors_success_and_failure(self) -> None:
success = format_console_event_line( success = format_console_event_line(
"2026-05-17T00:00:00Z", "2026-05-17T00:00:00Z",