Accuracy tweaks for story writer

This commit is contained in:
K. Hodges 2026-05-22 02:22:56 -07:00
parent 9157853c10
commit dee87434e7
8 changed files with 163 additions and 20 deletions

View File

@ -460,15 +460,17 @@ def output_contract_for(stage: StageConfig) -> str:
]
)
if stage.type == "file_writer":
allowed = _format_allowed_file_writer_paths(stage.allowed_paths)
return "\n".join(
[
"Return complete file contents only.",
"Use one fenced block per file with this exact opening form:",
"```file:relative/path.py",
"```file:path/inside/project.ext",
"<complete file content>",
"```",
allowed,
"Do not include prose outside file blocks.",
"Include every file needed for the task, including tests.",
"Include only files required for this stage and task.",
"NightShift will generate the unified diff deterministically.",
"On repair attempts, use the retry notes and failed stage output to diagnose the root cause before changing files.",
"Do not repeat an unchanged solution unless the failure output shows the implementation is already correct.",
@ -514,6 +516,13 @@ def output_contract_for(stage: StageConfig) -> str:
return "Write the requested stage output in concise markdown."
def _format_allowed_file_writer_paths(allowed_paths: tuple[str, ...]) -> str:
if not allowed_paths:
return "Use real project-relative paths, not placeholder paths."
paths = ", ".join(f"`{path}`" for path in allowed_paths)
return f"Use only paths under these project-relative targets: {paths}."
def parse_review_output(output: str) -> tuple[StageStatus, str, str | None, str | None]:
values: dict[str, str] = {}
for line in output.splitlines():

View File

@ -771,10 +771,14 @@ class PipelineRunner:
task_id=task.id,
)
rerun_outputs = dict(enriched_outputs)
rerun_outputs["invalid_file_writer_output"] = stdout
rerun_outputs["invalid_file_writer_output_summary"] = _invalid_file_writer_output_summary(
stdout,
str(exc),
)
strict_notes = [
*retry_notes,
"Previous file_writer output was invalid. Return complete file blocks now. Do not output lookup_requests, prose, or 'lookup failed'.",
"Use complete fenced file blocks with both the opening ```file:path and closing ``` fence.",
]
result = self.agent_executor.run_stage(
agent_stage,
@ -1529,6 +1533,24 @@ def _filter_future_task_test_lines(text: str, task_id: str) -> str:
return "\n".join(kept)
def _invalid_file_writer_output_summary(output: str, reason: str, max_chars: int = 1200) -> str:
lines = [
f"Reason: {reason}",
f"Output length: {len(output)} characters",
]
lowered = output.lower()
if "```file:" in lowered or "```path:" in lowered:
lines.append("The output started a file block, but NightShift could not parse a complete closed block.")
else:
lines.append("The output did not contain a parseable file block.")
excerpt = output.strip()
if excerpt:
if len(excerpt) > max_chars:
excerpt = excerpt[:max_chars].rstrip() + "\n... <truncated>"
lines.extend(["", "Excerpt:", "```text", excerpt, "```"])
return "\n".join(lines)
def _repeated_protected_path_violation(entries: tuple[RetryMemoryEntry, ...]) -> bool:
recent = entries[-2:]
if len(recent) < 2:

View File

@ -23,7 +23,7 @@ experiment:
agents:
planner:
backend: ollama
model: hf.co/TheDrummer/Snowpiercer-15B-v4-GGUF:Q5_K_M
model: nightshift-base
temperature: 0.4
num_ctx: 8192
num_predict: 4096
@ -31,15 +31,15 @@ agents:
drafter:
backend: ollama
model: hf.co/TheDrummer/Snowpiercer-15B-v4-GGUF:Q5_K_M
model: nightshift-writer
temperature: 0.7
num_ctx: 8192
num_predict: 4096
num_ctx: 16384
num_predict: 8192
system_prompt: .nightshift/agents/drafter.md
continuity_reviewer:
backend: ollama
model: hf.co/TheDrummer/Snowpiercer-15B-v4-GGUF:Q5_K_M
model: nightshift-base
temperature: 0.2
num_ctx: 8192
num_predict: 4096
@ -47,7 +47,7 @@ agents:
style_reviewer:
backend: ollama
model: hf.co/TheDrummer/Snowpiercer-15B-v4-GGUF:Q5_K_M
model: nightshift-base
temperature: 0.3
num_ctx: 8192
num_predict: 4096
@ -55,9 +55,9 @@ agents:
state_updater:
backend: ollama
model: hf.co/TheDrummer/Snowpiercer-15B-v4-GGUF:Q5_K_M
model: nightshift-writer
temperature: 0.2
num_ctx: 8192
num_ctx: 16384
num_predict: 4096
system_prompt: .nightshift/agents/state-updater.md
@ -83,6 +83,8 @@ pipeline:
type: file_writer
agent: drafter
output: scene-draft.patch
allowed_paths:
- story/chapters
- id: normalize_draft
type: patch_normalizer
@ -120,6 +122,11 @@ pipeline:
type: file_writer
agent: state_updater
output: state-update.patch
allowed_paths:
- story/plot-state.md
- story/characters.md
- story/timeline.md
- story/unresolved-threads.md
- id: normalize_state
type: patch_normalizer

View File

@ -161,7 +161,7 @@ class TerminalAnimation:
self.message = message
self.stream = stream or sys.stderr
self.interval_seconds = interval_seconds
self.enabled = enabled and should_style(self.stream)
self.enabled = enabled and should_animate(self.stream)
self._stop = threading.Event()
self._thread: threading.Thread | None = None
self._width = 0
@ -177,6 +177,7 @@ class TerminalAnimation:
def start(self) -> None:
if not self.enabled or self._thread is not None:
return
self._render_frame(0)
self._thread = threading.Thread(target=self._run, daemon=True)
self._thread.start()
@ -193,18 +194,21 @@ class TerminalAnimation:
self.message = message
def _run(self) -> None:
index = 0
index = 1
while not self._stop.is_set():
frame = self.frames[index % len(self.frames)]
with self._lock:
message = self.message
text = f"{frame} | {message}"
self._width = max(self._width, len(text))
self.stream.write("\r" + text.ljust(self._width))
self.stream.flush()
self._render_frame(index)
index += 1
self._stop.wait(self.interval_seconds)
def _render_frame(self, index: int) -> None:
frame = self.frames[index % len(self.frames)]
with self._lock:
message = self.message
text = f"{frame} | {message}"
self._width = max(self._width, len(text))
self.stream.write("\r" + text.ljust(self._width))
self.stream.flush()
def _clear(self) -> None:
if not self.enabled:
return
@ -218,6 +222,20 @@ def animation_frames(name: str) -> tuple[str, ...]:
frames = HOTDOG_ANIMATIONS["agent_thinking"]
return tuple(frames)
def should_animate(stream: TextIO | None = None) -> bool:
stream = stream or sys.stderr
if os.environ.get("TERM") == "dumb":
return False
if bool(getattr(stream, "isatty", lambda: False)()):
return True
if stream is sys.stderr and bool(getattr(sys.stdout, "isatty", lambda: False)()):
return True
if stream is sys.stdout and bool(getattr(sys.stderr, "isatty", lambda: False)()):
return True
return False
def should_style(stream: TextIO | None = None) -> bool:
stream = stream or sys.stdout
if os.environ.get("NO_COLOR"):

View File

@ -71,6 +71,27 @@ class AgentExecutorTests(unittest.TestCase):
self.assertIn("On repair attempts", prompt)
self.assertIn("failed stage output", prompt)
self.assertIn("Use real project-relative paths", prompt)
self.assertNotIn("relative/path.py", prompt)
self.assertNotIn("including tests", prompt)
def test_file_writer_contract_includes_stage_allowed_paths(self) -> None:
task = parse_tasks(TASK_MD)[0]
prompt = build_prompt_bundle(
system_prompt="System rules",
stage=StageConfig(
id="write",
type="file_writer",
agent="writer",
allowed_paths=("story/chapters",),
),
project_context="Project context",
task=task,
previous_outputs={},
retry_notes=[],
)
self.assertIn("Use only paths under these project-relative targets: `story/chapters`.", prompt)
def test_command_agent_writes_output_and_returns_pass(self) -> None:
with tempfile.TemporaryDirectory() as directory:

View File

@ -119,6 +119,8 @@ class InitProjectTests(unittest.TestCase):
self.assertTrue((root / "story" / "plot-state.md").exists())
self.assertTrue((root / "story" / "chapters" / ".gitkeep").exists())
self.assertIn("type: file_writer", config)
self.assertIn("model: nightshift-writer", config)
self.assertIn("model: nightshift-base", config)
self.assertIn("story/chapters", config)
self.assertIn("story/worldbuilding.md", gitignore)
self.assertIn("story/chapters/**/*.md", gitignore)

View File

@ -715,6 +715,49 @@ Acceptance Criteria:
self.assertEqual(result.status, "complete")
self.assertIn("+new", patch.read_text(encoding="utf-8"))
def test_file_writer_invalid_output_retry_uses_compact_summary(self) -> None:
with tempfile.TemporaryDirectory() as directory:
root = Path(directory)
_write_common_files(root)
(root / "app.py").write_text("old\n", encoding="utf-8")
(root / "fake_writer.py").write_text(
"\n".join(
[
"import sys",
"prompt = sys.stdin.read()",
"if 'Previous file_writer output was invalid' not in prompt:",
" print('```file:app.py')",
" print('x' * 5000)",
"else:",
" (open('retry-prompt.txt', 'w', encoding='utf-8').write(prompt))",
" print('```file:app.py')",
" print('new')",
" print('```')",
]
),
encoding="utf-8",
)
stages = (
StageConfig(id="write", type="file_writer", agent="writer"),
StageConfig(id="validate", type="patch_validator"),
)
config = make_config(root, stages)
config.agents["writer"] = AgentConfig(
id="writer",
backend="command",
command="python fake_writer.py",
system_prompt=Path("planner.md"),
)
runner = PipelineRunner(config, ArtifactStore(root, ".nightshift", run_id="test-run"))
result = runner.run_task(parse_tasks(TASK_MD)[0])
retry_prompt = (root / "retry-prompt.txt").read_text(encoding="utf-8")
self.assertEqual(result.status, "complete")
self.assertIn("invalid_file_writer_output_summary", retry_prompt)
self.assertIn("... <truncated>", retry_prompt)
self.assertLess(len(retry_prompt), 9000)
def test_patch_validator_rejects_unsafe_patch(self) -> None:
with tempfile.TemporaryDirectory() as directory:
root = Path(directory)

View File

@ -12,6 +12,7 @@ from nightshift.terminal import (
animation_frames,
format_banner,
format_console_event_line,
should_animate,
)
@ -45,6 +46,26 @@ class TerminalStylingTests(unittest.TestCase):
self.assertEqual(stream.getvalue(), "")
def test_terminal_animation_renders_immediately_when_started(self) -> None:
stream = FakeTTY()
animation = TerminalAnimation(
name="status_dots",
message="Starting",
stream=stream,
interval_seconds=60,
)
animation.start()
output = stream.getvalue()
animation.stop()
self.assertIn("[. ] | Starting", output)
def test_terminal_animation_does_not_depend_on_color_output(self) -> None:
stream = FakeTTY()
with patch.dict("os.environ", {"NO_COLOR": "1"}):
self.assertTrue(should_animate(stream))
def test_console_event_line_colors_success_and_failure(self) -> None:
success = format_console_event_line(
"2026-05-17T00:00:00Z",