Accuracy tweaks for story writer

2026-06-14 18:18:36 +00:00 · 2026-05-22 02:22:56 -07:00 · 2026-05-22 02:22:56 -07:00 · dee87434e7
commit dee87434e7
parent 9157853c10
8 changed files with 163 additions and 20 deletions
--- a/nightshift/agents.py
+++ b/nightshift/agents.py
@ -460,15 +460,17 @@ def output_contract_for(stage: StageConfig) -> str:
            ]
        )
    if stage.type == "file_writer":
        allowed = _format_allowed_file_writer_paths(stage.allowed_paths)
        return "\n".join(
            [
                "Return complete file contents only.",
                "Use one fenced block per file with this exact opening form:",
-                "```file:relative/path.py",
+                "```file:path/inside/project.ext",
                "<complete file content>",
                "```",
                allowed,
                "Do not include prose outside file blocks.",
-                "Include every file needed for the task, including tests.",
+                "Include only files required for this stage and task.",
                "NightShift will generate the unified diff deterministically.",
                "On repair attempts, use the retry notes and failed stage output to diagnose the root cause before changing files.",
                "Do not repeat an unchanged solution unless the failure output shows the implementation is already correct.",
@ -514,6 +516,13 @@ def output_contract_for(stage: StageConfig) -> str:
    return "Write the requested stage output in concise markdown."
 def _format_allowed_file_writer_paths(allowed_paths: tuple[str, ...]) -> str:
    if not allowed_paths:
        return "Use real project-relative paths, not placeholder paths."
    paths = ", ".join(f"`{path}`" for path in allowed_paths)
    return f"Use only paths under these project-relative targets: {paths}."
 def parse_review_output(output: str) -> tuple[StageStatus, str, str | None, str | None]:
    values: dict[str, str] = {}
    for line in output.splitlines():
--- a/nightshift/pipeline.py
+++ b/nightshift/pipeline.py
@ -771,10 +771,14 @@ class PipelineRunner:
                        task_id=task.id,
                    )
                    rerun_outputs = dict(enriched_outputs)
-                    rerun_outputs["invalid_file_writer_output"] = stdout
+                    rerun_outputs["invalid_file_writer_output_summary"] = _invalid_file_writer_output_summary(
                        stdout,
                        str(exc),
                    )
                    strict_notes = [
                        *retry_notes,
                        "Previous file_writer output was invalid. Return complete file blocks now. Do not output lookup_requests, prose, or 'lookup failed'.",
                        "Use complete fenced file blocks with both the opening ```file:path and closing ``` fence.",
                    ]
                    result = self.agent_executor.run_stage(
                        agent_stage,
@ -1529,6 +1533,24 @@ def _filter_future_task_test_lines(text: str, task_id: str) -> str:
    return "\n".join(kept)
 def _invalid_file_writer_output_summary(output: str, reason: str, max_chars: int = 1200) -> str:
    lines = [
        f"Reason: {reason}",
        f"Output length: {len(output)} characters",
    ]
    lowered = output.lower()
    if "```file:" in lowered or "```path:" in lowered:
        lines.append("The output started a file block, but NightShift could not parse a complete closed block.")
    else:
        lines.append("The output did not contain a parseable file block.")
    excerpt = output.strip()
    if excerpt:
        if len(excerpt) > max_chars:
            excerpt = excerpt[:max_chars].rstrip() + "\n... <truncated>"
        lines.extend(["", "Excerpt:", "```text", excerpt, "```"])
    return "\n".join(lines)
 def _repeated_protected_path_violation(entries: tuple[RetryMemoryEntry, ...]) -> bool:
    recent = entries[-2:]
    if len(recent) < 2:
--- a/nightshift/project_templates/tutorial-novel/nightshift.yaml
+++ b/nightshift/project_templates/tutorial-novel/nightshift.yaml
@ -23,7 +23,7 @@ experiment:
 agents:
  planner:
    backend: ollama
-    model: hf.co/TheDrummer/Snowpiercer-15B-v4-GGUF:Q5_K_M
+    model: nightshift-base
    temperature: 0.4
    num_ctx: 8192
    num_predict: 4096
@ -31,15 +31,15 @@ agents:
  drafter:
    backend: ollama
-    model: hf.co/TheDrummer/Snowpiercer-15B-v4-GGUF:Q5_K_M
+    model: nightshift-writer
    temperature: 0.7
-    num_ctx: 8192
+    num_ctx: 16384
-    num_predict: 4096
+    num_predict: 8192
    system_prompt: .nightshift/agents/drafter.md
  continuity_reviewer:
    backend: ollama
-    model: hf.co/TheDrummer/Snowpiercer-15B-v4-GGUF:Q5_K_M
+    model: nightshift-base
    temperature: 0.2
    num_ctx: 8192
    num_predict: 4096
@ -47,7 +47,7 @@ agents:
  style_reviewer:
    backend: ollama
-    model: hf.co/TheDrummer/Snowpiercer-15B-v4-GGUF:Q5_K_M
+    model: nightshift-base
    temperature: 0.3
    num_ctx: 8192
    num_predict: 4096
@ -55,9 +55,9 @@ agents:
  state_updater:
    backend: ollama
-    model: hf.co/TheDrummer/Snowpiercer-15B-v4-GGUF:Q5_K_M
+    model: nightshift-writer
    temperature: 0.2
-    num_ctx: 8192
+    num_ctx: 16384
    num_predict: 4096
    system_prompt: .nightshift/agents/state-updater.md
@ -83,6 +83,8 @@ pipeline:
      type: file_writer
      agent: drafter
      output: scene-draft.patch
      allowed_paths:
        - story/chapters
    - id: normalize_draft
      type: patch_normalizer
@ -120,6 +122,11 @@ pipeline:
      type: file_writer
      agent: state_updater
      output: state-update.patch
      allowed_paths:
        - story/plot-state.md
        - story/characters.md
        - story/timeline.md
        - story/unresolved-threads.md
    - id: normalize_state
      type: patch_normalizer
--- a/nightshift/terminal.py
+++ b/nightshift/terminal.py
@ -161,7 +161,7 @@ class TerminalAnimation:
        self.message = message
        self.stream = stream or sys.stderr
        self.interval_seconds = interval_seconds
-        self.enabled = enabled and should_style(self.stream)
+        self.enabled = enabled and should_animate(self.stream)
        self._stop = threading.Event()
        self._thread: threading.Thread | None = None
        self._width = 0
@ -177,6 +177,7 @@ class TerminalAnimation:
    def start(self) -> None:
        if not self.enabled or self._thread is not None:
            return
        self._render_frame(0)
        self._thread = threading.Thread(target=self._run, daemon=True)
        self._thread.start()
@ -193,8 +194,13 @@ class TerminalAnimation:
            self.message = message
    def _run(self) -> None:
-        index = 0
+        index = 1
        while not self._stop.is_set():
            self._render_frame(index)
            index += 1
            self._stop.wait(self.interval_seconds)
    def _render_frame(self, index: int) -> None:
        frame = self.frames[index % len(self.frames)]
        with self._lock:
            message = self.message
@ -202,8 +208,6 @@ class TerminalAnimation:
        self._width = max(self._width, len(text))
        self.stream.write("\r" + text.ljust(self._width))
        self.stream.flush()
            index += 1
            self._stop.wait(self.interval_seconds)
    def _clear(self) -> None:
        if not self.enabled:
@ -218,6 +222,20 @@ def animation_frames(name: str) -> tuple[str, ...]:
        frames = HOTDOG_ANIMATIONS["agent_thinking"]
    return tuple(frames)
 def should_animate(stream: TextIO | None = None) -> bool:
    stream = stream or sys.stderr
    if os.environ.get("TERM") == "dumb":
        return False
    if bool(getattr(stream, "isatty", lambda: False)()):
        return True
    if stream is sys.stderr and bool(getattr(sys.stdout, "isatty", lambda: False)()):
        return True
    if stream is sys.stdout and bool(getattr(sys.stderr, "isatty", lambda: False)()):
        return True
    return False
 def should_style(stream: TextIO | None = None) -> bool:
    stream = stream or sys.stdout
    if os.environ.get("NO_COLOR"):
--- a/tests/test_agents.py
+++ b/tests/test_agents.py
@ -71,6 +71,27 @@ class AgentExecutorTests(unittest.TestCase):
        self.assertIn("On repair attempts", prompt)
        self.assertIn("failed stage output", prompt)
        self.assertIn("Use real project-relative paths", prompt)
        self.assertNotIn("relative/path.py", prompt)
        self.assertNotIn("including tests", prompt)
    def test_file_writer_contract_includes_stage_allowed_paths(self) -> None:
        task = parse_tasks(TASK_MD)[0]
        prompt = build_prompt_bundle(
            system_prompt="System rules",
            stage=StageConfig(
                id="write",
                type="file_writer",
                agent="writer",
                allowed_paths=("story/chapters",),
            ),
            project_context="Project context",
            task=task,
            previous_outputs={},
            retry_notes=[],
        )
        self.assertIn("Use only paths under these project-relative targets: `story/chapters`.", prompt)
    def test_command_agent_writes_output_and_returns_pass(self) -> None:
        with tempfile.TemporaryDirectory() as directory:
--- a/tests/test_init.py
+++ b/tests/test_init.py
@ -119,6 +119,8 @@ class InitProjectTests(unittest.TestCase):
            self.assertTrue((root / "story" / "plot-state.md").exists())
            self.assertTrue((root / "story" / "chapters" / ".gitkeep").exists())
            self.assertIn("type: file_writer", config)
            self.assertIn("model: nightshift-writer", config)
            self.assertIn("model: nightshift-base", config)
            self.assertIn("story/chapters", config)
            self.assertIn("story/worldbuilding.md", gitignore)
            self.assertIn("story/chapters/**/*.md", gitignore)
--- a/tests/test_pipeline.py
+++ b/tests/test_pipeline.py
@ -715,6 +715,49 @@ Acceptance Criteria:
            self.assertEqual(result.status, "complete")
            self.assertIn("+new", patch.read_text(encoding="utf-8"))
    def test_file_writer_invalid_output_retry_uses_compact_summary(self) -> None:
        with tempfile.TemporaryDirectory() as directory:
            root = Path(directory)
            _write_common_files(root)
            (root / "app.py").write_text("old\n", encoding="utf-8")
            (root / "fake_writer.py").write_text(
                "\n".join(
                    [
                        "import sys",
                        "prompt = sys.stdin.read()",
                        "if 'Previous file_writer output was invalid' not in prompt:",
                        "    print('```file:app.py')",
                        "    print('x' * 5000)",
                        "else:",
                        "    (open('retry-prompt.txt', 'w', encoding='utf-8').write(prompt))",
                        "    print('```file:app.py')",
                        "    print('new')",
                        "    print('```')",
                    ]
                ),
                encoding="utf-8",
            )
            stages = (
                StageConfig(id="write", type="file_writer", agent="writer"),
                StageConfig(id="validate", type="patch_validator"),
            )
            config = make_config(root, stages)
            config.agents["writer"] = AgentConfig(
                id="writer",
                backend="command",
                command="python fake_writer.py",
                system_prompt=Path("planner.md"),
            )
            runner = PipelineRunner(config, ArtifactStore(root, ".nightshift", run_id="test-run"))
            result = runner.run_task(parse_tasks(TASK_MD)[0])
            retry_prompt = (root / "retry-prompt.txt").read_text(encoding="utf-8")
            self.assertEqual(result.status, "complete")
            self.assertIn("invalid_file_writer_output_summary", retry_prompt)
            self.assertIn("... <truncated>", retry_prompt)
            self.assertLess(len(retry_prompt), 9000)
    def test_patch_validator_rejects_unsafe_patch(self) -> None:
        with tempfile.TemporaryDirectory() as directory:
            root = Path(directory)
--- a/tests/test_terminal.py
+++ b/tests/test_terminal.py
@ -12,6 +12,7 @@ from nightshift.terminal import (
    animation_frames,
    format_banner,
    format_console_event_line,
    should_animate,
 )
@ -45,6 +46,26 @@ class TerminalStylingTests(unittest.TestCase):
        self.assertEqual(stream.getvalue(), "")
    def test_terminal_animation_renders_immediately_when_started(self) -> None:
        stream = FakeTTY()
        animation = TerminalAnimation(
            name="status_dots",
            message="Starting",
            stream=stream,
            interval_seconds=60,
        )
        animation.start()
        output = stream.getvalue()
        animation.stop()
        self.assertIn("[.  ] | Starting", output)
    def test_terminal_animation_does_not_depend_on_color_output(self) -> None:
        stream = FakeTTY()
        with patch.dict("os.environ", {"NO_COLOR": "1"}):
            self.assertTrue(should_animate(stream))
    def test_console_event_line_colors_success_and_failure(self) -> None:
        success = format_console_event_line(
            "2026-05-17T00:00:00Z",