From dee87434e73185b50c1bbc39a156972cc50dad7d Mon Sep 17 00:00:00 2001 From: "K. Hodges" Date: Fri, 22 May 2026 02:22:56 -0700 Subject: [PATCH] Accuracy tweaks for story writer --- nightshift/agents.py | 13 +++++- nightshift/pipeline.py | 24 ++++++++++- .../tutorial-novel/nightshift.yaml | 23 ++++++---- nightshift/terminal.py | 36 ++++++++++++---- tests/test_agents.py | 21 +++++++++ tests/test_init.py | 2 + tests/test_pipeline.py | 43 +++++++++++++++++++ tests/test_terminal.py | 21 +++++++++ 8 files changed, 163 insertions(+), 20 deletions(-) diff --git a/nightshift/agents.py b/nightshift/agents.py index 0c09ab7..f78c6c0 100644 --- a/nightshift/agents.py +++ b/nightshift/agents.py @@ -460,15 +460,17 @@ def output_contract_for(stage: StageConfig) -> str: ] ) if stage.type == "file_writer": + allowed = _format_allowed_file_writer_paths(stage.allowed_paths) return "\n".join( [ "Return complete file contents only.", "Use one fenced block per file with this exact opening form:", - "```file:relative/path.py", + "```file:path/inside/project.ext", "", "```", + allowed, "Do not include prose outside file blocks.", - "Include every file needed for the task, including tests.", + "Include only files required for this stage and task.", "NightShift will generate the unified diff deterministically.", "On repair attempts, use the retry notes and failed stage output to diagnose the root cause before changing files.", "Do not repeat an unchanged solution unless the failure output shows the implementation is already correct.", @@ -514,6 +516,13 @@ def output_contract_for(stage: StageConfig) -> str: return "Write the requested stage output in concise markdown." +def _format_allowed_file_writer_paths(allowed_paths: tuple[str, ...]) -> str: + if not allowed_paths: + return "Use real project-relative paths, not placeholder paths." + paths = ", ".join(f"`{path}`" for path in allowed_paths) + return f"Use only paths under these project-relative targets: {paths}." + + def parse_review_output(output: str) -> tuple[StageStatus, str, str | None, str | None]: values: dict[str, str] = {} for line in output.splitlines(): diff --git a/nightshift/pipeline.py b/nightshift/pipeline.py index cf59d1f..7aab9e3 100644 --- a/nightshift/pipeline.py +++ b/nightshift/pipeline.py @@ -771,10 +771,14 @@ class PipelineRunner: task_id=task.id, ) rerun_outputs = dict(enriched_outputs) - rerun_outputs["invalid_file_writer_output"] = stdout + rerun_outputs["invalid_file_writer_output_summary"] = _invalid_file_writer_output_summary( + stdout, + str(exc), + ) strict_notes = [ *retry_notes, "Previous file_writer output was invalid. Return complete file blocks now. Do not output lookup_requests, prose, or 'lookup failed'.", + "Use complete fenced file blocks with both the opening ```file:path and closing ``` fence.", ] result = self.agent_executor.run_stage( agent_stage, @@ -1529,6 +1533,24 @@ def _filter_future_task_test_lines(text: str, task_id: str) -> str: return "\n".join(kept) +def _invalid_file_writer_output_summary(output: str, reason: str, max_chars: int = 1200) -> str: + lines = [ + f"Reason: {reason}", + f"Output length: {len(output)} characters", + ] + lowered = output.lower() + if "```file:" in lowered or "```path:" in lowered: + lines.append("The output started a file block, but NightShift could not parse a complete closed block.") + else: + lines.append("The output did not contain a parseable file block.") + excerpt = output.strip() + if excerpt: + if len(excerpt) > max_chars: + excerpt = excerpt[:max_chars].rstrip() + "\n... " + lines.extend(["", "Excerpt:", "```text", excerpt, "```"]) + return "\n".join(lines) + + def _repeated_protected_path_violation(entries: tuple[RetryMemoryEntry, ...]) -> bool: recent = entries[-2:] if len(recent) < 2: diff --git a/nightshift/project_templates/tutorial-novel/nightshift.yaml b/nightshift/project_templates/tutorial-novel/nightshift.yaml index 41860b0..33b37a1 100644 --- a/nightshift/project_templates/tutorial-novel/nightshift.yaml +++ b/nightshift/project_templates/tutorial-novel/nightshift.yaml @@ -23,7 +23,7 @@ experiment: agents: planner: backend: ollama - model: hf.co/TheDrummer/Snowpiercer-15B-v4-GGUF:Q5_K_M + model: nightshift-base temperature: 0.4 num_ctx: 8192 num_predict: 4096 @@ -31,15 +31,15 @@ agents: drafter: backend: ollama - model: hf.co/TheDrummer/Snowpiercer-15B-v4-GGUF:Q5_K_M + model: nightshift-writer temperature: 0.7 - num_ctx: 8192 - num_predict: 4096 + num_ctx: 16384 + num_predict: 8192 system_prompt: .nightshift/agents/drafter.md continuity_reviewer: backend: ollama - model: hf.co/TheDrummer/Snowpiercer-15B-v4-GGUF:Q5_K_M + model: nightshift-base temperature: 0.2 num_ctx: 8192 num_predict: 4096 @@ -47,7 +47,7 @@ agents: style_reviewer: backend: ollama - model: hf.co/TheDrummer/Snowpiercer-15B-v4-GGUF:Q5_K_M + model: nightshift-base temperature: 0.3 num_ctx: 8192 num_predict: 4096 @@ -55,9 +55,9 @@ agents: state_updater: backend: ollama - model: hf.co/TheDrummer/Snowpiercer-15B-v4-GGUF:Q5_K_M + model: nightshift-writer temperature: 0.2 - num_ctx: 8192 + num_ctx: 16384 num_predict: 4096 system_prompt: .nightshift/agents/state-updater.md @@ -83,6 +83,8 @@ pipeline: type: file_writer agent: drafter output: scene-draft.patch + allowed_paths: + - story/chapters - id: normalize_draft type: patch_normalizer @@ -120,6 +122,11 @@ pipeline: type: file_writer agent: state_updater output: state-update.patch + allowed_paths: + - story/plot-state.md + - story/characters.md + - story/timeline.md + - story/unresolved-threads.md - id: normalize_state type: patch_normalizer diff --git a/nightshift/terminal.py b/nightshift/terminal.py index 46a954d..b6bc4e8 100644 --- a/nightshift/terminal.py +++ b/nightshift/terminal.py @@ -161,7 +161,7 @@ class TerminalAnimation: self.message = message self.stream = stream or sys.stderr self.interval_seconds = interval_seconds - self.enabled = enabled and should_style(self.stream) + self.enabled = enabled and should_animate(self.stream) self._stop = threading.Event() self._thread: threading.Thread | None = None self._width = 0 @@ -177,6 +177,7 @@ class TerminalAnimation: def start(self) -> None: if not self.enabled or self._thread is not None: return + self._render_frame(0) self._thread = threading.Thread(target=self._run, daemon=True) self._thread.start() @@ -193,18 +194,21 @@ class TerminalAnimation: self.message = message def _run(self) -> None: - index = 0 + index = 1 while not self._stop.is_set(): - frame = self.frames[index % len(self.frames)] - with self._lock: - message = self.message - text = f"{frame} | {message}" - self._width = max(self._width, len(text)) - self.stream.write("\r" + text.ljust(self._width)) - self.stream.flush() + self._render_frame(index) index += 1 self._stop.wait(self.interval_seconds) + def _render_frame(self, index: int) -> None: + frame = self.frames[index % len(self.frames)] + with self._lock: + message = self.message + text = f"{frame} | {message}" + self._width = max(self._width, len(text)) + self.stream.write("\r" + text.ljust(self._width)) + self.stream.flush() + def _clear(self) -> None: if not self.enabled: return @@ -218,6 +222,20 @@ def animation_frames(name: str) -> tuple[str, ...]: frames = HOTDOG_ANIMATIONS["agent_thinking"] return tuple(frames) + +def should_animate(stream: TextIO | None = None) -> bool: + stream = stream or sys.stderr + if os.environ.get("TERM") == "dumb": + return False + if bool(getattr(stream, "isatty", lambda: False)()): + return True + if stream is sys.stderr and bool(getattr(sys.stdout, "isatty", lambda: False)()): + return True + if stream is sys.stdout and bool(getattr(sys.stderr, "isatty", lambda: False)()): + return True + return False + + def should_style(stream: TextIO | None = None) -> bool: stream = stream or sys.stdout if os.environ.get("NO_COLOR"): diff --git a/tests/test_agents.py b/tests/test_agents.py index 016aae9..611fd8e 100644 --- a/tests/test_agents.py +++ b/tests/test_agents.py @@ -71,6 +71,27 @@ class AgentExecutorTests(unittest.TestCase): self.assertIn("On repair attempts", prompt) self.assertIn("failed stage output", prompt) + self.assertIn("Use real project-relative paths", prompt) + self.assertNotIn("relative/path.py", prompt) + self.assertNotIn("including tests", prompt) + + def test_file_writer_contract_includes_stage_allowed_paths(self) -> None: + task = parse_tasks(TASK_MD)[0] + prompt = build_prompt_bundle( + system_prompt="System rules", + stage=StageConfig( + id="write", + type="file_writer", + agent="writer", + allowed_paths=("story/chapters",), + ), + project_context="Project context", + task=task, + previous_outputs={}, + retry_notes=[], + ) + + self.assertIn("Use only paths under these project-relative targets: `story/chapters`.", prompt) def test_command_agent_writes_output_and_returns_pass(self) -> None: with tempfile.TemporaryDirectory() as directory: diff --git a/tests/test_init.py b/tests/test_init.py index f0a3afe..9fbd417 100644 --- a/tests/test_init.py +++ b/tests/test_init.py @@ -119,6 +119,8 @@ class InitProjectTests(unittest.TestCase): self.assertTrue((root / "story" / "plot-state.md").exists()) self.assertTrue((root / "story" / "chapters" / ".gitkeep").exists()) self.assertIn("type: file_writer", config) + self.assertIn("model: nightshift-writer", config) + self.assertIn("model: nightshift-base", config) self.assertIn("story/chapters", config) self.assertIn("story/worldbuilding.md", gitignore) self.assertIn("story/chapters/**/*.md", gitignore) diff --git a/tests/test_pipeline.py b/tests/test_pipeline.py index d2e9c04..e78ccfa 100644 --- a/tests/test_pipeline.py +++ b/tests/test_pipeline.py @@ -715,6 +715,49 @@ Acceptance Criteria: self.assertEqual(result.status, "complete") self.assertIn("+new", patch.read_text(encoding="utf-8")) + def test_file_writer_invalid_output_retry_uses_compact_summary(self) -> None: + with tempfile.TemporaryDirectory() as directory: + root = Path(directory) + _write_common_files(root) + (root / "app.py").write_text("old\n", encoding="utf-8") + (root / "fake_writer.py").write_text( + "\n".join( + [ + "import sys", + "prompt = sys.stdin.read()", + "if 'Previous file_writer output was invalid' not in prompt:", + " print('```file:app.py')", + " print('x' * 5000)", + "else:", + " (open('retry-prompt.txt', 'w', encoding='utf-8').write(prompt))", + " print('```file:app.py')", + " print('new')", + " print('```')", + ] + ), + encoding="utf-8", + ) + stages = ( + StageConfig(id="write", type="file_writer", agent="writer"), + StageConfig(id="validate", type="patch_validator"), + ) + config = make_config(root, stages) + config.agents["writer"] = AgentConfig( + id="writer", + backend="command", + command="python fake_writer.py", + system_prompt=Path("planner.md"), + ) + runner = PipelineRunner(config, ArtifactStore(root, ".nightshift", run_id="test-run")) + + result = runner.run_task(parse_tasks(TASK_MD)[0]) + + retry_prompt = (root / "retry-prompt.txt").read_text(encoding="utf-8") + self.assertEqual(result.status, "complete") + self.assertIn("invalid_file_writer_output_summary", retry_prompt) + self.assertIn("... ", retry_prompt) + self.assertLess(len(retry_prompt), 9000) + def test_patch_validator_rejects_unsafe_patch(self) -> None: with tempfile.TemporaryDirectory() as directory: root = Path(directory) diff --git a/tests/test_terminal.py b/tests/test_terminal.py index 1764323..e77af2e 100644 --- a/tests/test_terminal.py +++ b/tests/test_terminal.py @@ -12,6 +12,7 @@ from nightshift.terminal import ( animation_frames, format_banner, format_console_event_line, + should_animate, ) @@ -45,6 +46,26 @@ class TerminalStylingTests(unittest.TestCase): self.assertEqual(stream.getvalue(), "") + def test_terminal_animation_renders_immediately_when_started(self) -> None: + stream = FakeTTY() + animation = TerminalAnimation( + name="status_dots", + message="Starting", + stream=stream, + interval_seconds=60, + ) + + animation.start() + output = stream.getvalue() + animation.stop() + + self.assertIn("[. ] | Starting", output) + + def test_terminal_animation_does_not_depend_on_color_output(self) -> None: + stream = FakeTTY() + with patch.dict("os.environ", {"NO_COLOR": "1"}): + self.assertTrue(should_animate(stream)) + def test_console_event_line_colors_success_and_failure(self) -> None: success = format_console_event_line( "2026-05-17T00:00:00Z",