mirror of
https://github.com/khodges42/nightShift.git
synced 2026-06-14 18:18:36 +00:00
Accuracy tweaks for story writer
This commit is contained in:
parent
9157853c10
commit
dee87434e7
|
|
@ -460,15 +460,17 @@ def output_contract_for(stage: StageConfig) -> str:
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
if stage.type == "file_writer":
|
if stage.type == "file_writer":
|
||||||
|
allowed = _format_allowed_file_writer_paths(stage.allowed_paths)
|
||||||
return "\n".join(
|
return "\n".join(
|
||||||
[
|
[
|
||||||
"Return complete file contents only.",
|
"Return complete file contents only.",
|
||||||
"Use one fenced block per file with this exact opening form:",
|
"Use one fenced block per file with this exact opening form:",
|
||||||
"```file:relative/path.py",
|
"```file:path/inside/project.ext",
|
||||||
"<complete file content>",
|
"<complete file content>",
|
||||||
"```",
|
"```",
|
||||||
|
allowed,
|
||||||
"Do not include prose outside file blocks.",
|
"Do not include prose outside file blocks.",
|
||||||
"Include every file needed for the task, including tests.",
|
"Include only files required for this stage and task.",
|
||||||
"NightShift will generate the unified diff deterministically.",
|
"NightShift will generate the unified diff deterministically.",
|
||||||
"On repair attempts, use the retry notes and failed stage output to diagnose the root cause before changing files.",
|
"On repair attempts, use the retry notes and failed stage output to diagnose the root cause before changing files.",
|
||||||
"Do not repeat an unchanged solution unless the failure output shows the implementation is already correct.",
|
"Do not repeat an unchanged solution unless the failure output shows the implementation is already correct.",
|
||||||
|
|
@ -514,6 +516,13 @@ def output_contract_for(stage: StageConfig) -> str:
|
||||||
return "Write the requested stage output in concise markdown."
|
return "Write the requested stage output in concise markdown."
|
||||||
|
|
||||||
|
|
||||||
|
def _format_allowed_file_writer_paths(allowed_paths: tuple[str, ...]) -> str:
|
||||||
|
if not allowed_paths:
|
||||||
|
return "Use real project-relative paths, not placeholder paths."
|
||||||
|
paths = ", ".join(f"`{path}`" for path in allowed_paths)
|
||||||
|
return f"Use only paths under these project-relative targets: {paths}."
|
||||||
|
|
||||||
|
|
||||||
def parse_review_output(output: str) -> tuple[StageStatus, str, str | None, str | None]:
|
def parse_review_output(output: str) -> tuple[StageStatus, str, str | None, str | None]:
|
||||||
values: dict[str, str] = {}
|
values: dict[str, str] = {}
|
||||||
for line in output.splitlines():
|
for line in output.splitlines():
|
||||||
|
|
|
||||||
|
|
@ -771,10 +771,14 @@ class PipelineRunner:
|
||||||
task_id=task.id,
|
task_id=task.id,
|
||||||
)
|
)
|
||||||
rerun_outputs = dict(enriched_outputs)
|
rerun_outputs = dict(enriched_outputs)
|
||||||
rerun_outputs["invalid_file_writer_output"] = stdout
|
rerun_outputs["invalid_file_writer_output_summary"] = _invalid_file_writer_output_summary(
|
||||||
|
stdout,
|
||||||
|
str(exc),
|
||||||
|
)
|
||||||
strict_notes = [
|
strict_notes = [
|
||||||
*retry_notes,
|
*retry_notes,
|
||||||
"Previous file_writer output was invalid. Return complete file blocks now. Do not output lookup_requests, prose, or 'lookup failed'.",
|
"Previous file_writer output was invalid. Return complete file blocks now. Do not output lookup_requests, prose, or 'lookup failed'.",
|
||||||
|
"Use complete fenced file blocks with both the opening ```file:path and closing ``` fence.",
|
||||||
]
|
]
|
||||||
result = self.agent_executor.run_stage(
|
result = self.agent_executor.run_stage(
|
||||||
agent_stage,
|
agent_stage,
|
||||||
|
|
@ -1529,6 +1533,24 @@ def _filter_future_task_test_lines(text: str, task_id: str) -> str:
|
||||||
return "\n".join(kept)
|
return "\n".join(kept)
|
||||||
|
|
||||||
|
|
||||||
|
def _invalid_file_writer_output_summary(output: str, reason: str, max_chars: int = 1200) -> str:
|
||||||
|
lines = [
|
||||||
|
f"Reason: {reason}",
|
||||||
|
f"Output length: {len(output)} characters",
|
||||||
|
]
|
||||||
|
lowered = output.lower()
|
||||||
|
if "```file:" in lowered or "```path:" in lowered:
|
||||||
|
lines.append("The output started a file block, but NightShift could not parse a complete closed block.")
|
||||||
|
else:
|
||||||
|
lines.append("The output did not contain a parseable file block.")
|
||||||
|
excerpt = output.strip()
|
||||||
|
if excerpt:
|
||||||
|
if len(excerpt) > max_chars:
|
||||||
|
excerpt = excerpt[:max_chars].rstrip() + "\n... <truncated>"
|
||||||
|
lines.extend(["", "Excerpt:", "```text", excerpt, "```"])
|
||||||
|
return "\n".join(lines)
|
||||||
|
|
||||||
|
|
||||||
def _repeated_protected_path_violation(entries: tuple[RetryMemoryEntry, ...]) -> bool:
|
def _repeated_protected_path_violation(entries: tuple[RetryMemoryEntry, ...]) -> bool:
|
||||||
recent = entries[-2:]
|
recent = entries[-2:]
|
||||||
if len(recent) < 2:
|
if len(recent) < 2:
|
||||||
|
|
|
||||||
|
|
@ -23,7 +23,7 @@ experiment:
|
||||||
agents:
|
agents:
|
||||||
planner:
|
planner:
|
||||||
backend: ollama
|
backend: ollama
|
||||||
model: hf.co/TheDrummer/Snowpiercer-15B-v4-GGUF:Q5_K_M
|
model: nightshift-base
|
||||||
temperature: 0.4
|
temperature: 0.4
|
||||||
num_ctx: 8192
|
num_ctx: 8192
|
||||||
num_predict: 4096
|
num_predict: 4096
|
||||||
|
|
@ -31,15 +31,15 @@ agents:
|
||||||
|
|
||||||
drafter:
|
drafter:
|
||||||
backend: ollama
|
backend: ollama
|
||||||
model: hf.co/TheDrummer/Snowpiercer-15B-v4-GGUF:Q5_K_M
|
model: nightshift-writer
|
||||||
temperature: 0.7
|
temperature: 0.7
|
||||||
num_ctx: 8192
|
num_ctx: 16384
|
||||||
num_predict: 4096
|
num_predict: 8192
|
||||||
system_prompt: .nightshift/agents/drafter.md
|
system_prompt: .nightshift/agents/drafter.md
|
||||||
|
|
||||||
continuity_reviewer:
|
continuity_reviewer:
|
||||||
backend: ollama
|
backend: ollama
|
||||||
model: hf.co/TheDrummer/Snowpiercer-15B-v4-GGUF:Q5_K_M
|
model: nightshift-base
|
||||||
temperature: 0.2
|
temperature: 0.2
|
||||||
num_ctx: 8192
|
num_ctx: 8192
|
||||||
num_predict: 4096
|
num_predict: 4096
|
||||||
|
|
@ -47,7 +47,7 @@ agents:
|
||||||
|
|
||||||
style_reviewer:
|
style_reviewer:
|
||||||
backend: ollama
|
backend: ollama
|
||||||
model: hf.co/TheDrummer/Snowpiercer-15B-v4-GGUF:Q5_K_M
|
model: nightshift-base
|
||||||
temperature: 0.3
|
temperature: 0.3
|
||||||
num_ctx: 8192
|
num_ctx: 8192
|
||||||
num_predict: 4096
|
num_predict: 4096
|
||||||
|
|
@ -55,9 +55,9 @@ agents:
|
||||||
|
|
||||||
state_updater:
|
state_updater:
|
||||||
backend: ollama
|
backend: ollama
|
||||||
model: hf.co/TheDrummer/Snowpiercer-15B-v4-GGUF:Q5_K_M
|
model: nightshift-writer
|
||||||
temperature: 0.2
|
temperature: 0.2
|
||||||
num_ctx: 8192
|
num_ctx: 16384
|
||||||
num_predict: 4096
|
num_predict: 4096
|
||||||
system_prompt: .nightshift/agents/state-updater.md
|
system_prompt: .nightshift/agents/state-updater.md
|
||||||
|
|
||||||
|
|
@ -83,6 +83,8 @@ pipeline:
|
||||||
type: file_writer
|
type: file_writer
|
||||||
agent: drafter
|
agent: drafter
|
||||||
output: scene-draft.patch
|
output: scene-draft.patch
|
||||||
|
allowed_paths:
|
||||||
|
- story/chapters
|
||||||
|
|
||||||
- id: normalize_draft
|
- id: normalize_draft
|
||||||
type: patch_normalizer
|
type: patch_normalizer
|
||||||
|
|
@ -120,6 +122,11 @@ pipeline:
|
||||||
type: file_writer
|
type: file_writer
|
||||||
agent: state_updater
|
agent: state_updater
|
||||||
output: state-update.patch
|
output: state-update.patch
|
||||||
|
allowed_paths:
|
||||||
|
- story/plot-state.md
|
||||||
|
- story/characters.md
|
||||||
|
- story/timeline.md
|
||||||
|
- story/unresolved-threads.md
|
||||||
|
|
||||||
- id: normalize_state
|
- id: normalize_state
|
||||||
type: patch_normalizer
|
type: patch_normalizer
|
||||||
|
|
|
||||||
|
|
@ -161,7 +161,7 @@ class TerminalAnimation:
|
||||||
self.message = message
|
self.message = message
|
||||||
self.stream = stream or sys.stderr
|
self.stream = stream or sys.stderr
|
||||||
self.interval_seconds = interval_seconds
|
self.interval_seconds = interval_seconds
|
||||||
self.enabled = enabled and should_style(self.stream)
|
self.enabled = enabled and should_animate(self.stream)
|
||||||
self._stop = threading.Event()
|
self._stop = threading.Event()
|
||||||
self._thread: threading.Thread | None = None
|
self._thread: threading.Thread | None = None
|
||||||
self._width = 0
|
self._width = 0
|
||||||
|
|
@ -177,6 +177,7 @@ class TerminalAnimation:
|
||||||
def start(self) -> None:
|
def start(self) -> None:
|
||||||
if not self.enabled or self._thread is not None:
|
if not self.enabled or self._thread is not None:
|
||||||
return
|
return
|
||||||
|
self._render_frame(0)
|
||||||
self._thread = threading.Thread(target=self._run, daemon=True)
|
self._thread = threading.Thread(target=self._run, daemon=True)
|
||||||
self._thread.start()
|
self._thread.start()
|
||||||
|
|
||||||
|
|
@ -193,8 +194,13 @@ class TerminalAnimation:
|
||||||
self.message = message
|
self.message = message
|
||||||
|
|
||||||
def _run(self) -> None:
|
def _run(self) -> None:
|
||||||
index = 0
|
index = 1
|
||||||
while not self._stop.is_set():
|
while not self._stop.is_set():
|
||||||
|
self._render_frame(index)
|
||||||
|
index += 1
|
||||||
|
self._stop.wait(self.interval_seconds)
|
||||||
|
|
||||||
|
def _render_frame(self, index: int) -> None:
|
||||||
frame = self.frames[index % len(self.frames)]
|
frame = self.frames[index % len(self.frames)]
|
||||||
with self._lock:
|
with self._lock:
|
||||||
message = self.message
|
message = self.message
|
||||||
|
|
@ -202,8 +208,6 @@ class TerminalAnimation:
|
||||||
self._width = max(self._width, len(text))
|
self._width = max(self._width, len(text))
|
||||||
self.stream.write("\r" + text.ljust(self._width))
|
self.stream.write("\r" + text.ljust(self._width))
|
||||||
self.stream.flush()
|
self.stream.flush()
|
||||||
index += 1
|
|
||||||
self._stop.wait(self.interval_seconds)
|
|
||||||
|
|
||||||
def _clear(self) -> None:
|
def _clear(self) -> None:
|
||||||
if not self.enabled:
|
if not self.enabled:
|
||||||
|
|
@ -218,6 +222,20 @@ def animation_frames(name: str) -> tuple[str, ...]:
|
||||||
frames = HOTDOG_ANIMATIONS["agent_thinking"]
|
frames = HOTDOG_ANIMATIONS["agent_thinking"]
|
||||||
return tuple(frames)
|
return tuple(frames)
|
||||||
|
|
||||||
|
|
||||||
|
def should_animate(stream: TextIO | None = None) -> bool:
|
||||||
|
stream = stream or sys.stderr
|
||||||
|
if os.environ.get("TERM") == "dumb":
|
||||||
|
return False
|
||||||
|
if bool(getattr(stream, "isatty", lambda: False)()):
|
||||||
|
return True
|
||||||
|
if stream is sys.stderr and bool(getattr(sys.stdout, "isatty", lambda: False)()):
|
||||||
|
return True
|
||||||
|
if stream is sys.stdout and bool(getattr(sys.stderr, "isatty", lambda: False)()):
|
||||||
|
return True
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
def should_style(stream: TextIO | None = None) -> bool:
|
def should_style(stream: TextIO | None = None) -> bool:
|
||||||
stream = stream or sys.stdout
|
stream = stream or sys.stdout
|
||||||
if os.environ.get("NO_COLOR"):
|
if os.environ.get("NO_COLOR"):
|
||||||
|
|
|
||||||
|
|
@ -71,6 +71,27 @@ class AgentExecutorTests(unittest.TestCase):
|
||||||
|
|
||||||
self.assertIn("On repair attempts", prompt)
|
self.assertIn("On repair attempts", prompt)
|
||||||
self.assertIn("failed stage output", prompt)
|
self.assertIn("failed stage output", prompt)
|
||||||
|
self.assertIn("Use real project-relative paths", prompt)
|
||||||
|
self.assertNotIn("relative/path.py", prompt)
|
||||||
|
self.assertNotIn("including tests", prompt)
|
||||||
|
|
||||||
|
def test_file_writer_contract_includes_stage_allowed_paths(self) -> None:
|
||||||
|
task = parse_tasks(TASK_MD)[0]
|
||||||
|
prompt = build_prompt_bundle(
|
||||||
|
system_prompt="System rules",
|
||||||
|
stage=StageConfig(
|
||||||
|
id="write",
|
||||||
|
type="file_writer",
|
||||||
|
agent="writer",
|
||||||
|
allowed_paths=("story/chapters",),
|
||||||
|
),
|
||||||
|
project_context="Project context",
|
||||||
|
task=task,
|
||||||
|
previous_outputs={},
|
||||||
|
retry_notes=[],
|
||||||
|
)
|
||||||
|
|
||||||
|
self.assertIn("Use only paths under these project-relative targets: `story/chapters`.", prompt)
|
||||||
|
|
||||||
def test_command_agent_writes_output_and_returns_pass(self) -> None:
|
def test_command_agent_writes_output_and_returns_pass(self) -> None:
|
||||||
with tempfile.TemporaryDirectory() as directory:
|
with tempfile.TemporaryDirectory() as directory:
|
||||||
|
|
|
||||||
|
|
@ -119,6 +119,8 @@ class InitProjectTests(unittest.TestCase):
|
||||||
self.assertTrue((root / "story" / "plot-state.md").exists())
|
self.assertTrue((root / "story" / "plot-state.md").exists())
|
||||||
self.assertTrue((root / "story" / "chapters" / ".gitkeep").exists())
|
self.assertTrue((root / "story" / "chapters" / ".gitkeep").exists())
|
||||||
self.assertIn("type: file_writer", config)
|
self.assertIn("type: file_writer", config)
|
||||||
|
self.assertIn("model: nightshift-writer", config)
|
||||||
|
self.assertIn("model: nightshift-base", config)
|
||||||
self.assertIn("story/chapters", config)
|
self.assertIn("story/chapters", config)
|
||||||
self.assertIn("story/worldbuilding.md", gitignore)
|
self.assertIn("story/worldbuilding.md", gitignore)
|
||||||
self.assertIn("story/chapters/**/*.md", gitignore)
|
self.assertIn("story/chapters/**/*.md", gitignore)
|
||||||
|
|
|
||||||
|
|
@ -715,6 +715,49 @@ Acceptance Criteria:
|
||||||
self.assertEqual(result.status, "complete")
|
self.assertEqual(result.status, "complete")
|
||||||
self.assertIn("+new", patch.read_text(encoding="utf-8"))
|
self.assertIn("+new", patch.read_text(encoding="utf-8"))
|
||||||
|
|
||||||
|
def test_file_writer_invalid_output_retry_uses_compact_summary(self) -> None:
|
||||||
|
with tempfile.TemporaryDirectory() as directory:
|
||||||
|
root = Path(directory)
|
||||||
|
_write_common_files(root)
|
||||||
|
(root / "app.py").write_text("old\n", encoding="utf-8")
|
||||||
|
(root / "fake_writer.py").write_text(
|
||||||
|
"\n".join(
|
||||||
|
[
|
||||||
|
"import sys",
|
||||||
|
"prompt = sys.stdin.read()",
|
||||||
|
"if 'Previous file_writer output was invalid' not in prompt:",
|
||||||
|
" print('```file:app.py')",
|
||||||
|
" print('x' * 5000)",
|
||||||
|
"else:",
|
||||||
|
" (open('retry-prompt.txt', 'w', encoding='utf-8').write(prompt))",
|
||||||
|
" print('```file:app.py')",
|
||||||
|
" print('new')",
|
||||||
|
" print('```')",
|
||||||
|
]
|
||||||
|
),
|
||||||
|
encoding="utf-8",
|
||||||
|
)
|
||||||
|
stages = (
|
||||||
|
StageConfig(id="write", type="file_writer", agent="writer"),
|
||||||
|
StageConfig(id="validate", type="patch_validator"),
|
||||||
|
)
|
||||||
|
config = make_config(root, stages)
|
||||||
|
config.agents["writer"] = AgentConfig(
|
||||||
|
id="writer",
|
||||||
|
backend="command",
|
||||||
|
command="python fake_writer.py",
|
||||||
|
system_prompt=Path("planner.md"),
|
||||||
|
)
|
||||||
|
runner = PipelineRunner(config, ArtifactStore(root, ".nightshift", run_id="test-run"))
|
||||||
|
|
||||||
|
result = runner.run_task(parse_tasks(TASK_MD)[0])
|
||||||
|
|
||||||
|
retry_prompt = (root / "retry-prompt.txt").read_text(encoding="utf-8")
|
||||||
|
self.assertEqual(result.status, "complete")
|
||||||
|
self.assertIn("invalid_file_writer_output_summary", retry_prompt)
|
||||||
|
self.assertIn("... <truncated>", retry_prompt)
|
||||||
|
self.assertLess(len(retry_prompt), 9000)
|
||||||
|
|
||||||
def test_patch_validator_rejects_unsafe_patch(self) -> None:
|
def test_patch_validator_rejects_unsafe_patch(self) -> None:
|
||||||
with tempfile.TemporaryDirectory() as directory:
|
with tempfile.TemporaryDirectory() as directory:
|
||||||
root = Path(directory)
|
root = Path(directory)
|
||||||
|
|
|
||||||
|
|
@ -12,6 +12,7 @@ from nightshift.terminal import (
|
||||||
animation_frames,
|
animation_frames,
|
||||||
format_banner,
|
format_banner,
|
||||||
format_console_event_line,
|
format_console_event_line,
|
||||||
|
should_animate,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -45,6 +46,26 @@ class TerminalStylingTests(unittest.TestCase):
|
||||||
|
|
||||||
self.assertEqual(stream.getvalue(), "")
|
self.assertEqual(stream.getvalue(), "")
|
||||||
|
|
||||||
|
def test_terminal_animation_renders_immediately_when_started(self) -> None:
|
||||||
|
stream = FakeTTY()
|
||||||
|
animation = TerminalAnimation(
|
||||||
|
name="status_dots",
|
||||||
|
message="Starting",
|
||||||
|
stream=stream,
|
||||||
|
interval_seconds=60,
|
||||||
|
)
|
||||||
|
|
||||||
|
animation.start()
|
||||||
|
output = stream.getvalue()
|
||||||
|
animation.stop()
|
||||||
|
|
||||||
|
self.assertIn("[. ] | Starting", output)
|
||||||
|
|
||||||
|
def test_terminal_animation_does_not_depend_on_color_output(self) -> None:
|
||||||
|
stream = FakeTTY()
|
||||||
|
with patch.dict("os.environ", {"NO_COLOR": "1"}):
|
||||||
|
self.assertTrue(should_animate(stream))
|
||||||
|
|
||||||
def test_console_event_line_colors_success_and_failure(self) -> None:
|
def test_console_event_line_colors_success_and_failure(self) -> None:
|
||||||
success = format_console_event_line(
|
success = format_console_event_line(
|
||||||
"2026-05-17T00:00:00Z",
|
"2026-05-17T00:00:00Z",
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue
Block a user