mirror of
https://github.com/khodges42/nightShift.git
synced 2026-06-14 10:08:37 +00:00
Accuracy tweaks for story writer
This commit is contained in:
parent
9157853c10
commit
dee87434e7
|
|
@ -460,15 +460,17 @@ def output_contract_for(stage: StageConfig) -> str:
|
|||
]
|
||||
)
|
||||
if stage.type == "file_writer":
|
||||
allowed = _format_allowed_file_writer_paths(stage.allowed_paths)
|
||||
return "\n".join(
|
||||
[
|
||||
"Return complete file contents only.",
|
||||
"Use one fenced block per file with this exact opening form:",
|
||||
"```file:relative/path.py",
|
||||
"```file:path/inside/project.ext",
|
||||
"<complete file content>",
|
||||
"```",
|
||||
allowed,
|
||||
"Do not include prose outside file blocks.",
|
||||
"Include every file needed for the task, including tests.",
|
||||
"Include only files required for this stage and task.",
|
||||
"NightShift will generate the unified diff deterministically.",
|
||||
"On repair attempts, use the retry notes and failed stage output to diagnose the root cause before changing files.",
|
||||
"Do not repeat an unchanged solution unless the failure output shows the implementation is already correct.",
|
||||
|
|
@ -514,6 +516,13 @@ def output_contract_for(stage: StageConfig) -> str:
|
|||
return "Write the requested stage output in concise markdown."
|
||||
|
||||
|
||||
def _format_allowed_file_writer_paths(allowed_paths: tuple[str, ...]) -> str:
|
||||
if not allowed_paths:
|
||||
return "Use real project-relative paths, not placeholder paths."
|
||||
paths = ", ".join(f"`{path}`" for path in allowed_paths)
|
||||
return f"Use only paths under these project-relative targets: {paths}."
|
||||
|
||||
|
||||
def parse_review_output(output: str) -> tuple[StageStatus, str, str | None, str | None]:
|
||||
values: dict[str, str] = {}
|
||||
for line in output.splitlines():
|
||||
|
|
|
|||
|
|
@ -771,10 +771,14 @@ class PipelineRunner:
|
|||
task_id=task.id,
|
||||
)
|
||||
rerun_outputs = dict(enriched_outputs)
|
||||
rerun_outputs["invalid_file_writer_output"] = stdout
|
||||
rerun_outputs["invalid_file_writer_output_summary"] = _invalid_file_writer_output_summary(
|
||||
stdout,
|
||||
str(exc),
|
||||
)
|
||||
strict_notes = [
|
||||
*retry_notes,
|
||||
"Previous file_writer output was invalid. Return complete file blocks now. Do not output lookup_requests, prose, or 'lookup failed'.",
|
||||
"Use complete fenced file blocks with both the opening ```file:path and closing ``` fence.",
|
||||
]
|
||||
result = self.agent_executor.run_stage(
|
||||
agent_stage,
|
||||
|
|
@ -1529,6 +1533,24 @@ def _filter_future_task_test_lines(text: str, task_id: str) -> str:
|
|||
return "\n".join(kept)
|
||||
|
||||
|
||||
def _invalid_file_writer_output_summary(output: str, reason: str, max_chars: int = 1200) -> str:
|
||||
lines = [
|
||||
f"Reason: {reason}",
|
||||
f"Output length: {len(output)} characters",
|
||||
]
|
||||
lowered = output.lower()
|
||||
if "```file:" in lowered or "```path:" in lowered:
|
||||
lines.append("The output started a file block, but NightShift could not parse a complete closed block.")
|
||||
else:
|
||||
lines.append("The output did not contain a parseable file block.")
|
||||
excerpt = output.strip()
|
||||
if excerpt:
|
||||
if len(excerpt) > max_chars:
|
||||
excerpt = excerpt[:max_chars].rstrip() + "\n... <truncated>"
|
||||
lines.extend(["", "Excerpt:", "```text", excerpt, "```"])
|
||||
return "\n".join(lines)
|
||||
|
||||
|
||||
def _repeated_protected_path_violation(entries: tuple[RetryMemoryEntry, ...]) -> bool:
|
||||
recent = entries[-2:]
|
||||
if len(recent) < 2:
|
||||
|
|
|
|||
|
|
@ -23,7 +23,7 @@ experiment:
|
|||
agents:
|
||||
planner:
|
||||
backend: ollama
|
||||
model: hf.co/TheDrummer/Snowpiercer-15B-v4-GGUF:Q5_K_M
|
||||
model: nightshift-base
|
||||
temperature: 0.4
|
||||
num_ctx: 8192
|
||||
num_predict: 4096
|
||||
|
|
@ -31,15 +31,15 @@ agents:
|
|||
|
||||
drafter:
|
||||
backend: ollama
|
||||
model: hf.co/TheDrummer/Snowpiercer-15B-v4-GGUF:Q5_K_M
|
||||
model: nightshift-writer
|
||||
temperature: 0.7
|
||||
num_ctx: 8192
|
||||
num_predict: 4096
|
||||
num_ctx: 16384
|
||||
num_predict: 8192
|
||||
system_prompt: .nightshift/agents/drafter.md
|
||||
|
||||
continuity_reviewer:
|
||||
backend: ollama
|
||||
model: hf.co/TheDrummer/Snowpiercer-15B-v4-GGUF:Q5_K_M
|
||||
model: nightshift-base
|
||||
temperature: 0.2
|
||||
num_ctx: 8192
|
||||
num_predict: 4096
|
||||
|
|
@ -47,7 +47,7 @@ agents:
|
|||
|
||||
style_reviewer:
|
||||
backend: ollama
|
||||
model: hf.co/TheDrummer/Snowpiercer-15B-v4-GGUF:Q5_K_M
|
||||
model: nightshift-base
|
||||
temperature: 0.3
|
||||
num_ctx: 8192
|
||||
num_predict: 4096
|
||||
|
|
@ -55,9 +55,9 @@ agents:
|
|||
|
||||
state_updater:
|
||||
backend: ollama
|
||||
model: hf.co/TheDrummer/Snowpiercer-15B-v4-GGUF:Q5_K_M
|
||||
model: nightshift-writer
|
||||
temperature: 0.2
|
||||
num_ctx: 8192
|
||||
num_ctx: 16384
|
||||
num_predict: 4096
|
||||
system_prompt: .nightshift/agents/state-updater.md
|
||||
|
||||
|
|
@ -83,6 +83,8 @@ pipeline:
|
|||
type: file_writer
|
||||
agent: drafter
|
||||
output: scene-draft.patch
|
||||
allowed_paths:
|
||||
- story/chapters
|
||||
|
||||
- id: normalize_draft
|
||||
type: patch_normalizer
|
||||
|
|
@ -120,6 +122,11 @@ pipeline:
|
|||
type: file_writer
|
||||
agent: state_updater
|
||||
output: state-update.patch
|
||||
allowed_paths:
|
||||
- story/plot-state.md
|
||||
- story/characters.md
|
||||
- story/timeline.md
|
||||
- story/unresolved-threads.md
|
||||
|
||||
- id: normalize_state
|
||||
type: patch_normalizer
|
||||
|
|
|
|||
|
|
@ -161,7 +161,7 @@ class TerminalAnimation:
|
|||
self.message = message
|
||||
self.stream = stream or sys.stderr
|
||||
self.interval_seconds = interval_seconds
|
||||
self.enabled = enabled and should_style(self.stream)
|
||||
self.enabled = enabled and should_animate(self.stream)
|
||||
self._stop = threading.Event()
|
||||
self._thread: threading.Thread | None = None
|
||||
self._width = 0
|
||||
|
|
@ -177,6 +177,7 @@ class TerminalAnimation:
|
|||
def start(self) -> None:
|
||||
if not self.enabled or self._thread is not None:
|
||||
return
|
||||
self._render_frame(0)
|
||||
self._thread = threading.Thread(target=self._run, daemon=True)
|
||||
self._thread.start()
|
||||
|
||||
|
|
@ -193,18 +194,21 @@ class TerminalAnimation:
|
|||
self.message = message
|
||||
|
||||
def _run(self) -> None:
|
||||
index = 0
|
||||
index = 1
|
||||
while not self._stop.is_set():
|
||||
frame = self.frames[index % len(self.frames)]
|
||||
with self._lock:
|
||||
message = self.message
|
||||
text = f"{frame} | {message}"
|
||||
self._width = max(self._width, len(text))
|
||||
self.stream.write("\r" + text.ljust(self._width))
|
||||
self.stream.flush()
|
||||
self._render_frame(index)
|
||||
index += 1
|
||||
self._stop.wait(self.interval_seconds)
|
||||
|
||||
def _render_frame(self, index: int) -> None:
|
||||
frame = self.frames[index % len(self.frames)]
|
||||
with self._lock:
|
||||
message = self.message
|
||||
text = f"{frame} | {message}"
|
||||
self._width = max(self._width, len(text))
|
||||
self.stream.write("\r" + text.ljust(self._width))
|
||||
self.stream.flush()
|
||||
|
||||
def _clear(self) -> None:
|
||||
if not self.enabled:
|
||||
return
|
||||
|
|
@ -218,6 +222,20 @@ def animation_frames(name: str) -> tuple[str, ...]:
|
|||
frames = HOTDOG_ANIMATIONS["agent_thinking"]
|
||||
return tuple(frames)
|
||||
|
||||
|
||||
def should_animate(stream: TextIO | None = None) -> bool:
|
||||
stream = stream or sys.stderr
|
||||
if os.environ.get("TERM") == "dumb":
|
||||
return False
|
||||
if bool(getattr(stream, "isatty", lambda: False)()):
|
||||
return True
|
||||
if stream is sys.stderr and bool(getattr(sys.stdout, "isatty", lambda: False)()):
|
||||
return True
|
||||
if stream is sys.stdout and bool(getattr(sys.stderr, "isatty", lambda: False)()):
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def should_style(stream: TextIO | None = None) -> bool:
|
||||
stream = stream or sys.stdout
|
||||
if os.environ.get("NO_COLOR"):
|
||||
|
|
|
|||
|
|
@ -71,6 +71,27 @@ class AgentExecutorTests(unittest.TestCase):
|
|||
|
||||
self.assertIn("On repair attempts", prompt)
|
||||
self.assertIn("failed stage output", prompt)
|
||||
self.assertIn("Use real project-relative paths", prompt)
|
||||
self.assertNotIn("relative/path.py", prompt)
|
||||
self.assertNotIn("including tests", prompt)
|
||||
|
||||
def test_file_writer_contract_includes_stage_allowed_paths(self) -> None:
|
||||
task = parse_tasks(TASK_MD)[0]
|
||||
prompt = build_prompt_bundle(
|
||||
system_prompt="System rules",
|
||||
stage=StageConfig(
|
||||
id="write",
|
||||
type="file_writer",
|
||||
agent="writer",
|
||||
allowed_paths=("story/chapters",),
|
||||
),
|
||||
project_context="Project context",
|
||||
task=task,
|
||||
previous_outputs={},
|
||||
retry_notes=[],
|
||||
)
|
||||
|
||||
self.assertIn("Use only paths under these project-relative targets: `story/chapters`.", prompt)
|
||||
|
||||
def test_command_agent_writes_output_and_returns_pass(self) -> None:
|
||||
with tempfile.TemporaryDirectory() as directory:
|
||||
|
|
|
|||
|
|
@ -119,6 +119,8 @@ class InitProjectTests(unittest.TestCase):
|
|||
self.assertTrue((root / "story" / "plot-state.md").exists())
|
||||
self.assertTrue((root / "story" / "chapters" / ".gitkeep").exists())
|
||||
self.assertIn("type: file_writer", config)
|
||||
self.assertIn("model: nightshift-writer", config)
|
||||
self.assertIn("model: nightshift-base", config)
|
||||
self.assertIn("story/chapters", config)
|
||||
self.assertIn("story/worldbuilding.md", gitignore)
|
||||
self.assertIn("story/chapters/**/*.md", gitignore)
|
||||
|
|
|
|||
|
|
@ -715,6 +715,49 @@ Acceptance Criteria:
|
|||
self.assertEqual(result.status, "complete")
|
||||
self.assertIn("+new", patch.read_text(encoding="utf-8"))
|
||||
|
||||
def test_file_writer_invalid_output_retry_uses_compact_summary(self) -> None:
|
||||
with tempfile.TemporaryDirectory() as directory:
|
||||
root = Path(directory)
|
||||
_write_common_files(root)
|
||||
(root / "app.py").write_text("old\n", encoding="utf-8")
|
||||
(root / "fake_writer.py").write_text(
|
||||
"\n".join(
|
||||
[
|
||||
"import sys",
|
||||
"prompt = sys.stdin.read()",
|
||||
"if 'Previous file_writer output was invalid' not in prompt:",
|
||||
" print('```file:app.py')",
|
||||
" print('x' * 5000)",
|
||||
"else:",
|
||||
" (open('retry-prompt.txt', 'w', encoding='utf-8').write(prompt))",
|
||||
" print('```file:app.py')",
|
||||
" print('new')",
|
||||
" print('```')",
|
||||
]
|
||||
),
|
||||
encoding="utf-8",
|
||||
)
|
||||
stages = (
|
||||
StageConfig(id="write", type="file_writer", agent="writer"),
|
||||
StageConfig(id="validate", type="patch_validator"),
|
||||
)
|
||||
config = make_config(root, stages)
|
||||
config.agents["writer"] = AgentConfig(
|
||||
id="writer",
|
||||
backend="command",
|
||||
command="python fake_writer.py",
|
||||
system_prompt=Path("planner.md"),
|
||||
)
|
||||
runner = PipelineRunner(config, ArtifactStore(root, ".nightshift", run_id="test-run"))
|
||||
|
||||
result = runner.run_task(parse_tasks(TASK_MD)[0])
|
||||
|
||||
retry_prompt = (root / "retry-prompt.txt").read_text(encoding="utf-8")
|
||||
self.assertEqual(result.status, "complete")
|
||||
self.assertIn("invalid_file_writer_output_summary", retry_prompt)
|
||||
self.assertIn("... <truncated>", retry_prompt)
|
||||
self.assertLess(len(retry_prompt), 9000)
|
||||
|
||||
def test_patch_validator_rejects_unsafe_patch(self) -> None:
|
||||
with tempfile.TemporaryDirectory() as directory:
|
||||
root = Path(directory)
|
||||
|
|
|
|||
|
|
@ -12,6 +12,7 @@ from nightshift.terminal import (
|
|||
animation_frames,
|
||||
format_banner,
|
||||
format_console_event_line,
|
||||
should_animate,
|
||||
)
|
||||
|
||||
|
||||
|
|
@ -45,6 +46,26 @@ class TerminalStylingTests(unittest.TestCase):
|
|||
|
||||
self.assertEqual(stream.getvalue(), "")
|
||||
|
||||
def test_terminal_animation_renders_immediately_when_started(self) -> None:
|
||||
stream = FakeTTY()
|
||||
animation = TerminalAnimation(
|
||||
name="status_dots",
|
||||
message="Starting",
|
||||
stream=stream,
|
||||
interval_seconds=60,
|
||||
)
|
||||
|
||||
animation.start()
|
||||
output = stream.getvalue()
|
||||
animation.stop()
|
||||
|
||||
self.assertIn("[. ] | Starting", output)
|
||||
|
||||
def test_terminal_animation_does_not_depend_on_color_output(self) -> None:
|
||||
stream = FakeTTY()
|
||||
with patch.dict("os.environ", {"NO_COLOR": "1"}):
|
||||
self.assertTrue(should_animate(stream))
|
||||
|
||||
def test_console_event_line_colors_success_and_failure(self) -> None:
|
||||
success = format_console_event_line(
|
||||
"2026-05-17T00:00:00Z",
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user