diff --git a/docs/ideas.md b/docs/ideas.md index 825d770..03e1825 100644 --- a/docs/ideas.md +++ b/docs/ideas.md @@ -99,6 +99,34 @@ Examples: This keeps the initial useful output visible even when strict rerun output is worse. +## P1: Store Raw Agent Invocations As JSON + +The human-readable agent artifact wraps stdout, stderr, and prompts in markdown fences. Nested markdown fences from model output can confuse downstream parsing. + +Write a machine-readable artifact alongside the markdown artifact: + +```text +-agent-output.json +``` + +Suggested fields: + +```json +{ + "agent_id": "drafter", + "stage_id": "draft_scene", + "command": "POST http://localhost:11434/api/generate", + "exit_code": 0, + "timed_out": false, + "duration_seconds": 12.3, + "stdout": "...", + "stderr": "...", + "prompt": "..." +} +``` + +Pipeline parsing should read raw JSON fields instead of recovering stdout from markdown. + ## P1: Add A Writing-Mode Validator Add deterministic checks for prose workflows: diff --git a/nightshift/agents.py b/nightshift/agents.py index 8cd0b15..0d09825 100644 --- a/nightshift/agents.py +++ b/nightshift/agents.py @@ -460,16 +460,14 @@ def output_contract_for(stage: StageConfig) -> str: ] ) if stage.type == "file_writer": + contract = _file_writer_block_contract(stage) allowed = _format_allowed_file_writer_paths(stage.allowed_paths) return "\n".join( [ "Return complete file contents only.", - "Use one fenced block per file with this exact opening form:", - "```file:path/inside/project.ext", - "", - "```", + contract, allowed, - "Do not include prose outside file blocks.", + "Do not include prose outside file blocks or delimiter blocks.", "Include only files required for this stage and task.", "NightShift will generate the unified diff deterministically.", "On repair attempts, use the retry notes and failed stage output to diagnose the root cause before changing files.", @@ -516,6 +514,30 @@ def output_contract_for(stage: StageConfig) -> str: return "Write the requested stage output in concise markdown." +def _file_writer_block_contract(stage: StageConfig) -> str: + normalized = tuple(path.replace("\\", "/").rstrip("/") for path in stage.allowed_paths) + if normalized == ("story/chapters",): + return "\n".join( + [ + "Use exactly this delimiter format for the scene file:", + "FILE: story/chapters/chapter-001/scene-001.md", + "---CONTENT---", + "", + "---END---", + "Do not use markdown code fences for prose scene output.", + ] + ) + return "\n".join( + [ + "Use one fenced block per file with this exact opening form:", + "```file:path/inside/project.ext", + "", + "```", + "Alternatively, use FILE: path with ---CONTENT--- and ---END--- delimiters.", + ] + ) + + def _format_allowed_file_writer_paths(allowed_paths: tuple[str, ...]) -> str: if not allowed_paths: return "Use real project-relative paths, not placeholder paths." diff --git a/nightshift/cli.py b/nightshift/cli.py index e25cdef..28f3540 100644 --- a/nightshift/cli.py +++ b/nightshift/cli.py @@ -216,8 +216,9 @@ def main(argv: list[str] | None = None) -> int: message="Starting all tasks", enabled=not args.no_animation, ) as animation: - runner = PipelineRunner(config, logger=RunLogger(console=print, status=animation.update_message)) + runner = PipelineRunner(config, logger=RunLogger(console=animation.emit, status=animation.update_message)) result = runner.run_tasks(tasks) + animation.finish(f"Tasks run: {len(result.task_results)} | Completed: {result.completed_count} | Failed: {result.failed_count}", status=result.status) print(f"Status: {result.status}") print(f"Tasks run: {len(result.task_results)}") print(f"Completed: {result.completed_count}") @@ -232,8 +233,9 @@ def main(argv: list[str] | None = None) -> int: message=f"Task: {task.id} | Starting", enabled=not args.no_animation, ) as animation: - runner = PipelineRunner(config, logger=RunLogger(console=print, status=animation.update_message)) + runner = PipelineRunner(config, logger=RunLogger(console=animation.emit, status=animation.update_message)) result = runner.run_task(task) + animation.finish(f"Task: {result.task_id} | Retries: {result.retry_count} | {result.reason}", status=result.status) print(f"Task: {result.task_id}") print(style_text(f"Status: {result.status}", color=_status_color(result.status), bold=True)) print(f"Retries: {result.retry_count}") diff --git a/nightshift/patches.py b/nightshift/patches.py index 051f5d1..c64eced 100644 --- a/nightshift/patches.py +++ b/nightshift/patches.py @@ -93,6 +93,7 @@ def parse_file_updates(text: str) -> tuple[FileUpdate, ...]: """Parse model-supplied complete file content blocks.""" updates: list[FileUpdate] = [] + updates.extend(_parse_delimited_file_updates(text)) pattern = re.compile( r"```(?:file|path)[:=](?P[^\n`]+)\n(?P.*?)```", flags=re.DOTALL | re.IGNORECASE, @@ -105,11 +106,24 @@ def parse_file_updates(text: str) -> tuple[FileUpdate, ...]: updates.append(FileUpdate(path=path, content=content)) if not updates: raise PipelineError( - "File writer error: no file blocks found. Expected fenced blocks like ```file:path.py." + "File writer error: no file blocks found. Expected FILE: path with ---CONTENT---/---END--- or fenced blocks like ```file:path.py." ) return tuple(updates) +def _parse_delimited_file_updates(text: str) -> list[FileUpdate]: + pattern = re.compile( + r"(?ms)^FILE:\s*(?P[^\n]+)\n---CONTENT---\n(?P.*?)\n---END---\s*$" + ) + updates: list[FileUpdate] = [] + for match in pattern.finditer(text): + path = match.group("path").strip().strip("`") + content = match.group("content") + if path: + updates.append(FileUpdate(path=path, content=content + "\n")) + return updates + + def generate_patch_from_file_updates( updates: tuple[FileUpdate, ...], project_root: str | Path, diff --git a/nightshift/project_templates/tutorial-novel/.nightshift/agents/drafter.md b/nightshift/project_templates/tutorial-novel/.nightshift/agents/drafter.md index 2f3df01..cb8ac35 100644 --- a/nightshift/project_templates/tutorial-novel/.nightshift/agents/drafter.md +++ b/nightshift/project_templates/tutorial-novel/.nightshift/agents/drafter.md @@ -11,10 +11,13 @@ Rules: - Do not resolve future plot threads unless the task explicitly asks for that. - Do not include author notes, TODOs, bracket placeholders, or analysis in the scene file. -Output only complete file content blocks. -Use one fenced block per file: -```file:story/chapters/chapter-001/scene-001.md +Output only one complete file block using this delimiter format: +FILE: story/chapters/chapter-001/scene-001.md +---CONTENT--- -``` +---END--- + +Do not use markdown code fences for scene prose output. +Do not output a plan, notes, analysis, or any text outside the delimiter block. If the task does not specify a scene path, choose the next obvious path under `story/chapters/` and keep it stable. diff --git a/nightshift/runlog.py b/nightshift/runlog.py index 2e4a9d8..a31313a 100644 --- a/nightshift/runlog.py +++ b/nightshift/runlog.py @@ -92,17 +92,19 @@ def format_status_event_message(event: str, message: str, fields: dict[str, obje return f"Task: {task_id} | Starting" if task_id else "Starting task" if event == "stage.start" and stage_id: label = f"{stage_id} ({stage_type})" if stage_type else stage_id - return f"{prefix}Stage: {label}{retry_text}" + return f"{prefix}>> Stage: {label}{retry_text}" if event == "agent.start": model_text = f" | Model: {model}" if model else "" return f"{prefix}Agent: {agent_id or stage_id}{model_text}" if event == "command.start": return f"{prefix}Command: {command or stage_id}" if event == "stage.retry": - return f"{prefix}Retrying after {stage_id} -> {next_stage}{retry_text}" + return f"{prefix}Retry: {stage_id} -> {next_stage}{retry_text}" if event in {"stage.finish", "task.finish"} and status: target = f"Stage: {stage_id}" if event == "stage.finish" and stage_id else "Task" - return f"{prefix}{target} {status}" + reason = str(fields.get("reason", "") or "") + reason_text = f" | {reason}" if reason and status not in {"pass", "complete"} else "" + return f"{prefix}{target}: {status}{reason_text}" if event.endswith(".start"): return f"{prefix}{message}" return None diff --git a/nightshift/terminal.py b/nightshift/terminal.py index b6bc4e8..19b8e42 100644 --- a/nightshift/terminal.py +++ b/nightshift/terminal.py @@ -6,6 +6,7 @@ import os import sys from typing import TextIO import random +import shutil import threading import time @@ -166,6 +167,7 @@ class TerminalAnimation: self._thread: threading.Thread | None = None self._width = 0 self._lock = threading.Lock() + self._last_rendered = "" def __enter__(self) -> "TerminalAnimation": self.start() @@ -193,6 +195,25 @@ class TerminalAnimation: with self._lock: self.message = message + def emit(self, line: str) -> None: + if not self.enabled: + print(line) + return + with self._lock: + message = self.message + self._clear() + print(line) + with self._lock: + self.message = message + self._render_frame(0) + + def finish(self, message: str, *, status: str = "") -> None: + if not self.enabled: + return + self.stop() + line = format_status_bar_message(message, status=status, stream=self.stream) + print(line) + def _run(self) -> None: index = 1 while not self._stop.is_set(): @@ -205,8 +226,10 @@ class TerminalAnimation: with self._lock: message = self.message text = f"{frame} | {message}" - self._width = max(self._width, len(text)) - self.stream.write("\r" + text.ljust(self._width)) + visible_width = terminal_text_width(text) + self._width = max(self._width, visible_width) + self._last_rendered = text + self.stream.write("\r" + text + (" " * max(0, self._width - visible_width))) self.stream.flush() def _clear(self) -> None: @@ -245,6 +268,31 @@ def should_style(stream: TextIO | None = None) -> bool: return bool(getattr(stream, "isatty", lambda: False)()) +def terminal_text_width(text: str) -> int: + return len(strip_ansi_escape_sequences(text)) + + +def terminal_columns(default: int = 100) -> int: + return shutil.get_terminal_size((default, 20)).columns + + +def truncate_terminal_text(text: str, max_width: int) -> str: + if max_width <= 1 or terminal_text_width(text) <= max_width: + return text + plain = strip_ansi_escape_sequences(text) + return plain[: max_width - 1].rstrip() + "…" + + +def format_status_bar_message(message: str, *, status: str = "", stream: TextIO | None = None) -> str: + label = "NightShift" + if status: + label = f"{label} {status.upper()}" + color = _status_color(status) or CYAN + prefix = style_text(f"[{label}]", color=color, bold=True, stream=stream) + available = max(20, terminal_columns() - terminal_text_width(prefix) - 4) + return f"{prefix} {truncate_terminal_text(message, available)}" + + def style_text(text: str, *, color: str | None = None, bold: bool = False, dim: bool = False, stream: TextIO | None = None) -> str: if not should_style(stream): return text @@ -305,7 +353,7 @@ def format_console_event_line( color = _event_color(event, fields) if color is None: return line - return style_text(line, color=color, stream=stream) + return style_text(_decorate_event_line(line, event, fields), color=color, bold=_event_bold(event, fields), stream=stream) def format_plain_event_line(timestamp: str, event: str, message: str, fields: dict[str, object]) -> str: @@ -338,5 +386,49 @@ def _event_color(event: str, fields: dict[str, object]) -> str | None: return None +def _status_color(status: str) -> str | None: + normalized = status.lower() + if normalized in {"complete", "pass", "success", "ok"}: + return GREEN + if normalized in {"fail", "failed", "error"}: + return RED + if normalized in {"retry", "warning", "warn", "blocked"}: + return YELLOW + return None + + +def _event_bold(event: str, fields: dict[str, object]) -> bool: + event_name = event.lower() + status = str(fields.get("status", "")).lower() + return event_name in {"stage.start", "stage.finish", "stage.retry", "task.finish"} or status in { + "failed", + "fail", + "error", + } + + +def _decorate_event_line(line: str, event: str, fields: dict[str, object]) -> str: + event_name = event.lower() + if event_name == "stage.start": + return f">> {line}" + if event_name == "stage.finish": + status = str(fields.get("status", "")).lower() + marker = "!!" if status in {"fail", "failed", "error"} else "OK" + return f"{marker} {line}" + if event_name == "stage.retry": + return f"RETRY {line}" + if event_name == "task.finish": + status = str(fields.get("status", "")).lower() + marker = "DONE" if status == "complete" else "FAIL" + return f"{marker} {line}" + return line + + +def strip_ansi_escape_sequences(text: str) -> str: + import re + + return re.sub(r"\x1b\[[0-?]*[ -/]*[@-~]", "", text) + + def _format_value(value: object) -> str: return str(value).replace("\n", " ").replace("\r", " ") diff --git a/tests/test_agents.py b/tests/test_agents.py index 9401164..fbeaded 100644 --- a/tests/test_agents.py +++ b/tests/test_agents.py @@ -93,6 +93,10 @@ class AgentExecutorTests(unittest.TestCase): self.assertIn("Use only paths under these project-relative targets: `story/chapters`.", prompt) self.assertIn("This is the drafting stage", prompt) + self.assertIn("FILE: story/chapters/chapter-001/scene-001.md", prompt) + self.assertIn("---CONTENT---", prompt) + self.assertIn("---END---", prompt) + self.assertIn("Do not use markdown code fences", prompt) def test_command_agent_writes_output_and_returns_pass(self) -> None: with tempfile.TemporaryDirectory() as directory: diff --git a/tests/test_patches.py b/tests/test_patches.py index 3846583..17cbb1d 100644 --- a/tests/test_patches.py +++ b/tests/test_patches.py @@ -247,6 +247,19 @@ test self.assertIn("new file mode 100644", patch) self.assertEqual(result.files, ("src/app.py", "src/test_app.py")) + def test_file_updates_parse_explicit_delimiters(self) -> None: + updates = parse_file_updates( + """FILE: story/chapters/chapter-001/scene-001.md +---CONTENT--- +Sunlight did not belong here. +---END--- +""" + ) + + self.assertEqual(len(updates), 1) + self.assertEqual(updates[0].path, "story/chapters/chapter-001/scene-001.md") + self.assertEqual(updates[0].content, "Sunlight did not belong here.\n") + def test_file_updates_reject_duplicate_blocks(self) -> None: with tempfile.TemporaryDirectory() as directory: root = Path(directory) diff --git a/tests/test_terminal.py b/tests/test_terminal.py index e77af2e..eb206bf 100644 --- a/tests/test_terminal.py +++ b/tests/test_terminal.py @@ -12,6 +12,7 @@ from nightshift.terminal import ( animation_frames, format_banner, format_console_event_line, + format_status_bar_message, should_animate, ) @@ -46,6 +47,16 @@ class TerminalStylingTests(unittest.TestCase): self.assertEqual(stream.getvalue(), "") + def test_terminal_animation_emit_prints_for_non_tty(self) -> None: + stream = StringIO() + output = StringIO() + animation = TerminalAnimation(stream=stream) + + with patch("sys.stdout", output): + animation.emit("plain log") + + self.assertEqual(output.getvalue().strip(), "plain log") + def test_terminal_animation_renders_immediately_when_started(self) -> None: stream = FakeTTY() animation = TerminalAnimation( @@ -61,6 +72,31 @@ class TerminalStylingTests(unittest.TestCase): self.assertIn("[. ] | Starting", output) + def test_terminal_animation_emit_clears_and_redraws_status(self) -> None: + stream = FakeTTY() + animation = TerminalAnimation( + name="status_dots", + message="Stage: plan", + stream=stream, + interval_seconds=60, + ) + + output = StringIO() + with patch("sys.stdout", output): + animation.start() + animation.emit("log line") + stream_output = stream.getvalue() + animation.stop() + + self.assertIn("log line", output.getvalue()) + self.assertGreaterEqual(stream_output.count("Stage: plan"), 2) + + def test_format_status_bar_message_uses_status_color(self) -> None: + line = format_status_bar_message("Task done", status="complete", stream=FakeTTY()) + + self.assertIn("\x1b[32m", line) + self.assertIn("[NightShift COMPLETE]", line) + def test_terminal_animation_does_not_depend_on_color_output(self) -> None: stream = FakeTTY() with patch.dict("os.environ", {"NO_COLOR": "1"}): @@ -83,6 +119,8 @@ class TerminalStylingTests(unittest.TestCase): ) self.assertIn("\x1b[32m", success) self.assertIn("\x1b[31m", failure) + self.assertIn("DONE ", success) + self.assertIn("FAIL ", failure) self.assertTrue(success.endswith("\x1b[0m")) self.assertTrue(failure.endswith("\x1b[0m")) @@ -130,7 +168,7 @@ class TerminalStylingTests(unittest.TestCase): model="qwen3-coder:30b", ) - self.assertEqual(statuses[0], "Task: TASK-001 | Stage: implement (file_writer) retry 2") + self.assertEqual(statuses[0], "Task: TASK-001 | >> Stage: implement (file_writer) retry 2") self.assertEqual(statuses[1], "Task: TASK-001 | Agent: implementer | Model: qwen3-coder:30b") def test_format_status_event_message_reports_retries(self) -> None: @@ -145,7 +183,7 @@ class TerminalStylingTests(unittest.TestCase): }, ) - self.assertEqual(message, "Task: TASK-001 | Retrying after test -> implement retry 1") + self.assertEqual(message, "Task: TASK-001 | Retry: test -> implement retry 1") if __name__ == "__main__":