Improve logging

2026-06-14 10:08:37 +00:00 · 2026-05-22 03:31:58 -07:00 · 2026-05-22 03:31:58 -07:00 · d928a52fb3
commit d928a52fb3
parent 6e03430a33
10 changed files with 238 additions and 20 deletions
--- a/docs/ideas.md
+++ b/docs/ideas.md
@ -99,6 +99,34 @@ Examples:

 This keeps the initial useful output visible even when strict rerun output is worse.

+## P1: Store Raw Agent Invocations As JSON
+
+The human-readable agent artifact wraps stdout, stderr, and prompts in markdown fences. Nested markdown fences from model output can confuse downstream parsing.
+
+Write a machine-readable artifact alongside the markdown artifact:
+
+```text
+<stage>-agent-output.json
+```
+
+Suggested fields:
+
+```json
+{
+  "agent_id": "drafter",
+  "stage_id": "draft_scene",
+  "command": "POST http://localhost:11434/api/generate",
+  "exit_code": 0,
+  "timed_out": false,
+  "duration_seconds": 12.3,
+  "stdout": "...",
+  "stderr": "...",
+  "prompt": "..."
+}
+```
+
+Pipeline parsing should read raw JSON fields instead of recovering stdout from markdown.
+
 ## P1: Add A Writing-Mode Validator

 Add deterministic checks for prose workflows:
--- a/nightshift/agents.py
+++ b/nightshift/agents.py
@ -460,16 +460,14 @@ def output_contract_for(stage: StageConfig) -> str:
            ]
        )
    if stage.type == "file_writer":
+        contract = _file_writer_block_contract(stage)
        allowed = _format_allowed_file_writer_paths(stage.allowed_paths)
        return "\n".join(
            [
                "Return complete file contents only.",
-                "Use one fenced block per file with this exact opening form:",
-                "```file:path/inside/project.ext",
-                "<complete file content>",
-                "```",
+                contract,
                allowed,
-                "Do not include prose outside file blocks.",
+                "Do not include prose outside file blocks or delimiter blocks.",
                "Include only files required for this stage and task.",
                "NightShift will generate the unified diff deterministically.",
                "On repair attempts, use the retry notes and failed stage output to diagnose the root cause before changing files.",
@ -516,6 +514,30 @@ def output_contract_for(stage: StageConfig) -> str:
    return "Write the requested stage output in concise markdown."


+def _file_writer_block_contract(stage: StageConfig) -> str:
+    normalized = tuple(path.replace("\\", "/").rstrip("/") for path in stage.allowed_paths)
+    if normalized == ("story/chapters",):
+        return "\n".join(
+            [
+                "Use exactly this delimiter format for the scene file:",
+                "FILE: story/chapters/chapter-001/scene-001.md",
+                "---CONTENT---",
+                "<complete scene prose>",
+                "---END---",
+                "Do not use markdown code fences for prose scene output.",
+            ]
+        )
+    return "\n".join(
+        [
+            "Use one fenced block per file with this exact opening form:",
+            "```file:path/inside/project.ext",
+            "<complete file content>",
+            "```",
+            "Alternatively, use FILE: path with ---CONTENT--- and ---END--- delimiters.",
+        ]
+    )
+
+
 def _format_allowed_file_writer_paths(allowed_paths: tuple[str, ...]) -> str:
    if not allowed_paths:
        return "Use real project-relative paths, not placeholder paths."
--- a/nightshift/cli.py
+++ b/nightshift/cli.py
@ -216,8 +216,9 @@ def main(argv: list[str] | None = None) -> int:
                    message="Starting all tasks",
                    enabled=not args.no_animation,
                ) as animation:
-                    runner = PipelineRunner(config, logger=RunLogger(console=print, status=animation.update_message))
+                    runner = PipelineRunner(config, logger=RunLogger(console=animation.emit, status=animation.update_message))
                    result = runner.run_tasks(tasks)
+                animation.finish(f"Tasks run: {len(result.task_results)} | Completed: {result.completed_count} | Failed: {result.failed_count}", status=result.status)
                print(f"Status: {result.status}")
                print(f"Tasks run: {len(result.task_results)}")
                print(f"Completed: {result.completed_count}")
@ -232,8 +233,9 @@ def main(argv: list[str] | None = None) -> int:
                message=f"Task: {task.id} | Starting",
                enabled=not args.no_animation,
            ) as animation:
-                runner = PipelineRunner(config, logger=RunLogger(console=print, status=animation.update_message))
+                runner = PipelineRunner(config, logger=RunLogger(console=animation.emit, status=animation.update_message))
                result = runner.run_task(task)
+            animation.finish(f"Task: {result.task_id} | Retries: {result.retry_count} | {result.reason}", status=result.status)
            print(f"Task: {result.task_id}")
            print(style_text(f"Status: {result.status}", color=_status_color(result.status), bold=True))
            print(f"Retries: {result.retry_count}")
--- a/nightshift/patches.py
+++ b/nightshift/patches.py
@ -93,6 +93,7 @@ def parse_file_updates(text: str) -> tuple[FileUpdate, ...]:
    """Parse model-supplied complete file content blocks."""

    updates: list[FileUpdate] = []
+    updates.extend(_parse_delimited_file_updates(text))
    pattern = re.compile(
        r"```(?:file|path)[:=](?P<path>[^\n`]+)\n(?P<content>.*?)```",
        flags=re.DOTALL | re.IGNORECASE,
@ -105,11 +106,24 @@ def parse_file_updates(text: str) -> tuple[FileUpdate, ...]:
        updates.append(FileUpdate(path=path, content=content))
    if not updates:
        raise PipelineError(
-            "File writer error: no file blocks found. Expected fenced blocks like ```file:path.py."
+            "File writer error: no file blocks found. Expected FILE: path with ---CONTENT---/---END--- or fenced blocks like ```file:path.py."
        )
    return tuple(updates)


+def _parse_delimited_file_updates(text: str) -> list[FileUpdate]:
+    pattern = re.compile(
+        r"(?ms)^FILE:\s*(?P<path>[^\n]+)\n---CONTENT---\n(?P<content>.*?)\n---END---\s*$"
+    )
+    updates: list[FileUpdate] = []
+    for match in pattern.finditer(text):
+        path = match.group("path").strip().strip("`")
+        content = match.group("content")
+        if path:
+            updates.append(FileUpdate(path=path, content=content + "\n"))
+    return updates
+
+
 def generate_patch_from_file_updates(
    updates: tuple[FileUpdate, ...],
    project_root: str | Path,
--- a/nightshift/project_templates/tutorial-novel/.nightshift/agents/drafter.md
+++ b/nightshift/project_templates/tutorial-novel/.nightshift/agents/drafter.md
@ -11,10 +11,13 @@ Rules:
 - Do not resolve future plot threads unless the task explicitly asks for that.
 - Do not include author notes, TODOs, bracket placeholders, or analysis in the scene file.

-Output only complete file content blocks.
-Use one fenced block per file:
-```file:story/chapters/chapter-001/scene-001.md
+Output only one complete file block using this delimiter format:
+FILE: story/chapters/chapter-001/scene-001.md
+---CONTENT---
 <complete scene prose>
-```
+---END---
+
+Do not use markdown code fences for scene prose output.
+Do not output a plan, notes, analysis, or any text outside the delimiter block.

 If the task does not specify a scene path, choose the next obvious path under `story/chapters/` and keep it stable.
--- a/nightshift/runlog.py
+++ b/nightshift/runlog.py
@ -92,17 +92,19 @@ def format_status_event_message(event: str, message: str, fields: dict[str, obje
        return f"Task: {task_id} | Starting" if task_id else "Starting task"
    if event == "stage.start" and stage_id:
        label = f"{stage_id} ({stage_type})" if stage_type else stage_id
-        return f"{prefix}Stage: {label}{retry_text}"
+        return f"{prefix}>> Stage: {label}{retry_text}"
    if event == "agent.start":
        model_text = f" | Model: {model}" if model else ""
        return f"{prefix}Agent: {agent_id or stage_id}{model_text}"
    if event == "command.start":
        return f"{prefix}Command: {command or stage_id}"
    if event == "stage.retry":
-        return f"{prefix}Retrying after {stage_id} -> {next_stage}{retry_text}"
+        return f"{prefix}Retry: {stage_id} -> {next_stage}{retry_text}"
    if event in {"stage.finish", "task.finish"} and status:
        target = f"Stage: {stage_id}" if event == "stage.finish" and stage_id else "Task"
-        return f"{prefix}{target} {status}"
+        reason = str(fields.get("reason", "") or "")
+        reason_text = f" | {reason}" if reason and status not in {"pass", "complete"} else ""
+        return f"{prefix}{target}: {status}{reason_text}"
    if event.endswith(".start"):
        return f"{prefix}{message}"
    return None
--- a/nightshift/terminal.py
+++ b/nightshift/terminal.py
@ -6,6 +6,7 @@ import os
 import sys
 from typing import TextIO
 import random
+import shutil
 import threading
 import time

@ -166,6 +167,7 @@ class TerminalAnimation:
        self._thread: threading.Thread | None = None
        self._width = 0
        self._lock = threading.Lock()
+        self._last_rendered = ""

    def __enter__(self) -> "TerminalAnimation":
        self.start()
@ -193,6 +195,25 @@ class TerminalAnimation:
        with self._lock:
            self.message = message

+    def emit(self, line: str) -> None:
+        if not self.enabled:
+            print(line)
+            return
+        with self._lock:
+            message = self.message
+        self._clear()
+        print(line)
+        with self._lock:
+            self.message = message
+        self._render_frame(0)
+
+    def finish(self, message: str, *, status: str = "") -> None:
+        if not self.enabled:
+            return
+        self.stop()
+        line = format_status_bar_message(message, status=status, stream=self.stream)
+        print(line)
+
    def _run(self) -> None:
        index = 1
        while not self._stop.is_set():
@ -205,8 +226,10 @@ class TerminalAnimation:
        with self._lock:
            message = self.message
        text = f"{frame} | {message}"
-        self._width = max(self._width, len(text))
-        self.stream.write("\r" + text.ljust(self._width))
+        visible_width = terminal_text_width(text)
+        self._width = max(self._width, visible_width)
+        self._last_rendered = text
+        self.stream.write("\r" + text + (" " * max(0, self._width - visible_width)))
        self.stream.flush()

    def _clear(self) -> None:
@ -245,6 +268,31 @@ def should_style(stream: TextIO | None = None) -> bool:
    return bool(getattr(stream, "isatty", lambda: False)())


+def terminal_text_width(text: str) -> int:
+    return len(strip_ansi_escape_sequences(text))
+
+
+def terminal_columns(default: int = 100) -> int:
+    return shutil.get_terminal_size((default, 20)).columns
+
+
+def truncate_terminal_text(text: str, max_width: int) -> str:
+    if max_width <= 1 or terminal_text_width(text) <= max_width:
+        return text
+    plain = strip_ansi_escape_sequences(text)
+    return plain[: max_width - 1].rstrip() + "…"
+
+
+def format_status_bar_message(message: str, *, status: str = "", stream: TextIO | None = None) -> str:
+    label = "NightShift"
+    if status:
+        label = f"{label} {status.upper()}"
+    color = _status_color(status) or CYAN
+    prefix = style_text(f"[{label}]", color=color, bold=True, stream=stream)
+    available = max(20, terminal_columns() - terminal_text_width(prefix) - 4)
+    return f"{prefix} {truncate_terminal_text(message, available)}"
+
+
 def style_text(text: str, *, color: str | None = None, bold: bool = False, dim: bool = False, stream: TextIO | None = None) -> str:
    if not should_style(stream):
        return text
@ -305,7 +353,7 @@ def format_console_event_line(
    color = _event_color(event, fields)
    if color is None:
        return line
-    return style_text(line, color=color, stream=stream)
+    return style_text(_decorate_event_line(line, event, fields), color=color, bold=_event_bold(event, fields), stream=stream)


 def format_plain_event_line(timestamp: str, event: str, message: str, fields: dict[str, object]) -> str:
@ -338,5 +386,49 @@ def _event_color(event: str, fields: dict[str, object]) -> str | None:
    return None


+def _status_color(status: str) -> str | None:
+    normalized = status.lower()
+    if normalized in {"complete", "pass", "success", "ok"}:
+        return GREEN
+    if normalized in {"fail", "failed", "error"}:
+        return RED
+    if normalized in {"retry", "warning", "warn", "blocked"}:
+        return YELLOW
+    return None
+
+
+def _event_bold(event: str, fields: dict[str, object]) -> bool:
+    event_name = event.lower()
+    status = str(fields.get("status", "")).lower()
+    return event_name in {"stage.start", "stage.finish", "stage.retry", "task.finish"} or status in {
+        "failed",
+        "fail",
+        "error",
+    }
+
+
+def _decorate_event_line(line: str, event: str, fields: dict[str, object]) -> str:
+    event_name = event.lower()
+    if event_name == "stage.start":
+        return f">> {line}"
+    if event_name == "stage.finish":
+        status = str(fields.get("status", "")).lower()
+        marker = "!!" if status in {"fail", "failed", "error"} else "OK"
+        return f"{marker} {line}"
+    if event_name == "stage.retry":
+        return f"RETRY {line}"
+    if event_name == "task.finish":
+        status = str(fields.get("status", "")).lower()
+        marker = "DONE" if status == "complete" else "FAIL"
+        return f"{marker} {line}"
+    return line
+
+
+def strip_ansi_escape_sequences(text: str) -> str:
+    import re
+
+    return re.sub(r"\x1b\[[0-?]*[ -/]*[@-~]", "", text)
+
+
 def _format_value(value: object) -> str:
    return str(value).replace("\n", " ").replace("\r", " ")
--- a/tests/test_agents.py
+++ b/tests/test_agents.py
@ -93,6 +93,10 @@ class AgentExecutorTests(unittest.TestCase):

        self.assertIn("Use only paths under these project-relative targets: `story/chapters`.", prompt)
        self.assertIn("This is the drafting stage", prompt)
+        self.assertIn("FILE: story/chapters/chapter-001/scene-001.md", prompt)
+        self.assertIn("---CONTENT---", prompt)
+        self.assertIn("---END---", prompt)
+        self.assertIn("Do not use markdown code fences", prompt)

    def test_command_agent_writes_output_and_returns_pass(self) -> None:
        with tempfile.TemporaryDirectory() as directory:
--- a/tests/test_patches.py
+++ b/tests/test_patches.py
@ -247,6 +247,19 @@ test
            self.assertIn("new file mode 100644", patch)
            self.assertEqual(result.files, ("src/app.py", "src/test_app.py"))

+    def test_file_updates_parse_explicit_delimiters(self) -> None:
+        updates = parse_file_updates(
+            """FILE: story/chapters/chapter-001/scene-001.md
+---CONTENT---
+Sunlight did not belong here.
+---END---
+"""
+        )
+
+        self.assertEqual(len(updates), 1)
+        self.assertEqual(updates[0].path, "story/chapters/chapter-001/scene-001.md")
+        self.assertEqual(updates[0].content, "Sunlight did not belong here.\n")
+
    def test_file_updates_reject_duplicate_blocks(self) -> None:
        with tempfile.TemporaryDirectory() as directory:
            root = Path(directory)
--- a/tests/test_terminal.py
+++ b/tests/test_terminal.py
@ -12,6 +12,7 @@ from nightshift.terminal import (
    animation_frames,
    format_banner,
    format_console_event_line,
+    format_status_bar_message,
    should_animate,
 )

@ -46,6 +47,16 @@ class TerminalStylingTests(unittest.TestCase):

        self.assertEqual(stream.getvalue(), "")

+    def test_terminal_animation_emit_prints_for_non_tty(self) -> None:
+        stream = StringIO()
+        output = StringIO()
+        animation = TerminalAnimation(stream=stream)
+
+        with patch("sys.stdout", output):
+            animation.emit("plain log")
+
+        self.assertEqual(output.getvalue().strip(), "plain log")
+
    def test_terminal_animation_renders_immediately_when_started(self) -> None:
        stream = FakeTTY()
        animation = TerminalAnimation(
@ -61,6 +72,31 @@ class TerminalStylingTests(unittest.TestCase):

        self.assertIn("[.  ] | Starting", output)

+    def test_terminal_animation_emit_clears_and_redraws_status(self) -> None:
+        stream = FakeTTY()
+        animation = TerminalAnimation(
+            name="status_dots",
+            message="Stage: plan",
+            stream=stream,
+            interval_seconds=60,
+        )
+
+        output = StringIO()
+        with patch("sys.stdout", output):
+            animation.start()
+            animation.emit("log line")
+            stream_output = stream.getvalue()
+            animation.stop()
+
+        self.assertIn("log line", output.getvalue())
+        self.assertGreaterEqual(stream_output.count("Stage: plan"), 2)
+
+    def test_format_status_bar_message_uses_status_color(self) -> None:
+        line = format_status_bar_message("Task done", status="complete", stream=FakeTTY())
+
+        self.assertIn("\x1b[32m", line)
+        self.assertIn("[NightShift COMPLETE]", line)
+
    def test_terminal_animation_does_not_depend_on_color_output(self) -> None:
        stream = FakeTTY()
        with patch.dict("os.environ", {"NO_COLOR": "1"}):
@ -83,6 +119,8 @@ class TerminalStylingTests(unittest.TestCase):
        )
        self.assertIn("\x1b[32m", success)
        self.assertIn("\x1b[31m", failure)
+        self.assertIn("DONE ", success)
+        self.assertIn("FAIL ", failure)
        self.assertTrue(success.endswith("\x1b[0m"))
        self.assertTrue(failure.endswith("\x1b[0m"))

@ -130,7 +168,7 @@ class TerminalStylingTests(unittest.TestCase):
                model="qwen3-coder:30b",
            )

-            self.assertEqual(statuses[0], "Task: TASK-001 | Stage: implement (file_writer) retry 2")
+            self.assertEqual(statuses[0], "Task: TASK-001 | >> Stage: implement (file_writer) retry 2")
            self.assertEqual(statuses[1], "Task: TASK-001 | Agent: implementer | Model: qwen3-coder:30b")

    def test_format_status_event_message_reports_retries(self) -> None:
@ -145,7 +183,7 @@ class TerminalStylingTests(unittest.TestCase):
            },
        )

-        self.assertEqual(message, "Task: TASK-001 | Retrying after test -> implement retry 1")
+        self.assertEqual(message, "Task: TASK-001 | Retry: test -> implement retry 1")


 if __name__ == "__main__":