From a32142e9eaa03ae69d33e2a2a10e44623cc12619 Mon Sep 17 00:00:00 2001
From: "K. Hodges" <khodges42@gmail.com>
Date: Sun, 17 May 2026 13:34:19 -0700
Subject: [PATCH] Bugfixes from test run

---
 nightshift/agents.py     | 112 +++++++++++++++++++++++++++++----------
 nightshift/artifacts.py  |   2 +-
 nightshift/repo_tools.py |  35 ++++++++++--
 nightshift/runlog.py     |   7 ++-
 nightshift/web.py        |  19 +++++--
 tests/test_agents.py     |  30 +++++++----
 tests/test_artifacts.py  |   7 ++-
 tests/test_pipeline.py   |   3 ++
 tests/test_repo_tools.py |  40 ++++++++++++++
 9 files changed, 207 insertions(+), 48 deletions(-)

diff --git a/nightshift/agents.py b/nightshift/agents.py
index 45e5510..1bf5652 100644
--- a/nightshift/agents.py
+++ b/nightshift/agents.py
@@ -6,7 +6,9 @@ from dataclasses import dataclass
 import json
 import os
 from pathlib import Path
+import re
 import subprocess
+import tempfile
 import time
 from urllib import request
 from urllib.error import URLError
@@ -21,6 +23,7 @@ from .tasks import Task
 
 
 DEFAULT_AGENT_TIMEOUT_SECONDS = 600
+OLLAMA_HEARTBEAT_SECONDS = 30.0
 
 
 @dataclass(frozen=True)
@@ -224,27 +227,74 @@ class AgentExecutor:
         if agent.temperature is not None:
             prompt_input = f"/set parameter temperature {agent.temperature}\n{prompt}"
         started = time.monotonic()
+        self.logger.event(
+            "ollama.start",
+            "Starting Ollama model invocation",
+            agent_id=agent.id,
+            model=agent.model,
+            timeout_seconds=self.timeout_seconds,
+        )
         try:
-            completed = subprocess.run(
-                ["ollama", "run", agent.model],
-                cwd=self.project_root,
-                input=prompt_input,
-                capture_output=True,
-                text=True,
-                encoding="utf-8",
-                errors="replace",
-                timeout=self.timeout_seconds,
-            )
-            duration = time.monotonic() - started
-            return AgentInvocation(
-                agent_id=agent.id,
-                command=command,
-                prompt=prompt_input,
-                exit_code=completed.returncode,
-                stdout=_coerce_output(completed.stdout),
-                stderr=_coerce_output(completed.stderr),
-                duration_seconds=duration,
-            )
+            with tempfile.TemporaryFile("w+", encoding="utf-8", errors="replace") as stdout_file:
+                with tempfile.TemporaryFile("w+", encoding="utf-8", errors="replace") as stderr_file:
+                    process = subprocess.Popen(
+                        ["ollama", "run", agent.model],
+                        cwd=self.project_root,
+                        stdin=subprocess.PIPE,
+                        stdout=stdout_file,
+                        stderr=stderr_file,
+                        text=True,
+                        encoding="utf-8",
+                        errors="replace",
+                    )
+                    assert process.stdin is not None
+                    process.stdin.write(prompt_input)
+                    process.stdin.close()
+                    last_heartbeat = started
+                    timed_out = False
+                    while process.poll() is None:
+                        now = time.monotonic()
+                        elapsed = now - started
+                        if elapsed > self.timeout_seconds:
+                            process.kill()
+                            timed_out = True
+                            break
+                        if now - last_heartbeat >= OLLAMA_HEARTBEAT_SECONDS:
+                            self.logger.event(
+                                "ollama.wait",
+                                "Ollama invocation still running",
+                                agent_id=agent.id,
+                                model=agent.model,
+                                elapsed=f"{elapsed:.0f}s",
+                            )
+                            last_heartbeat = now
+                        time.sleep(1.0)
+                    process.wait()
+                    duration = time.monotonic() - started
+                    stdout_file.seek(0)
+                    stderr_file.seek(0)
+                    stdout = stdout_file.read()
+                    stderr = stderr_file.read()
+                    if timed_out:
+                        return AgentInvocation(
+                            agent_id=agent.id,
+                            command=command,
+                            prompt=prompt_input,
+                            exit_code=-1,
+                            stdout=stdout,
+                            stderr=stderr,
+                            duration_seconds=duration,
+                            timed_out=True,
+                        )
+                    return AgentInvocation(
+                        agent_id=agent.id,
+                        command=command,
+                        prompt=prompt_input,
+                        exit_code=process.returncode or 0,
+                        stdout=stdout,
+                        stderr=stderr,
+                        duration_seconds=duration,
+                    )
         except FileNotFoundError as exc:
             duration = time.monotonic() - started
             return AgentInvocation(
@@ -256,17 +306,16 @@ class AgentExecutor:
                 stderr=str(exc),
                 duration_seconds=duration,
             )
-        except subprocess.TimeoutExpired as exc:
+        except OSError as exc:
             duration = time.monotonic() - started
             return AgentInvocation(
                 agent_id=agent.id,
                 command=command,
                 prompt=prompt_input,
-                exit_code=-1,
-                stdout=_coerce_output(exc.stdout),
-                stderr=_coerce_output(exc.stderr),
+                exit_code=1,
+                stdout="",
+                stderr=str(exc),
                 duration_seconds=duration,
-                timed_out=True,
             )
 
     def _invoke_openai_compatible(self, agent: AgentConfig, prompt: str) -> AgentInvocation:
@@ -390,8 +439,12 @@ def _coerce_output(value: str | bytes | None) -> str:
     if value is None:
         return ""
     if isinstance(value, bytes):
-        return value.decode("utf-8", errors="replace")
-    return value
+        value = value.decode("utf-8", errors="replace")
+    return strip_ansi_escape_sequences(value)
+
+
+def strip_ansi_escape_sequences(value: str) -> str:
+    return re.sub(r"\x1b\[[0-?]*[ -/]*[@-~]", "", value)
 
 
 def _extract_openai_content(raw: str) -> str:
@@ -446,6 +499,11 @@ def output_contract_for(stage: StageConfig) -> str:
                 "  path: <relative path>",
                 "  pattern: <glob for list_files or regex for grep>",
                 "",
+                "Use at most 5 lookup requests.",
+                "Do not repeat the same lookup request.",
+                "Prefer read_file for likely-relevant files over many grep variations.",
+                "Do not search .nightshift, .git, virtualenvs, caches, or artifact directories.",
+                "",
                 "NightShift will run these read-only lookup tools, save files-inspected.md, and re-run this planner stage with the retrieved context.",
             ]
         )
diff --git a/nightshift/artifacts.py b/nightshift/artifacts.py
index a86a463..45fdb52 100644
--- a/nightshift/artifacts.py
+++ b/nightshift/artifacts.py
@@ -154,7 +154,7 @@ def default_run_id(now: datetime | None = None) -> str:
     """Return a filesystem-friendly UTC run id."""
 
     value = now or datetime.now(timezone.utc)
-    return value.strftime("%Y%m%dT%H%M%SZ")
+    return value.strftime("%Y%m%dT%H%M%S.%fZ")
 
 
 def _safe_artifact_segment(value: str, context: str) -> str:
diff --git a/nightshift/repo_tools.py b/nightshift/repo_tools.py
index d299165..963bf02 100644
--- a/nightshift/repo_tools.py
+++ b/nightshift/repo_tools.py
@@ -16,6 +16,8 @@ from .safety import resolve_inside_root, resolve_project_root, validate_scoped_p
 
 DEFAULT_MAX_BYTES = 20_000
 DEFAULT_MAX_MATCHES = 100
+DEFAULT_MAX_LOOKUP_REQUESTS = 8
+SKIPPED_REPO_PARTS = {".git", ".nightshift", "__pycache__", ".venv", "venv"}
 
 
 @dataclass(frozen=True)
@@ -55,7 +57,7 @@ class RepoTools:
         relative_files = [
             _relative(item, self.project_root)
             for item in sorted(candidates)
-            if fnmatch.fnmatch(item.name, pattern)
+            if fnmatch.fnmatch(item.name, pattern) and not _is_skipped_repo_path(item, self.project_root)
         ]
         lines = relative_files[:max_files]
         if len(relative_files) > max_files:
@@ -64,6 +66,8 @@ class RepoTools:
 
     def read_file(self, path: str, max_bytes: int = DEFAULT_MAX_BYTES) -> str:
         file_path = self._resolve_scoped(path, "read_file path")
+        if _is_skipped_repo_path(file_path, self.project_root):
+            return f"Path is skipped for repository lookup: {path}"
         if not file_path.exists() or not file_path.is_file():
             return f"File not found: {path}"
         data = file_path.read_bytes()[:max_bytes + 1]
@@ -85,6 +89,8 @@ class RepoTools:
         files = [root] if root.is_file() else [item for item in root.rglob("*") if item.is_file()]
         matches: list[str] = []
         for file_path in sorted(files):
+            if _is_skipped_repo_path(file_path, self.project_root):
+                continue
             try:
                 text = file_path.read_text(encoding="utf-8", errors="replace")
             except OSError:
@@ -110,7 +116,8 @@ class RepoTools:
 
     def execute_requests(self, task_id: str, requests: list[ToolCall], filename: str = "repo-tools.md") -> str:
         completed: list[ToolCall] = []
-        for request in requests:
+        unique_requests = dedupe_tool_calls(requests)[:DEFAULT_MAX_LOOKUP_REQUESTS]
+        for request in unique_requests:
             self.logger.event(
                 "tool.call",
                 "Running repo lookup tool",
@@ -175,7 +182,7 @@ def format_tool_calls(calls: list[ToolCall]) -> str:
     return "\n".join(lines)
 
 
-def parse_lookup_requests(text: str) -> list[ToolCall]:
+def parse_lookup_requests(text: str, max_requests: int = DEFAULT_MAX_LOOKUP_REQUESTS) -> list[ToolCall]:
     """Parse a small YAML-like lookup request list from model output."""
 
     lines = text.splitlines()
@@ -215,7 +222,19 @@ def parse_lookup_requests(text: str) -> list[ToolCall]:
             flush()
         current[key] = value
     flush()
-    return requests
+    return dedupe_tool_calls(requests)[:max_requests]
+
+
+def dedupe_tool_calls(requests: list[ToolCall]) -> list[ToolCall]:
+    seen: set[tuple[str, tuple[tuple[str, str], ...]]] = set()
+    unique: list[ToolCall] = []
+    for request in requests:
+        key = (request.name, tuple(sorted(request.arguments.items())))
+        if key in seen:
+            continue
+        seen.add(key)
+        unique.append(request)
+    return unique
 
 
 def extract_agent_stdout(artifact_text: str) -> str:
@@ -249,3 +268,11 @@ def _relative(path: Path, root: Path) -> str:
         return path.relative_to(root).as_posix()
     except ValueError:
         return path.as_posix()
+
+
+def _is_skipped_repo_path(path: Path, root: Path) -> bool:
+    try:
+        parts = set(path.relative_to(root).parts)
+    except ValueError:
+        parts = set(path.parts)
+    return bool(parts & SKIPPED_REPO_PARTS)
diff --git a/nightshift/runlog.py b/nightshift/runlog.py
index b8a7014..859e37a 100644
--- a/nightshift/runlog.py
+++ b/nightshift/runlog.py
@@ -27,18 +27,23 @@ class RunLogger:
         self.console = console
         self._run_log_path: Path | None = None
         self._aggregate_log_path: Path | None = None
+        self._initialized_run_logs: set[Path] = set()
 
     def bind(self, artifacts: ArtifactStore) -> None:
         artifacts.initialize_run()
         self._run_log_path = artifacts.run_log_path
         self._aggregate_log_path = artifacts.aggregate_log_path
+        if self._run_log_path not in self._initialized_run_logs:
+            self._run_log_path.parent.mkdir(parents=True, exist_ok=True)
+            self._run_log_path.write_text("", encoding="utf-8")
+            self._initialized_run_logs.add(self._run_log_path)
 
     def event(self, event: str, message: str, **fields: object) -> None:
         safe_fields = _redact_fields(fields)
         line = format_log_line(LogEvent(event=event, message=message, fields=safe_fields))
         if self.console is not None:
             self.console(line)
-        for path in (self._run_log_path, self._aggregate_log_path):
+        for path in (self._run_log_path,):
             if path is None:
                 continue
             path.parent.mkdir(parents=True, exist_ok=True)
diff --git a/nightshift/web.py b/nightshift/web.py
index bbea4d5..6be1def 100644
--- a/nightshift/web.py
+++ b/nightshift/web.py
@@ -50,13 +50,18 @@ def read_artifact(run_path: Path, relative_path: str) -> str:
 
 def render_dashboard(artifact_dir: str | Path) -> str:
     runs = list_runs(artifact_dir)
-    body = ["<h1>NightShift Dashboard</h1>", '<meta http-equiv="refresh" content="5">']
+    body = [
+        "<h1>NightShift Dashboard</h1>",
+        '<meta http-equiv="refresh" content="5">',
+        "<p>Showing artifact files from the newest run first. This page is read-only and refreshes every 5 seconds.</p>",
+    ]
     if not runs:
         body.append("<p>No runs found.</p>")
-    for run in runs:
+    for index, run in enumerate(runs):
+        title = "Latest Run" if index == 0 else "Older Run"
         body.extend(
             [
-                f"<section><h2>{escape(run.name)}</h2>",
+                f"<section><h2>{title}: {escape(run.name)}</h2>",
                 "<pre>",
                 escape(run.summary),
                 "</pre>",
@@ -84,11 +89,15 @@ def create_app(project_root: str | Path = ".", artifact_dir: str | Path = ".nigh
 
     @app.get("/")
     def index():
-        return Response(render_dashboard(artifacts), mimetype="text/html")
+        response = Response(render_dashboard(artifacts), mimetype="text/html")
+        response.headers["Cache-Control"] = "no-store, max-age=0"
+        return response
 
     @app.get("/runs/<run_id>/<path:artifact_path>")
     def artifact(run_id: str, artifact_path: str):
         content = read_artifact(artifacts / "runs" / run_id, artifact_path)
-        return Response(f"<pre>{escape(content)}</pre>", mimetype="text/html")
+        response = Response(f"<pre>{escape(content)}</pre>", mimetype="text/html")
+        response.headers["Cache-Control"] = "no-store, max-age=0"
+        return response
 
     return app
diff --git a/tests/test_agents.py b/tests/test_agents.py
index 6b73c3e..7e4392f 100644
--- a/tests/test_agents.py
+++ b/tests/test_agents.py
@@ -1,9 +1,10 @@
 from pathlib import Path
+import io
 import tempfile
 import unittest
 from unittest.mock import MagicMock, patch
 
-from nightshift.agents import AgentExecutor, build_prompt_bundle, parse_review_output
+from nightshift.agents import AgentExecutor, build_prompt_bundle, parse_review_output, strip_ansi_escape_sequences
 from nightshift.agents import AgentInvocation, format_agent_invocation
 from nightshift.artifacts import ArtifactStore
 from nightshift.config import AgentConfig, StageConfig
@@ -118,17 +119,25 @@ class AgentExecutorTests(unittest.TestCase):
             task = parse_tasks(TASK_MD)[0]
             stage = StageConfig(id="plan", type="agent", agent="planner", output="plan.md")
 
-            completed = type(
-                "Completed",
-                (),
-                {"returncode": 0, "stdout": "ollama output", "stderr": ""},
-            )()
-            with patch("nightshift.agents.subprocess.run", return_value=completed) as run:
+            class FakePopen:
+                def __init__(self, args, cwd=None, stdin=None, stdout=None, stderr=None, **kwargs):
+                    self.args = args
+                    self.stdin = io.StringIO()
+                    self.returncode = 0
+                    stdout.write("ollama output")
+
+                def poll(self):
+                    return self.returncode
+
+                def wait(self):
+                    return self.returncode
+
+            with patch("nightshift.agents.subprocess.Popen", side_effect=FakePopen) as popen:
                 result = executor.run_stage(stage, task)
 
             self.assertEqual(result.status, "pass")
-            run.assert_called_once()
-            self.assertEqual(run.call_args.args[0], ["ollama", "run", "tiny-model"])
+            popen.assert_called_once()
+            self.assertEqual(popen.call_args.args[0], ["ollama", "run", "tiny-model"])
             output = (root / result.output_path).read_text(encoding="utf-8")
             self.assertIn("ollama run tiny-model", output)
 
@@ -185,6 +194,9 @@ class AgentExecutorTests(unittest.TestCase):
         self.assertIn("Agent: `planner`", output)
         self.assertIn("## stderr", output)
 
+    def test_strip_ansi_escape_sequences(self) -> None:
+        self.assertEqual(strip_ansi_escape_sequences("\x1b[?25lthinking\x1b[0m"), "thinking")
+
 
 if __name__ == "__main__":
     unittest.main()
diff --git a/tests/test_artifacts.py b/tests/test_artifacts.py
index 8403442..8b91c5b 100644
--- a/tests/test_artifacts.py
+++ b/tests/test_artifacts.py
@@ -2,7 +2,7 @@ from pathlib import Path
 import tempfile
 import unittest
 
-from nightshift.artifacts import ArtifactStore
+from nightshift.artifacts import ArtifactStore, default_run_id
 from nightshift.errors import ArtifactError
 from nightshift.init import init_project
 from nightshift.tasks import parse_task_file
@@ -62,6 +62,11 @@ class ArtifactStoreTests(unittest.TestCase):
             with self.assertRaisesRegex(ArtifactError, "task id contains unsafe"):
                 store.create_task_dir("../TASK-001")
 
+    def test_default_run_id_has_subsecond_precision(self) -> None:
+        run_id = default_run_id()
+
+        self.assertRegex(run_id, r"^\d{8}T\d{6}\.\d{6}Z$")
+
 
 if __name__ == "__main__":
     unittest.main()
diff --git a/tests/test_pipeline.py b/tests/test_pipeline.py
index f84c6be..1526e78 100644
--- a/tests/test_pipeline.py
+++ b/tests/test_pipeline.py
@@ -240,10 +240,13 @@ Acceptance Criteria:
             config = make_config(root, stages)
             runner = PipelineRunner(config, artifacts)
             task = parse_tasks(TASK_MD)[0]
+            artifacts.initialize_run()
+            artifacts.run_log_path.write_text("old run log\n", encoding="utf-8")
 
             runner.run_task(task)
 
             log = (root / ".nightshift" / "runs" / "test-run" / "run.log").read_text(encoding="utf-8")
+            self.assertNotIn("old run log", log)
             self.assertIn("task.start", log)
             self.assertIn("stage.start", log)
             self.assertIn("agent.finish", log)
diff --git a/tests/test_repo_tools.py b/tests/test_repo_tools.py
index 73e84a0..055f1a4 100644
--- a/tests/test_repo_tools.py
+++ b/tests/test_repo_tools.py
@@ -42,6 +42,46 @@ lookup_requests:
         self.assertEqual(requests[0].arguments["path"], "nightshift/pipeline.py")
         self.assertEqual(requests[1].arguments["pattern"], "PipelineRunner")
 
+    def test_parse_lookup_requests_dedupes_and_caps(self) -> None:
+        repeated = "\n".join(
+            [
+                "lookup_requests:",
+                *[
+                    "- tool: grep\n  path: .\n  pattern: parse.*tokenize"
+                    for _ in range(20)
+                ],
+                "- tool: read_file\n  path: lisp.py",
+            ]
+        )
+
+        requests = parse_lookup_requests(repeated)
+
+        self.assertEqual(len(requests), 2)
+        self.assertEqual([request.name for request in requests], ["grep", "read_file"])
+
+    def test_repo_tools_skip_artifact_directories(self) -> None:
+        with tempfile.TemporaryDirectory() as directory:
+            root = Path(directory)
+            (root / ".nightshift" / "runs").mkdir(parents=True)
+            (root / ".nightshift" / "runs" / "run.log").write_text("needle\n", encoding="utf-8")
+            (root / "src").mkdir()
+            (root / "src" / "app.py").write_text("needle\n", encoding="utf-8")
+            safety = SafetyConfig(
+                require_clean_worktree=False,
+                scoped_paths=(".",),
+                allowed_commands=(),
+                forbidden_commands=(),
+            )
+            tools = RepoTools(root, safety, ArtifactStore(root, ".nightshift", run_id="test-run"))
+
+            files = tools.list_files(".")
+            matches = tools.grep("needle", ".")
+
+            self.assertIn("src/app.py", files)
+            self.assertNotIn(".nightshift", files)
+            self.assertIn("src/app.py:1", matches)
+            self.assertNotIn(".nightshift", matches)
+
 
 if __name__ == "__main__":
     unittest.main()