From a32142e9eaa03ae69d33e2a2a10e44623cc12619 Mon Sep 17 00:00:00 2001 From: "K. Hodges" Date: Sun, 17 May 2026 13:34:19 -0700 Subject: [PATCH] Bugfixes from test run --- nightshift/agents.py | 112 +++++++++++++++++++++++++++++---------- nightshift/artifacts.py | 2 +- nightshift/repo_tools.py | 35 ++++++++++-- nightshift/runlog.py | 7 ++- nightshift/web.py | 19 +++++-- tests/test_agents.py | 30 +++++++---- tests/test_artifacts.py | 7 ++- tests/test_pipeline.py | 3 ++ tests/test_repo_tools.py | 40 ++++++++++++++ 9 files changed, 207 insertions(+), 48 deletions(-) diff --git a/nightshift/agents.py b/nightshift/agents.py index 45e5510..1bf5652 100644 --- a/nightshift/agents.py +++ b/nightshift/agents.py @@ -6,7 +6,9 @@ from dataclasses import dataclass import json import os from pathlib import Path +import re import subprocess +import tempfile import time from urllib import request from urllib.error import URLError @@ -21,6 +23,7 @@ from .tasks import Task DEFAULT_AGENT_TIMEOUT_SECONDS = 600 +OLLAMA_HEARTBEAT_SECONDS = 30.0 @dataclass(frozen=True) @@ -224,27 +227,74 @@ class AgentExecutor: if agent.temperature is not None: prompt_input = f"/set parameter temperature {agent.temperature}\n{prompt}" started = time.monotonic() + self.logger.event( + "ollama.start", + "Starting Ollama model invocation", + agent_id=agent.id, + model=agent.model, + timeout_seconds=self.timeout_seconds, + ) try: - completed = subprocess.run( - ["ollama", "run", agent.model], - cwd=self.project_root, - input=prompt_input, - capture_output=True, - text=True, - encoding="utf-8", - errors="replace", - timeout=self.timeout_seconds, - ) - duration = time.monotonic() - started - return AgentInvocation( - agent_id=agent.id, - command=command, - prompt=prompt_input, - exit_code=completed.returncode, - stdout=_coerce_output(completed.stdout), - stderr=_coerce_output(completed.stderr), - duration_seconds=duration, - ) + with tempfile.TemporaryFile("w+", encoding="utf-8", errors="replace") as stdout_file: + with tempfile.TemporaryFile("w+", encoding="utf-8", errors="replace") as stderr_file: + process = subprocess.Popen( + ["ollama", "run", agent.model], + cwd=self.project_root, + stdin=subprocess.PIPE, + stdout=stdout_file, + stderr=stderr_file, + text=True, + encoding="utf-8", + errors="replace", + ) + assert process.stdin is not None + process.stdin.write(prompt_input) + process.stdin.close() + last_heartbeat = started + timed_out = False + while process.poll() is None: + now = time.monotonic() + elapsed = now - started + if elapsed > self.timeout_seconds: + process.kill() + timed_out = True + break + if now - last_heartbeat >= OLLAMA_HEARTBEAT_SECONDS: + self.logger.event( + "ollama.wait", + "Ollama invocation still running", + agent_id=agent.id, + model=agent.model, + elapsed=f"{elapsed:.0f}s", + ) + last_heartbeat = now + time.sleep(1.0) + process.wait() + duration = time.monotonic() - started + stdout_file.seek(0) + stderr_file.seek(0) + stdout = stdout_file.read() + stderr = stderr_file.read() + if timed_out: + return AgentInvocation( + agent_id=agent.id, + command=command, + prompt=prompt_input, + exit_code=-1, + stdout=stdout, + stderr=stderr, + duration_seconds=duration, + timed_out=True, + ) + return AgentInvocation( + agent_id=agent.id, + command=command, + prompt=prompt_input, + exit_code=process.returncode or 0, + stdout=stdout, + stderr=stderr, + duration_seconds=duration, + ) except FileNotFoundError as exc: duration = time.monotonic() - started return AgentInvocation( @@ -256,17 +306,16 @@ class AgentExecutor: stderr=str(exc), duration_seconds=duration, ) - except subprocess.TimeoutExpired as exc: + except OSError as exc: duration = time.monotonic() - started return AgentInvocation( agent_id=agent.id, command=command, prompt=prompt_input, - exit_code=-1, - stdout=_coerce_output(exc.stdout), - stderr=_coerce_output(exc.stderr), + exit_code=1, + stdout="", + stderr=str(exc), duration_seconds=duration, - timed_out=True, ) def _invoke_openai_compatible(self, agent: AgentConfig, prompt: str) -> AgentInvocation: @@ -390,8 +439,12 @@ def _coerce_output(value: str | bytes | None) -> str: if value is None: return "" if isinstance(value, bytes): - return value.decode("utf-8", errors="replace") - return value + value = value.decode("utf-8", errors="replace") + return strip_ansi_escape_sequences(value) + + +def strip_ansi_escape_sequences(value: str) -> str: + return re.sub(r"\x1b\[[0-?]*[ -/]*[@-~]", "", value) def _extract_openai_content(raw: str) -> str: @@ -446,6 +499,11 @@ def output_contract_for(stage: StageConfig) -> str: " path: ", " pattern: ", "", + "Use at most 5 lookup requests.", + "Do not repeat the same lookup request.", + "Prefer read_file for likely-relevant files over many grep variations.", + "Do not search .nightshift, .git, virtualenvs, caches, or artifact directories.", + "", "NightShift will run these read-only lookup tools, save files-inspected.md, and re-run this planner stage with the retrieved context.", ] ) diff --git a/nightshift/artifacts.py b/nightshift/artifacts.py index a86a463..45fdb52 100644 --- a/nightshift/artifacts.py +++ b/nightshift/artifacts.py @@ -154,7 +154,7 @@ def default_run_id(now: datetime | None = None) -> str: """Return a filesystem-friendly UTC run id.""" value = now or datetime.now(timezone.utc) - return value.strftime("%Y%m%dT%H%M%SZ") + return value.strftime("%Y%m%dT%H%M%S.%fZ") def _safe_artifact_segment(value: str, context: str) -> str: diff --git a/nightshift/repo_tools.py b/nightshift/repo_tools.py index d299165..963bf02 100644 --- a/nightshift/repo_tools.py +++ b/nightshift/repo_tools.py @@ -16,6 +16,8 @@ from .safety import resolve_inside_root, resolve_project_root, validate_scoped_p DEFAULT_MAX_BYTES = 20_000 DEFAULT_MAX_MATCHES = 100 +DEFAULT_MAX_LOOKUP_REQUESTS = 8 +SKIPPED_REPO_PARTS = {".git", ".nightshift", "__pycache__", ".venv", "venv"} @dataclass(frozen=True) @@ -55,7 +57,7 @@ class RepoTools: relative_files = [ _relative(item, self.project_root) for item in sorted(candidates) - if fnmatch.fnmatch(item.name, pattern) + if fnmatch.fnmatch(item.name, pattern) and not _is_skipped_repo_path(item, self.project_root) ] lines = relative_files[:max_files] if len(relative_files) > max_files: @@ -64,6 +66,8 @@ class RepoTools: def read_file(self, path: str, max_bytes: int = DEFAULT_MAX_BYTES) -> str: file_path = self._resolve_scoped(path, "read_file path") + if _is_skipped_repo_path(file_path, self.project_root): + return f"Path is skipped for repository lookup: {path}" if not file_path.exists() or not file_path.is_file(): return f"File not found: {path}" data = file_path.read_bytes()[:max_bytes + 1] @@ -85,6 +89,8 @@ class RepoTools: files = [root] if root.is_file() else [item for item in root.rglob("*") if item.is_file()] matches: list[str] = [] for file_path in sorted(files): + if _is_skipped_repo_path(file_path, self.project_root): + continue try: text = file_path.read_text(encoding="utf-8", errors="replace") except OSError: @@ -110,7 +116,8 @@ class RepoTools: def execute_requests(self, task_id: str, requests: list[ToolCall], filename: str = "repo-tools.md") -> str: completed: list[ToolCall] = [] - for request in requests: + unique_requests = dedupe_tool_calls(requests)[:DEFAULT_MAX_LOOKUP_REQUESTS] + for request in unique_requests: self.logger.event( "tool.call", "Running repo lookup tool", @@ -175,7 +182,7 @@ def format_tool_calls(calls: list[ToolCall]) -> str: return "\n".join(lines) -def parse_lookup_requests(text: str) -> list[ToolCall]: +def parse_lookup_requests(text: str, max_requests: int = DEFAULT_MAX_LOOKUP_REQUESTS) -> list[ToolCall]: """Parse a small YAML-like lookup request list from model output.""" lines = text.splitlines() @@ -215,7 +222,19 @@ def parse_lookup_requests(text: str) -> list[ToolCall]: flush() current[key] = value flush() - return requests + return dedupe_tool_calls(requests)[:max_requests] + + +def dedupe_tool_calls(requests: list[ToolCall]) -> list[ToolCall]: + seen: set[tuple[str, tuple[tuple[str, str], ...]]] = set() + unique: list[ToolCall] = [] + for request in requests: + key = (request.name, tuple(sorted(request.arguments.items()))) + if key in seen: + continue + seen.add(key) + unique.append(request) + return unique def extract_agent_stdout(artifact_text: str) -> str: @@ -249,3 +268,11 @@ def _relative(path: Path, root: Path) -> str: return path.relative_to(root).as_posix() except ValueError: return path.as_posix() + + +def _is_skipped_repo_path(path: Path, root: Path) -> bool: + try: + parts = set(path.relative_to(root).parts) + except ValueError: + parts = set(path.parts) + return bool(parts & SKIPPED_REPO_PARTS) diff --git a/nightshift/runlog.py b/nightshift/runlog.py index b8a7014..859e37a 100644 --- a/nightshift/runlog.py +++ b/nightshift/runlog.py @@ -27,18 +27,23 @@ class RunLogger: self.console = console self._run_log_path: Path | None = None self._aggregate_log_path: Path | None = None + self._initialized_run_logs: set[Path] = set() def bind(self, artifacts: ArtifactStore) -> None: artifacts.initialize_run() self._run_log_path = artifacts.run_log_path self._aggregate_log_path = artifacts.aggregate_log_path + if self._run_log_path not in self._initialized_run_logs: + self._run_log_path.parent.mkdir(parents=True, exist_ok=True) + self._run_log_path.write_text("", encoding="utf-8") + self._initialized_run_logs.add(self._run_log_path) def event(self, event: str, message: str, **fields: object) -> None: safe_fields = _redact_fields(fields) line = format_log_line(LogEvent(event=event, message=message, fields=safe_fields)) if self.console is not None: self.console(line) - for path in (self._run_log_path, self._aggregate_log_path): + for path in (self._run_log_path,): if path is None: continue path.parent.mkdir(parents=True, exist_ok=True) diff --git a/nightshift/web.py b/nightshift/web.py index bbea4d5..6be1def 100644 --- a/nightshift/web.py +++ b/nightshift/web.py @@ -50,13 +50,18 @@ def read_artifact(run_path: Path, relative_path: str) -> str: def render_dashboard(artifact_dir: str | Path) -> str: runs = list_runs(artifact_dir) - body = ["

NightShift Dashboard

", ''] + body = [ + "

NightShift Dashboard

", + '', + "

Showing artifact files from the newest run first. This page is read-only and refreshes every 5 seconds.

", + ] if not runs: body.append("

No runs found.

") - for run in runs: + for index, run in enumerate(runs): + title = "Latest Run" if index == 0 else "Older Run" body.extend( [ - f"

{escape(run.name)}

", + f"

{title}: {escape(run.name)}

", "
",
                 escape(run.summary),
                 "
", @@ -84,11 +89,15 @@ def create_app(project_root: str | Path = ".", artifact_dir: str | Path = ".nigh @app.get("/") def index(): - return Response(render_dashboard(artifacts), mimetype="text/html") + response = Response(render_dashboard(artifacts), mimetype="text/html") + response.headers["Cache-Control"] = "no-store, max-age=0" + return response @app.get("/runs//") def artifact(run_id: str, artifact_path: str): content = read_artifact(artifacts / "runs" / run_id, artifact_path) - return Response(f"
{escape(content)}
", mimetype="text/html") + response = Response(f"
{escape(content)}
", mimetype="text/html") + response.headers["Cache-Control"] = "no-store, max-age=0" + return response return app diff --git a/tests/test_agents.py b/tests/test_agents.py index 6b73c3e..7e4392f 100644 --- a/tests/test_agents.py +++ b/tests/test_agents.py @@ -1,9 +1,10 @@ from pathlib import Path +import io import tempfile import unittest from unittest.mock import MagicMock, patch -from nightshift.agents import AgentExecutor, build_prompt_bundle, parse_review_output +from nightshift.agents import AgentExecutor, build_prompt_bundle, parse_review_output, strip_ansi_escape_sequences from nightshift.agents import AgentInvocation, format_agent_invocation from nightshift.artifacts import ArtifactStore from nightshift.config import AgentConfig, StageConfig @@ -118,17 +119,25 @@ class AgentExecutorTests(unittest.TestCase): task = parse_tasks(TASK_MD)[0] stage = StageConfig(id="plan", type="agent", agent="planner", output="plan.md") - completed = type( - "Completed", - (), - {"returncode": 0, "stdout": "ollama output", "stderr": ""}, - )() - with patch("nightshift.agents.subprocess.run", return_value=completed) as run: + class FakePopen: + def __init__(self, args, cwd=None, stdin=None, stdout=None, stderr=None, **kwargs): + self.args = args + self.stdin = io.StringIO() + self.returncode = 0 + stdout.write("ollama output") + + def poll(self): + return self.returncode + + def wait(self): + return self.returncode + + with patch("nightshift.agents.subprocess.Popen", side_effect=FakePopen) as popen: result = executor.run_stage(stage, task) self.assertEqual(result.status, "pass") - run.assert_called_once() - self.assertEqual(run.call_args.args[0], ["ollama", "run", "tiny-model"]) + popen.assert_called_once() + self.assertEqual(popen.call_args.args[0], ["ollama", "run", "tiny-model"]) output = (root / result.output_path).read_text(encoding="utf-8") self.assertIn("ollama run tiny-model", output) @@ -185,6 +194,9 @@ class AgentExecutorTests(unittest.TestCase): self.assertIn("Agent: `planner`", output) self.assertIn("## stderr", output) + def test_strip_ansi_escape_sequences(self) -> None: + self.assertEqual(strip_ansi_escape_sequences("\x1b[?25lthinking\x1b[0m"), "thinking") + if __name__ == "__main__": unittest.main() diff --git a/tests/test_artifacts.py b/tests/test_artifacts.py index 8403442..8b91c5b 100644 --- a/tests/test_artifacts.py +++ b/tests/test_artifacts.py @@ -2,7 +2,7 @@ from pathlib import Path import tempfile import unittest -from nightshift.artifacts import ArtifactStore +from nightshift.artifacts import ArtifactStore, default_run_id from nightshift.errors import ArtifactError from nightshift.init import init_project from nightshift.tasks import parse_task_file @@ -62,6 +62,11 @@ class ArtifactStoreTests(unittest.TestCase): with self.assertRaisesRegex(ArtifactError, "task id contains unsafe"): store.create_task_dir("../TASK-001") + def test_default_run_id_has_subsecond_precision(self) -> None: + run_id = default_run_id() + + self.assertRegex(run_id, r"^\d{8}T\d{6}\.\d{6}Z$") + if __name__ == "__main__": unittest.main() diff --git a/tests/test_pipeline.py b/tests/test_pipeline.py index f84c6be..1526e78 100644 --- a/tests/test_pipeline.py +++ b/tests/test_pipeline.py @@ -240,10 +240,13 @@ Acceptance Criteria: config = make_config(root, stages) runner = PipelineRunner(config, artifacts) task = parse_tasks(TASK_MD)[0] + artifacts.initialize_run() + artifacts.run_log_path.write_text("old run log\n", encoding="utf-8") runner.run_task(task) log = (root / ".nightshift" / "runs" / "test-run" / "run.log").read_text(encoding="utf-8") + self.assertNotIn("old run log", log) self.assertIn("task.start", log) self.assertIn("stage.start", log) self.assertIn("agent.finish", log) diff --git a/tests/test_repo_tools.py b/tests/test_repo_tools.py index 73e84a0..055f1a4 100644 --- a/tests/test_repo_tools.py +++ b/tests/test_repo_tools.py @@ -42,6 +42,46 @@ lookup_requests: self.assertEqual(requests[0].arguments["path"], "nightshift/pipeline.py") self.assertEqual(requests[1].arguments["pattern"], "PipelineRunner") + def test_parse_lookup_requests_dedupes_and_caps(self) -> None: + repeated = "\n".join( + [ + "lookup_requests:", + *[ + "- tool: grep\n path: .\n pattern: parse.*tokenize" + for _ in range(20) + ], + "- tool: read_file\n path: lisp.py", + ] + ) + + requests = parse_lookup_requests(repeated) + + self.assertEqual(len(requests), 2) + self.assertEqual([request.name for request in requests], ["grep", "read_file"]) + + def test_repo_tools_skip_artifact_directories(self) -> None: + with tempfile.TemporaryDirectory() as directory: + root = Path(directory) + (root / ".nightshift" / "runs").mkdir(parents=True) + (root / ".nightshift" / "runs" / "run.log").write_text("needle\n", encoding="utf-8") + (root / "src").mkdir() + (root / "src" / "app.py").write_text("needle\n", encoding="utf-8") + safety = SafetyConfig( + require_clean_worktree=False, + scoped_paths=(".",), + allowed_commands=(), + forbidden_commands=(), + ) + tools = RepoTools(root, safety, ArtifactStore(root, ".nightshift", run_id="test-run")) + + files = tools.list_files(".") + matches = tools.grep("needle", ".") + + self.assertIn("src/app.py", files) + self.assertNotIn(".nightshift", files) + self.assertIn("src/app.py:1", matches) + self.assertNotIn(".nightshift", matches) + if __name__ == "__main__": unittest.main()