From 646c655314c329d776f288b12edf62f33e5278b0 Mon Sep 17 00:00:00 2001 From: "K. Hodges" Date: Sun, 17 May 2026 09:56:28 -0700 Subject: [PATCH] Repo Lookup, Request Context, Planner, Context Stage, QoL improvements - Added operational run logging via nightshift/runlog.py. - CLI now streams progress during run / run --all. - Runs write .nightshift/runs//run.log and aggregate .nightshift/nightshift.log. - Web dashboard now shows the last 100 run log lines. - Added agent temperature config. - Added minimal openai_compatible backend and temperature passing for it. - Added Ollama temperature handling. - Added scoped repo lookup tools in nightshift/repo_tools.py: list_files, read_file, grep. - Planner agents can request lookup context with lookup_requests; NightShift saves files-inspected.md and reruns the planner with retrieved context. - Added repo_context stage type that writes context-pack.md. - Marked phases 23-27 complete in docs/design.md:990. --- docs/design.md | 64 +++++----- nightshift/agents.py | 138 ++++++++++++++++++++- nightshift/artifacts.py | 2 + nightshift/cli.py | 3 +- nightshift/commands.py | 28 ++++- nightshift/config.py | 40 ++++++- nightshift/pipeline.py | 235 +++++++++++++++++++++++++++++++++++- nightshift/repo_tools.py | 250 +++++++++++++++++++++++++++++++++++++++ nightshift/runlog.py | 91 ++++++++++++++ nightshift/web.py | 15 ++- tests/test_agents.py | 39 +++++- tests/test_config.py | 53 +++++++++ tests/test_pipeline.py | 73 ++++++++++++ tests/test_repo_tools.py | 47 ++++++++ tests/test_web.py | 9 ++ 15 files changed, 1042 insertions(+), 45 deletions(-) create mode 100644 nightshift/repo_tools.py create mode 100644 nightshift/runlog.py create mode 100644 tests/test_repo_tools.py diff --git a/docs/design.md b/docs/design.md index 388f9c5..95d636a 100644 --- a/docs/design.md +++ b/docs/design.md @@ -989,18 +989,18 @@ NightShift should make active runs easier to observe from both the CLI and the w Implementation tasks: -* [ ] Add a small logging 
module with structured operational events. -* [ ] Stream human-readable progress to the CLI during `run` and `run --all`. -* [ ] Include run id, task id, stage id, agent/backend, command index, retry count, status, duration, and artifact path where available. -* [ ] Write a per-run log file such as `.nightshift/runs//run.log`. -* [ ] Optionally write or rotate an aggregate `.nightshift/nightshift.log` for cross-run troubleshooting. -* [ ] Keep logs operational; do not duplicate full prompts, full model responses, or full command output that already lives in artifacts. -* [ ] Redact or avoid secrets from logged environment/config values. -* [ ] Add dashboard support for viewing the latest log tail. -* [ ] Cap the dashboard log view to the last 100 lines by default. -* [ ] Keep the full per-run log file available as an artifact unless a later size cap is configured. -* [ ] Auto-refresh the dashboard log view with the existing dashboard refresh model. -* [ ] Add tests for log writing, CLI progress hooks, dashboard log rendering, missing log files, and the 100-line cap. +* [x] Add a small logging module with structured operational events. +* [x] Stream human-readable progress to the CLI during `run` and `run --all`. +* [x] Include run id, task id, stage id, agent/backend, command index, retry count, status, duration, and artifact path where available. +* [x] Write a per-run log file such as `.nightshift/runs//run.log`. +* [x] Optionally write or rotate an aggregate `.nightshift/nightshift.log` for cross-run troubleshooting. +* [x] Keep logs operational; do not duplicate full prompts, full model responses, or full command output that already lives in artifacts. +* [x] Redact or avoid secrets from logged environment/config values. +* [x] Add dashboard support for viewing the latest log tail. +* [x] Cap the dashboard log view to the last 100 lines by default. +* [x] Keep the full per-run log file available as an artifact unless a later size cap is configured. 
+* [x] Auto-refresh the dashboard log view with the existing dashboard refresh model. +* [x] Add tests for log writing, CLI progress hooks, dashboard log rendering, missing log files, and the 100-line cap. Acceptance Criteria: @@ -1019,35 +1019,35 @@ Notes: ## Phase 24: Per-Agent Model Parameters -- [ ] Add `temperature` to agent config. -- [ ] Pass temperature to Ollama/OpenAI-compatible backends. -- [ ] Default safely if omitted. -- [ ] Add config validation tests. +- [x] Add `temperature` to agent config. +- [x] Pass temperature to Ollama/OpenAI-compatible backends. +- [x] Default safely if omitted. +- [x] Add config validation tests. ## Phase 25: Repo Lookup Tools MVP -- [ ] Add tool interface for repo operations. -- [ ] Implement scoped `list_files`. -- [ ] Implement scoped `read_file`. -- [ ] Implement scoped `grep`. -- [ ] Enforce existing path safety rules. -- [ ] Log tool calls as artifacts. +- [x] Add tool interface for repo operations. +- [x] Implement scoped `list_files`. +- [x] Implement scoped `read_file`. +- [x] Implement scoped `grep`. +- [x] Enforce existing path safety rules. +- [x] Log tool calls as artifacts. ## Phase 26: Planner Code-Discovery Support -- [ ] Teach planner prompt to request needed code context. -- [ ] Add structured planner output for lookup requests. -- [ ] Execute requested lookup tools. -- [ ] Save `files-inspected.md`. -- [ ] Re-run planner with retrieved context. +- [x] Teach planner prompt to request needed code context. +- [x] Add structured planner output for lookup requests. +- [x] Execute requested lookup tools. +- [x] Save `files-inspected.md`. +- [x] Re-run planner with retrieved context. ## Phase 27: Context Pack Builder -- [ ] Add `repo_context` stage. -- [ ] Generate `context-pack.md`. -- [ ] Include task, acceptance criteria, relevant files, snippets, and constraints. -- [ ] Add line-numbered excerpts. -- [ ] Add context-size caps. +- [x] Add `repo_context` stage. +- [x] Generate `context-pack.md`. 
+- [x] Include task, acceptance criteria, relevant files, snippets, and constraints. +- [x] Add line-numbered excerpts. +- [x] Add context-size caps. ## Phase 28: Project Context Chart MVP diff --git a/nightshift/agents.py b/nightshift/agents.py index fdc652a..69618b3 100644 --- a/nightshift/agents.py +++ b/nightshift/agents.py @@ -3,13 +3,18 @@ from __future__ import annotations from dataclasses import dataclass +import json +import os from pathlib import Path import subprocess import time +from urllib import request +from urllib.error import URLError from .artifacts import ArtifactStore from .config import AgentConfig, StageConfig from .errors import AgentError, SafetyError +from .runlog import NullRunLogger, RunLogger from .safety import resolve_inside_root, resolve_project_root from .stages import StageResult, StageStatus from .tasks import Task @@ -43,11 +48,13 @@ class AgentExecutor: agents: dict[str, AgentConfig], artifacts: ArtifactStore, timeout_seconds: int = DEFAULT_AGENT_TIMEOUT_SECONDS, + logger: RunLogger | None = None, ) -> None: self.project_root = resolve_project_root(project_root) self.agents = agents self.artifacts = artifacts self.timeout_seconds = timeout_seconds + self.logger = logger or NullRunLogger() def run_stage( self, @@ -64,7 +71,7 @@ class AgentExecutor: agent = self.agents.get(stage.agent) if agent is None: raise AgentError(f"Agent error: unknown agent '{stage.agent}' for stage '{stage.id}'.") - if agent.backend not in {"command", "ollama"}: + if agent.backend not in {"command", "ollama", "openai_compatible"}: raise AgentError( f"Agent error: agent '{agent.id}' uses unsupported backend '{agent.backend}'." 
) @@ -72,6 +79,10 @@ class AgentExecutor: raise AgentError(f"Agent error: command backend agent '{agent.id}' has no command.") if agent.backend == "ollama" and not agent.model: raise AgentError(f"Agent error: ollama backend agent '{agent.id}' has no model.") + if agent.backend == "openai_compatible" and not agent.model: + raise AgentError(f"Agent error: openai_compatible backend agent '{agent.id}' has no model.") + if agent.backend == "openai_compatible" and not agent.base_url: + raise AgentError(f"Agent error: openai_compatible backend agent '{agent.id}' has no base_url.") system_prompt = self._read_system_prompt(agent) prompt = build_prompt_bundle( @@ -84,10 +95,37 @@ class AgentExecutor: retry_notes=retry_notes or [], retry_context=retry_context, ) + self.logger.event( + "agent.start", + "Starting agent", + stage_id=stage.id, + agent_id=agent.id, + backend=agent.backend, + model=agent.model, + temperature=agent.temperature, + ) invocation = self._invoke(agent, prompt) + self.logger.event( + "agent.finish", + "Finished agent", + stage_id=stage.id, + agent_id=agent.id, + backend=agent.backend, + exit_code=invocation.exit_code, + duration=f"{invocation.duration_seconds:.3f}s", + timed_out=str(invocation.timed_out).lower(), + ) output_filename = stage.output or f"{stage.id}.md" output = format_agent_invocation(stage.id, invocation) output_path = self.artifacts.write_stage_output(task.id, output_filename, output) + self.logger.event( + "artifact.write", + "Wrote agent artifact", + stage_id=stage.id, + task_id=task.id, + agent_id=agent.id, + artifact_path=output_path.relative_to(self.project_root), + ) if invocation.timed_out: status: StageStatus = "fail" @@ -135,6 +173,8 @@ class AgentExecutor: def _invoke(self, agent: AgentConfig, prompt: str) -> AgentInvocation: if agent.backend == "ollama": return self._invoke_ollama(agent, prompt) + if agent.backend == "openai_compatible": + return self._invoke_openai_compatible(agent, prompt) return self._invoke_command(agent, 
prompt) def _invoke_command(self, agent: AgentConfig, prompt: str) -> AgentInvocation: @@ -180,12 +220,15 @@ class AgentExecutor: if not agent.model: raise AgentError(f"Agent error: ollama backend agent '{agent.id}' has no model.") command = f"ollama run {agent.model}" + prompt_input = prompt + if agent.temperature is not None: + prompt_input = f"/set parameter temperature {agent.temperature}\n{prompt}" started = time.monotonic() try: completed = subprocess.run( ["ollama", "run", agent.model], cwd=self.project_root, - input=prompt, + input=prompt_input, capture_output=True, text=True, encoding="utf-8", @@ -196,7 +239,7 @@ class AgentExecutor: return AgentInvocation( agent_id=agent.id, command=command, - prompt=prompt, + prompt=prompt_input, exit_code=completed.returncode, stdout=_coerce_output(completed.stdout), stderr=_coerce_output(completed.stderr), @@ -207,7 +250,7 @@ class AgentExecutor: return AgentInvocation( agent_id=agent.id, command=command, - prompt=prompt, + prompt=prompt_input, exit_code=127, stdout="", stderr=str(exc), @@ -218,7 +261,7 @@ class AgentExecutor: return AgentInvocation( agent_id=agent.id, command=command, - prompt=prompt, + prompt=prompt_input, exit_code=-1, stdout=_coerce_output(exc.stdout), stderr=_coerce_output(exc.stderr), @@ -226,6 +269,63 @@ class AgentExecutor: timed_out=True, ) + def _invoke_openai_compatible(self, agent: AgentConfig, prompt: str) -> AgentInvocation: + if not agent.model or not agent.base_url: + raise AgentError(f"Agent error: openai_compatible backend agent '{agent.id}' is incomplete.") + url = agent.base_url.rstrip("/") + "/chat/completions" + command = f"POST {url}" + body: dict[str, object] = { + "model": agent.model, + "messages": [{"role": "user", "content": prompt}], + } + if agent.temperature is not None: + body["temperature"] = agent.temperature + headers = {"Content-Type": "application/json"} + api_key_env = agent.api_key_env or "OPENAI_API_KEY" + api_key = os.environ.get(api_key_env) + if api_key: + 
headers["Authorization"] = f"Bearer {api_key}" + + started = time.monotonic() + try: + payload = json.dumps(body).encode("utf-8") + req = request.Request(url, data=payload, headers=headers, method="POST") + with request.urlopen(req, timeout=self.timeout_seconds) as response: + raw = response.read().decode("utf-8", errors="replace") + duration = time.monotonic() - started + return AgentInvocation( + agent_id=agent.id, + command=command, + prompt=prompt, + exit_code=0, + stdout=_extract_openai_content(raw), + stderr="", + duration_seconds=duration, + ) + except TimeoutError: + duration = time.monotonic() - started + return AgentInvocation( + agent_id=agent.id, + command=command, + prompt=prompt, + exit_code=-1, + stdout="", + stderr="Request timed out.", + duration_seconds=duration, + timed_out=True, + ) + except (OSError, URLError) as exc: + duration = time.monotonic() - started + return AgentInvocation( + agent_id=agent.id, + command=command, + prompt=prompt, + exit_code=1, + stdout="", + stderr=str(exc), + duration_seconds=duration, + ) + def build_prompt_bundle( system_prompt: str, @@ -294,6 +394,20 @@ def _coerce_output(value: str | bytes | None) -> str: return value +def _extract_openai_content(raw: str) -> str: + try: + data = json.loads(raw) + choices = data.get("choices", []) + if choices: + message = choices[0].get("message", {}) + content = message.get("content") + if isinstance(content, str): + return content + except (json.JSONDecodeError, AttributeError): + pass + return raw + + def output_contract_for(stage: StageConfig) -> str: if stage.type in {"agent_review", "review"}: return "\n".join( @@ -305,6 +419,20 @@ def output_contract_for(stage: StageConfig) -> str: "context_update: ", ] ) + if stage.type == "agent" and ("plan" in stage.id.lower() or stage.agent == "planner"): + return "\n".join( + [ + "Write the requested stage output in concise markdown.", + "", + "If you need repository context before finalizing the plan, include:", + 
"lookup_requests:", + "- tool: list_files | read_file | grep", + " path: ", + " pattern: ", + "", + "NightShift will run these read-only lookup tools, save files-inspected.md, and re-run this planner stage with the retrieved context.", + ] + ) return "Write the requested stage output in concise markdown." diff --git a/nightshift/artifacts.py b/nightshift/artifacts.py index c9cba75..2bdda65 100644 --- a/nightshift/artifacts.py +++ b/nightshift/artifacts.py @@ -39,6 +39,8 @@ class ArtifactStore: self.project_context_path = self.artifact_root / "project-context.md" self.run_summary_path = self.run_dir / "run-summary.md" self.config_snapshot_path = self.run_dir / "config.snapshot.yaml" + self.run_log_path = self.run_dir / "run.log" + self.aggregate_log_path = self.artifact_root / "nightshift.log" @classmethod def from_config(cls, config: NightShiftConfig, run_id: str | None = None) -> "ArtifactStore": diff --git a/nightshift/cli.py b/nightshift/cli.py index 38d474c..dccb2a7 100644 --- a/nightshift/cli.py +++ b/nightshift/cli.py @@ -10,6 +10,7 @@ from .config import validate_config from .errors import NightShiftError from .init import init_project from .pipeline import PipelineRunner +from .runlog import RunLogger from .status import build_status, format_status from .tasks import ( ensure_dependencies_satisfied, @@ -80,7 +81,7 @@ def main(argv: list[str] | None = None) -> int: validate_task_dependencies(tasks) if args.all and args.task: parser.error("run accepts either --all or --task, not both.") - runner = PipelineRunner(config) + runner = PipelineRunner(config, logger=RunLogger(console=print)) if args.all: selected = [task for task in tasks if not task.completed] result = runner.run_tasks(selected) diff --git a/nightshift/commands.py b/nightshift/commands.py index 267b611..4949064 100644 --- a/nightshift/commands.py +++ b/nightshift/commands.py @@ -12,6 +12,7 @@ import time from .artifacts import ArtifactStore from .config import SafetyConfig, StageConfig from 
.errors import CommandError, SafetyError +from .runlog import NullRunLogger, RunLogger from .safety import ensure_command_allowed, resolve_inside_root, resolve_project_root from .stages import StageResult @@ -38,11 +39,13 @@ class CommandExecutor: safety: SafetyConfig, artifacts: ArtifactStore, timeout_seconds: int = DEFAULT_COMMAND_TIMEOUT_SECONDS, + logger: RunLogger | None = None, ) -> None: self.project_root = resolve_project_root(project_root) self.safety = safety self.artifacts = artifacts self.timeout_seconds = timeout_seconds + self.logger = logger or NullRunLogger() def run_stage(self, stage: StageConfig, task_id: str) -> StageResult: if stage.type != "command": @@ -56,7 +59,14 @@ class CommandExecutor: status = "pass" reason = "All commands passed." - for command in stage.commands: + for index, command in enumerate(stage.commands, start=1): + self.logger.event( + "command.start", + "Starting command", + stage_id=stage.id, + command_index=index, + command=command, + ) run = self.run_command( command, shell=stage.shell, @@ -64,6 +74,15 @@ class CommandExecutor: working_dir=stage.working_dir, ) runs.append(run) + self.logger.event( + "command.finish", + "Finished command", + stage_id=stage.id, + command_index=index, + exit_code=run.exit_code, + duration=f"{run.duration_seconds:.3f}s", + timed_out=str(run.timed_out).lower(), + ) if run.timed_out: status = "fail" timeout = stage.timeout_seconds or self.timeout_seconds @@ -80,6 +99,13 @@ class CommandExecutor: output_filename, format_command_runs(stage.id, runs), ) + self.logger.event( + "artifact.write", + "Wrote command artifact", + stage_id=stage.id, + task_id=task_id, + artifact_path=output_path.relative_to(self.project_root), + ) return StageResult( stage_id=stage.id, status=status, # type: ignore[arg-type] diff --git a/nightshift/config.py b/nightshift/config.py index 90f3cf6..7d70d86 100644 --- a/nightshift/config.py +++ b/nightshift/config.py @@ -43,6 +43,9 @@ class AgentConfig: system_prompt: Path 
model: str | None = None role: str | None = None + temperature: float | None = None + base_url: str | None = None + api_key_env: str | None = None @dataclass(frozen=True) @@ -83,7 +86,7 @@ class NightShiftConfig: AGENT_STAGE_TYPES = {"agent", "agent_review", "review"} COMMAND_STAGE_TYPES = {"command"} -SUPPORTED_STAGE_TYPES = AGENT_STAGE_TYPES | COMMAND_STAGE_TYPES | {"summarize"} +SUPPORTED_STAGE_TYPES = AGENT_STAGE_TYPES | COMMAND_STAGE_TYPES | {"repo_context", "summarize"} def load_config(path: str | Path = "nightshift.yaml") -> NightShiftConfig: @@ -181,10 +184,20 @@ def parse_config(raw: dict[str, Any], config_path: Path) -> NightShiftConfig: backend = _require_string(agent_raw, "backend", f"agents.{agent_id}") command = _optional_string(agent_raw.get("command"), f"agents.{agent_id}.command") model = _optional_string(agent_raw.get("model"), f"agents.{agent_id}.model") - if backend not in {"command", "ollama"}: + base_url = _optional_string(agent_raw.get("base_url"), f"agents.{agent_id}.base_url") + api_key_env = _optional_string(agent_raw.get("api_key_env"), f"agents.{agent_id}.api_key_env") + temperature = _optional_float_or_none( + agent_raw.get("temperature"), + f"agents.{agent_id}.temperature", + ) + if temperature is not None and temperature < 0: + raise ConfigError( + f"Config error: agents.{agent_id}.temperature must be zero or greater." + ) + if backend not in {"command", "ollama", "openai_compatible"}: raise ConfigError( f"Config error: agent '{agent_id}' uses unsupported backend '{backend}'. " - "Supported backends: command, ollama." + "Supported backends: command, ollama, openai_compatible." ) if backend == "command" and command is None: raise ConfigError( @@ -194,6 +207,14 @@ def parse_config(raw: dict[str, Any], config_path: Path) -> NightShiftConfig: raise ConfigError( f"Config error: ollama backend agent '{agent_id}' must define model." 
) + if backend == "openai_compatible" and model is None: + raise ConfigError( + f"Config error: openai_compatible backend agent '{agent_id}' must define model." + ) + if backend == "openai_compatible" and base_url is None: + raise ConfigError( + f"Config error: openai_compatible backend agent '{agent_id}' must define base_url." + ) system_prompt = Path(_require_string(agent_raw, "system_prompt", f"agents.{agent_id}")) agents[str(agent_id)] = AgentConfig( id=str(agent_id), @@ -202,6 +223,9 @@ def parse_config(raw: dict[str, Any], config_path: Path) -> NightShiftConfig: system_prompt=system_prompt, model=model, role=_optional_string(agent_raw.get("role"), f"agents.{agent_id}.role"), + temperature=temperature, + base_url=base_url, + api_key_env=api_key_env, ) experiment_raw = raw.get("experiment", {}) @@ -444,6 +468,8 @@ def _parse_scalar(value: str) -> Any: return None if re.fullmatch(r"-?\d+", value): return int(value) + if re.fullmatch(r"-?(\d+\.\d*|\d*\.\d+)", value): + return float(value) if (value.startswith('"') and value.endswith('"')) or ( value.startswith("'") and value.endswith("'") ): @@ -492,6 +518,14 @@ def _optional_int_or_none(value: Any, context: str) -> int | None: return _optional_int(value, context) +def _optional_float_or_none(value: Any, context: str) -> float | None: + if value is None: + return None + if isinstance(value, bool) or not isinstance(value, (int, float)): + raise ConfigError(f"Config error: '{context}' must be a number when set.") + return float(value) + + def _string_tuple(value: Any, context: str) -> tuple[str, ...]: if value is None: return () diff --git a/nightshift/pipeline.py b/nightshift/pipeline.py index 2f305b2..39d0e96 100644 --- a/nightshift/pipeline.py +++ b/nightshift/pipeline.py @@ -4,6 +4,7 @@ from __future__ import annotations from dataclasses import dataclass from pathlib import Path +import re from .agents import AgentExecutor from .artifacts import ArtifactStore @@ -14,6 +15,8 @@ from .errors import PipelineError 
from .errors import NightShiftError from .git import ensure_clean_worktree, write_diff_artifact, write_git_artifacts from .reports import ReportGenerator +from .repo_tools import RepoTools, extract_agent_stdout, parse_lookup_requests +from .runlog import RunLogger from .stages import StageResult from .tasks import Task, mark_task_completed @@ -46,9 +49,11 @@ class PipelineRunner: artifacts: ArtifactStore | None = None, agent_timeout_seconds: int = 600, command_timeout_seconds: int = 300, + logger: RunLogger | None = None, ) -> None: self.config = config self.artifacts = artifacts or ArtifactStore.from_config(config) + self.logger = logger or RunLogger() self.context = ContextManager(self.artifacts) self.reports = ReportGenerator( config.project.root, @@ -61,17 +66,33 @@ class PipelineRunner: config.agents, self.artifacts, timeout_seconds=agent_timeout_seconds, + logger=self.logger, ) self.command_executor = CommandExecutor( config.project.root, config.safety, self.artifacts, timeout_seconds=command_timeout_seconds, + logger=self.logger, + ) + self.repo_tools = RepoTools( + config.project.root, + config.safety, + self.artifacts, + logger=self.logger, ) def run_task(self, task: Task) -> PipelineResult: ensure_clean_worktree(self.config.project.root, self.config.safety.require_clean_worktree) self.artifacts.initialize_run() + self.logger.bind(self.artifacts) + self.logger.event( + "task.start", + "Starting task", + run_id=self.artifacts.run_id, + task_id=task.id, + task_title=task.title, + ) self.artifacts.write_config_snapshot(self.config.path) self.artifacts.write_prompt_snapshots( { @@ -97,6 +118,15 @@ class PipelineRunner: while index < len(stages): stage = stages[index] + self.logger.event( + "stage.start", + "Starting stage", + run_id=self.artifacts.run_id, + task_id=task.id, + stage_id=stage.id, + stage_type=stage.type, + retry_count=retry_count, + ) try: result = self._run_stage(stage, task, previous_outputs, retry_notes) except NightShiftError as exc: @@ 
-113,6 +143,16 @@ class PipelineRunner: ) stage_results.append(result) previous_outputs[stage.id] = self._read_output(result.output_path) + self.logger.event( + "stage.finish", + "Finished stage", + run_id=self.artifacts.run_id, + task_id=task.id, + stage_id=stage.id, + status=result.status, + reason=result.reason, + artifact_path=result.output_path, + ) if result.context_update: retry_notes.append(f"Context update from '{stage.id}': {result.context_update}") @@ -135,6 +175,16 @@ class PipelineRunner: ) break retry_count += 1 + self.logger.event( + "stage.retry", + "Redirecting after stage result", + run_id=self.artifacts.run_id, + task_id=task.id, + stage_id=stage.id, + status=result.status, + retry_count=retry_count, + next_stage=target_stage, + ) retry_notes.append( f"Retry {retry_count}: stage '{stage.id}' returned " f"{result.status} ({result.reason}); redirecting to '{target_stage}'." @@ -179,6 +229,16 @@ class PipelineRunner: stage_results, context_out_path=context_out_path, ) + self.logger.event( + "task.finish", + "Finished task", + run_id=self.artifacts.run_id, + task_id=task.id, + status=final_status, + retry_count=retry_count, + reason=final_reason, + artifact_path=self.artifacts.create_task_dir(task.id).directory.relative_to(self.config.project.root), + ) return PipelineResult( task_id=task.id, @@ -191,6 +251,8 @@ class PipelineRunner: def run_tasks(self, tasks: list[Task] | tuple[Task, ...]) -> MultiTaskResult: self.artifacts.initialize_run() + self.logger.bind(self.artifacts) + self.logger.event("run.start", "Starting multi-task run", run_id=self.artifacts.run_id) results: list[PipelineResult] = [] known_ids = {task.id for task in tasks} completed_ids = {task.id for task in tasks if task.completed} @@ -216,6 +278,13 @@ class PipelineRunner: reason="Task blocked by " + "; ".join(reason_parts), ) results.append(blocked) + self.logger.event( + "task.blocked", + "Task blocked by dependencies", + run_id=self.artifacts.run_id, + task_id=task.id, + 
reason=blocked.reason, + ) if not self.config.pipeline.continue_on_task_failure: break continue @@ -234,6 +303,14 @@ class PipelineRunner: format_aggregate_run_summary(results, status, reason), encoding="utf-8", ) + self.logger.event( + "run.finish", + "Finished multi-task run", + run_id=self.artifacts.run_id, + status=status, + completed_count=completed_count, + failed_count=failed_count, + ) return MultiTaskResult( status=status, task_results=tuple(results), @@ -251,7 +328,7 @@ class PipelineRunner: ) -> StageResult: if stage.type in {"agent", "agent_review", "review"}: context = self.context.read_context(task, retry_notes) - return self.agent_executor.run_stage( + result = self.agent_executor.run_stage( stage, task, previous_outputs, @@ -260,8 +337,39 @@ class PipelineRunner: task_context=context.task_context, retry_context=context.retry_context, ) + if stage.type == "agent": + return self._maybe_rerun_agent_with_repo_lookup( + stage, + task, + result, + previous_outputs, + retry_notes, + context.project_context, + context.task_context, + context.retry_context, + ) + return result if stage.type in COMMAND_STAGE_TYPES: return self.command_executor.run_stage(stage, task.id) + if stage.type == "repo_context": + output_path = self.artifacts.write_stage_output( + task.id, + stage.output or "context-pack.md", + self._build_context_pack(task), + ) + self.logger.event( + "artifact.write", + "Wrote context pack", + stage_id=stage.id, + task_id=task.id, + artifact_path=output_path.relative_to(self.config.project.root), + ) + return StageResult( + stage_id=stage.id, + status="pass", + reason="Context pack written.", + output_path=str(output_path.relative_to(self.config.project.root)), + ) if stage.type == "summarize": output_path = self.artifacts.write_stage_output( task.id, @@ -276,6 +384,103 @@ class PipelineRunner: ) raise PipelineError(f"Pipeline error: unsupported stage type '{stage.type}'.") + def _maybe_rerun_agent_with_repo_lookup( + self, + stage: StageConfig, + 
task: Task, + result: StageResult, + previous_outputs: dict[str, str], + retry_notes: list[str], + project_context: str, + task_context: str, + retry_context: str | None, + ) -> StageResult: + if result.status != "pass" or result.output_path is None: + return result + output_text = self._read_output(result.output_path) + requests = parse_lookup_requests(extract_agent_stdout(output_text)) + if not requests: + return result + lookup_context = self.repo_tools.execute_requests( + task.id, + requests, + filename="files-inspected.md", + ) + self.logger.event( + "agent.rerun", + "Re-running agent with repo lookup context", + stage_id=stage.id, + task_id=task.id, + lookup_count=len(requests), + ) + rerun_outputs = dict(previous_outputs) + rerun_outputs["repo_lookup_results"] = lookup_context + rerun_result = self.agent_executor.run_stage( + stage, + task, + rerun_outputs, + retry_notes, + project_context=project_context, + task_context=task_context, + retry_context=retry_context, + ) + return StageResult( + stage_id=rerun_result.stage_id, + status=rerun_result.status, + reason=( + "Agent completed after repo lookup." 
+ if rerun_result.status == "pass" + else rerun_result.reason + ), + output_path=rerun_result.output_path, + next_stage=rerun_result.next_stage, + context_update=rerun_result.context_update, + ) + + def _build_context_pack(self, task: Task) -> str: + terms = _task_search_terms(task) + files = self.repo_tools.list_files(".", pattern="*.py", max_files=80) + grep_sections: list[str] = [] + for term in terms[:5]: + grep_sections.extend( + [ + f"### Search: {term}", + "", + "```text", + self.repo_tools.grep(re.escape(term), ".", max_matches=20), + "```", + "", + ] + ) + return "\n".join( + [ + "# Context Pack", + "", + f"Task: `{task.id}`", + f"Title: {task.title}", + "", + "## Acceptance Criteria", + "", + "\n".join(f"- {item}" for item in task.acceptance_criteria) or "- None", + "", + "## Constraints", + "", + f"- Scoped paths: {', '.join(self.config.safety.scoped_paths) or '.'}", + "- Repository lookups are read-only.", + "- Excerpts are line-numbered where files are read directly.", + "", + "## Relevant Files", + "", + "```text", + files, + "```", + "", + "## Search Results", + "", + *grep_sections, + ] + ) + def _read_output(self, output_path: str | None) -> str: if output_path is None: return "" @@ -365,9 +570,37 @@ def format_run_metadata(config: NightShiftConfig) -> str: "", f"- Backend: {agent.backend}", f"- Model: {agent.model or ''}", + f"- Temperature: {agent.temperature if agent.temperature is not None else ''}", + f"- Base URL: {agent.base_url or ''}", f"- Command: {agent.command or ''}", f"- System prompt: {agent.system_prompt}", "", ] ) return "\n".join(lines) + + +def _task_search_terms(task: Task) -> list[str]: + source = " ".join([task.id, task.title, *task.acceptance_criteria]) + words = re.findall(r"[A-Za-z_][A-Za-z0-9_]{2,}", source) + ignored = { + "the", + "and", + "for", + "with", + "that", + "this", + "task", + "add", + "use", + "can", + "should", + "must", + } + terms: list[str] = [] + for word in words: + lowered = word.lower() + if lowered 
in ignored or lowered in terms: + continue + terms.append(lowered) + return terms or [task.id] diff --git a/nightshift/repo_tools.py b/nightshift/repo_tools.py new file mode 100644 index 0000000..6b94e0f --- /dev/null +++ b/nightshift/repo_tools.py @@ -0,0 +1,250 @@ +"""Scoped repository lookup tools.""" + +from __future__ import annotations + +from dataclasses import dataclass +from pathlib import Path +import fnmatch +import re + +from .artifacts import ArtifactStore +from .config import SafetyConfig +from .errors import SafetyError +from .runlog import NullRunLogger, RunLogger +from .safety import resolve_inside_root, resolve_project_root, validate_scoped_paths + + +DEFAULT_MAX_BYTES = 20_000 +DEFAULT_MAX_MATCHES = 100 + + +@dataclass(frozen=True) +class ToolCall: + name: str + arguments: dict[str, str] + output: str + + +class RepoTools: + """Read-only repo tools constrained to configured project scope.""" + + def __init__( + self, + project_root: str | Path, + safety: SafetyConfig, + artifacts: ArtifactStore, + logger: RunLogger | None = None, + ) -> None: + self.project_root = resolve_project_root(project_root) + self.safety = safety + self.artifacts = artifacts + self.logger = logger or NullRunLogger() + self.scoped_roots = validate_scoped_paths( + self.project_root, + safety.scoped_paths or (".",), + ) + + def list_files(self, path: str = ".", pattern: str = "*", max_files: int = 200) -> str: + root = self._resolve_scoped(path, "list_files path") + if not root.exists(): + return f"Path not found: {path}" + if root.is_file(): + candidates = [root] + else: + candidates = [item for item in root.rglob("*") if item.is_file()] + relative_files = [ + _relative(item, self.project_root) + for item in sorted(candidates) + if fnmatch.fnmatch(item.name, pattern) + ] + lines = relative_files[:max_files] + if len(relative_files) > max_files: + lines.append(f"... truncated {len(relative_files) - max_files} files") + return "\n".join(lines) or "No files found." 
def read_file(self, path: str, max_bytes: int = DEFAULT_MAX_BYTES) -> str:
    """Return a line-numbered excerpt of one scoped file (``RepoTools`` method).

    Args:
        path: File path, resolved through ``self._resolve_scoped``.
        max_bytes: Maximum number of bytes of the file to include. Negative
            values are treated as zero.

    Returns:
        The decoded text with ``N: `` line prefixes, followed by a
        ``... truncated`` line when the file holds more than ``max_bytes``
        bytes. ``File not found: <path>`` when the path is not a regular
        file, and ``File not readable: <path>`` when the read fails with
        ``OSError``.

    Raises:
        Whatever ``self._resolve_scoped`` raises for a rejected path.
    """
    import codecs  # local import: only needed for boundary-safe decoding

    file_path = self._resolve_scoped(path, "read_file path")
    if not file_path.is_file():
        return f"File not found: {path}"
    limit = max(max_bytes, 0)
    try:
        # Read one byte past the cap: enough to detect truncation without
        # loading an arbitrarily large file into memory.
        with file_path.open("rb") as handle:
            data = handle.read(limit + 1)
    except OSError:
        # Mirror grep(): an unreadable file is reported, not fatal.
        return f"File not readable: {path}"
    truncated = len(data) > limit
    # With final=False the decoder holds back an incomplete trailing UTF-8
    # sequence, so a cut through a multi-byte character does not surface as
    # a stray replacement character at the end of the excerpt.
    decoder = codecs.getincrementaldecoder("utf-8")(errors="replace")
    text = decoder.decode(data[:limit], final=not truncated)
    numbered = _line_number(text)
    if truncated:
        numbered += "\n... truncated"
    return numbered


def grep(
    self,
    pattern: str,
    path: str = ".",
    max_matches: int = DEFAULT_MAX_MATCHES,
) -> str:
    """Search scoped files line by line for a regular expression (``RepoTools`` method).

    Args:
        pattern: Regular expression passed to ``re.compile``.
        path: File or directory to search, resolved through
            ``self._resolve_scoped``. Directories are searched recursively.
        max_matches: Maximum number of matching lines to report.

    Returns:
        One ``<relative path>:<line number>: <line>`` entry per match, in
        sorted file order. A final ``... truncated`` line is added only when
        at least one further match exists beyond ``max_matches``.
        ``Path not found: <path>`` when the path does not exist, and
        ``No matches found.`` when nothing matches.

    Raises:
        re.error: If ``pattern`` is not a valid regular expression.
        Whatever ``self._resolve_scoped`` raises for a rejected path.
    """
    root = self._resolve_scoped(path, "grep path")
    # Compile before touching the filesystem so an invalid pattern is always
    # reported, whatever the state of the path.
    regex = re.compile(pattern)
    if not root.exists():
        # Same wording as list_files() for a missing path.
        return f"Path not found: {path}"
    if root.is_file():
        files = [root]
    else:
        files = sorted(item for item in root.rglob("*") if item.is_file())
    matches: list[str] = []
    for file_path in files:
        try:
            text = file_path.read_text(encoding="utf-8", errors="replace")
        except OSError:
            # Best effort: skip files that cannot be read.
            continue
        relative = _relative(file_path, self.project_root)
        for line_number, line in enumerate(text.splitlines(), start=1):
            if not regex.search(line):
                continue
            if len(matches) >= max_matches:
                # A match beyond the cap exists, so the output really is
                # truncated; hitting the cap exactly is not truncation.
                matches.append("... truncated")
                return "\n".join(matches)
            matches.append(f"{relative}:{line_number}: {line}")
    return "\n".join(matches) or "No matches found."
+ + def write_tool_artifact(self, task_id: str, calls: list[ToolCall], filename: str = "repo-tools.md") -> Path: + content = format_tool_calls(calls) + path = self.artifacts.write_stage_output(task_id, filename, content) + self.logger.event( + "artifact.write", + "Wrote repo tool artifact", + task_id=task_id, + artifact_path=path.relative_to(self.project_root), + ) + return path + + def execute_requests(self, task_id: str, requests: list[ToolCall], filename: str = "repo-tools.md") -> str: + completed: list[ToolCall] = [] + for request in requests: + self.logger.event( + "tool.call", + "Running repo lookup tool", + task_id=task_id, + tool=request.name, + **request.arguments, + ) + try: + output = self._execute_request(request) + except (SafetyError, re.error) as exc: + output = str(exc) + completed.append(ToolCall(request.name, request.arguments, output)) + self.write_tool_artifact(task_id, completed, filename=filename) + return format_tool_calls(completed) + + def _execute_request(self, request: ToolCall) -> str: + if request.name == "list_files": + return self.list_files( + path=request.arguments.get("path", "."), + pattern=request.arguments.get("pattern", "*"), + ) + if request.name == "read_file": + path = request.arguments.get("path") + if not path: + return "Missing required argument: path" + return self.read_file(path) + if request.name == "grep": + pattern = request.arguments.get("pattern") + if not pattern: + return "Missing required argument: pattern" + return self.grep(pattern, path=request.arguments.get("path", ".")) + return f"Unsupported repo lookup tool: {request.name}" + + def _resolve_scoped(self, path: str, context: str) -> Path: + resolved = resolve_inside_root(self.project_root, path, context) + for scoped_root in self.scoped_roots: + try: + resolved.relative_to(scoped_root) + return resolved + except ValueError: + continue + scopes = ", ".join(_relative(item, self.project_root) for item in self.scoped_roots) + raise SafetyError(f"Safety error: 
{context} is outside configured scoped paths: {path}. Scopes: {scopes}") + + +def format_tool_calls(calls: list[ToolCall]) -> str: + lines = ["# Repo Tool Calls", ""] + if not calls: + lines.append("No tool calls.") + return "\n".join(lines) + for index, call in enumerate(calls, start=1): + lines.extend( + [ + f"## {index}. {call.name}", + "", + "Arguments:", + ] + ) + for key, value in sorted(call.arguments.items()): + lines.append(f"- {key}: `{value}`") + lines.extend(["", "Output:", "", "```text", call.output.rstrip(), "```", ""]) + return "\n".join(lines) + + +def parse_lookup_requests(text: str) -> list[ToolCall]: + """Parse a small YAML-like lookup request list from model output.""" + + lines = text.splitlines() + in_section = False + current: dict[str, str] = {} + requests: list[ToolCall] = [] + + def flush() -> None: + nonlocal current + if not current: + return + name = current.pop("tool", "").strip() + if name: + requests.append(ToolCall(name=name, arguments=dict(current), output="")) + current = {} + + for raw_line in lines: + stripped = raw_line.strip() + if stripped in {"lookup_requests:", "repo_lookup:", "repo_lookups:"}: + in_section = True + continue + if not in_section: + continue + if not stripped: + continue + if not raw_line.startswith((" ", "-", "\t")) and not stripped.endswith(":"): + break + if stripped.startswith("- "): + flush() + stripped = stripped[2:].strip() + if ":" not in stripped: + continue + key, value = stripped.split(":", 1) + key = key.strip() + value = value.strip().strip('"').strip("'") + if key == "tool" and current: + flush() + current[key] = value + flush() + return requests + + +def extract_agent_stdout(artifact_text: str) -> str: + lines = artifact_text.splitlines() + for index, line in enumerate(lines): + if line.strip() != "## stdout": + continue + start = None + for cursor in range(index + 1, len(lines)): + if lines[cursor].strip().startswith("```"): + start = cursor + 1 + break + if start is None: + return "" + end = 
len(lines) + for cursor in range(start, len(lines)): + if lines[cursor].strip().startswith("```"): + end = cursor + break + return "\n".join(lines[start:end]) + return artifact_text + + +def _line_number(text: str) -> str: + return "\n".join(f"{index}: {line}" for index, line in enumerate(text.splitlines(), start=1)) + + +def _relative(path: Path, root: Path) -> str: + try: + return path.relative_to(root).as_posix() + except ValueError: + return path.as_posix() diff --git a/nightshift/runlog.py b/nightshift/runlog.py new file mode 100644 index 0000000..b8a7014 --- /dev/null +++ b/nightshift/runlog.py @@ -0,0 +1,91 @@ +"""Operational run logging for NightShift.""" + +from __future__ import annotations + +from dataclasses import dataclass +from datetime import datetime, timezone +from pathlib import Path +from typing import Callable + +from .artifacts import ArtifactStore + + +ConsoleWriter = Callable[[str], None] + + +@dataclass(frozen=True) +class LogEvent: + event: str + message: str + fields: dict[str, object] + + +class RunLogger: + """Write concise operational events to CLI and run log artifacts.""" + + def __init__(self, console: ConsoleWriter | None = None) -> None: + self.console = console + self._run_log_path: Path | None = None + self._aggregate_log_path: Path | None = None + + def bind(self, artifacts: ArtifactStore) -> None: + artifacts.initialize_run() + self._run_log_path = artifacts.run_log_path + self._aggregate_log_path = artifacts.aggregate_log_path + + def event(self, event: str, message: str, **fields: object) -> None: + safe_fields = _redact_fields(fields) + line = format_log_line(LogEvent(event=event, message=message, fields=safe_fields)) + if self.console is not None: + self.console(line) + for path in (self._run_log_path, self._aggregate_log_path): + if path is None: + continue + path.parent.mkdir(parents=True, exist_ok=True) + with path.open("a", encoding="utf-8") as handle: + handle.write(line + "\n") + + +class NullRunLogger(RunLogger): + 
def __init__(self) -> None: + super().__init__(console=None) + + def bind(self, artifacts: ArtifactStore) -> None: + return None + + def event(self, event: str, message: str, **fields: object) -> None: + return None + + +def format_log_line(log_event: LogEvent) -> str: + timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ") + parts = [timestamp, log_event.event, log_event.message] + for key, value in sorted(log_event.fields.items()): + if value is None or value == "": + continue + parts.append(f"{key}={_format_value(value)}") + return " | ".join(parts) + + +def tail_lines(path: Path, limit: int = 100) -> list[str]: + if limit <= 0: + return [] + if not path.exists() or not path.is_file(): + return [] + return path.read_text(encoding="utf-8", errors="replace").splitlines()[-limit:] + + +def _format_value(value: object) -> str: + text = str(value).replace("\n", " ").replace("\r", " ") + return text if text else "" + + +def _redact_fields(fields: dict[str, object]) -> dict[str, object]: + redacted: dict[str, object] = {} + for key, value in fields.items(): + lowered = key.lower() + if any(marker in lowered for marker in ("secret", "token", "password", "key")): + redacted[key] = "" + else: + redacted[key] = value + return redacted diff --git a/nightshift/web.py b/nightshift/web.py index e3d7f98..bbea4d5 100644 --- a/nightshift/web.py +++ b/nightshift/web.py @@ -7,6 +7,7 @@ from html import escape from pathlib import Path from .errors import NightShiftError +from .runlog import tail_lines @dataclass(frozen=True) @@ -14,6 +15,7 @@ class RunInfo: name: str path: Path summary: str + log_tail: tuple[str, ...] 
= () def list_runs(artifact_dir: str | Path) -> list[RunInfo]: @@ -24,7 +26,14 @@ def list_runs(artifact_dir: str | Path) -> list[RunInfo]: for path in sorted((item for item in runs_dir.iterdir() if item.is_dir()), reverse=True): summary_path = path / "run-summary.md" summary = summary_path.read_text(encoding="utf-8") if summary_path.exists() else "No run summary yet." - runs.append(RunInfo(name=path.name, path=path, summary=summary)) + runs.append( + RunInfo( + name=path.name, + path=path, + summary=summary, + log_tail=tuple(tail_lines(path / "run.log", limit=100)), + ) + ) return runs @@ -51,6 +60,10 @@ def render_dashboard(artifact_dir: str | Path) -> str: "
",
                 escape(run.summary),
                 "
", + "

Log Tail

", + "
",
+                escape("\n".join(run.log_tail) if run.log_tail else "No run log yet."),
+                "
", "", ] ) diff --git a/tests/test_agents.py b/tests/test_agents.py index ad45546..6b73c3e 100644 --- a/tests/test_agents.py +++ b/tests/test_agents.py @@ -1,7 +1,7 @@ from pathlib import Path import tempfile import unittest -from unittest.mock import patch +from unittest.mock import MagicMock, patch from nightshift.agents import AgentExecutor, build_prompt_bundle, parse_review_output from nightshift.agents import AgentInvocation, format_agent_invocation @@ -132,6 +132,43 @@ class AgentExecutorTests(unittest.TestCase): output = (root / result.output_path).read_text(encoding="utf-8") self.assertIn("ollama run tiny-model", output) + def test_openai_compatible_agent_sends_temperature(self) -> None: + with tempfile.TemporaryDirectory() as directory: + root = Path(directory) + prompt_path = root / "planner.md" + prompt_path.write_text("Plan carefully.", encoding="utf-8") + artifacts = ArtifactStore(root, ".nightshift", run_id="test-run") + executor = AgentExecutor( + root, + { + "planner": AgentConfig( + id="planner", + backend="openai_compatible", + command=None, + model="tiny-model", + base_url="http://localhost:11434/v1", + temperature=0.2, + system_prompt=Path("planner.md"), + ) + }, + artifacts, + ) + task = parse_tasks(TASK_MD)[0] + stage = StageConfig(id="plan", type="agent", agent="planner", output="plan.md") + response = MagicMock() + response.__enter__.return_value.read.return_value = ( + b'{"choices":[{"message":{"content":"api output"}}]}' + ) + + with patch("nightshift.agents.request.urlopen", return_value=response) as urlopen: + result = executor.run_stage(stage, task) + + self.assertEqual(result.status, "pass") + request_obj = urlopen.call_args.args[0] + body = request_obj.data.decode("utf-8") + self.assertIn('"temperature": 0.2', body) + self.assertIn("api output", (root / result.output_path).read_text(encoding="utf-8")) + def test_agent_artifact_format_tolerates_missing_streams(self) -> None: invocation = AgentInvocation( agent_id="planner", diff --git 
a/tests/test_config.py b/tests/test_config.py index 4522dce..41b6156 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -167,6 +167,24 @@ class ConfigTests(unittest.TestCase): self.assertEqual(config.agents["planner"].model, "qwen2.5-coder:14b") self.assertEqual(config.experiment.label, "local-test") + def test_openai_compatible_backend_loads(self) -> None: + with tempfile.TemporaryDirectory() as directory: + root = Path(directory) + init_project(root) + config_path = root / "nightshift.yaml" + text = config_path.read_text(encoding="utf-8").replace( + "backend: command\n command: echo", + "backend: openai_compatible\n model: local-model\n base_url: http://localhost:11434/v1\n temperature: 0.1", + 1, + ) + config_path.write_text(text, encoding="utf-8") + + config = load_config(config_path) + + self.assertEqual(config.agents["planner"].backend, "openai_compatible") + self.assertEqual(config.agents["planner"].base_url, "http://localhost:11434/v1") + self.assertEqual(config.agents["planner"].temperature, 0.1) + def test_command_stage_options_load(self) -> None: with tempfile.TemporaryDirectory() as directory: root = Path(directory) @@ -188,6 +206,41 @@ class ConfigTests(unittest.TestCase): self.assertEqual(test_stage.timeout_seconds, 30) self.assertEqual(test_stage.working_dir, Path(".")) + def test_agent_temperature_loads(self) -> None: + with tempfile.TemporaryDirectory() as directory: + root = Path(directory) + init_project(root) + config_path = root / "nightshift.yaml" + config_path.write_text( + config_path.read_text(encoding="utf-8").replace( + " system_prompt: agents/planner.md", + " system_prompt: agents/planner.md\n temperature: 0.2", + 1, + ), + encoding="utf-8", + ) + + config = load_config(config_path) + + self.assertEqual(config.agents["planner"].temperature, 0.2) + + def test_agent_temperature_must_be_number(self) -> None: + with tempfile.TemporaryDirectory() as directory: + root = Path(directory) + init_project(root) + config_path = root / 
"nightshift.yaml" + config_path.write_text( + config_path.read_text(encoding="utf-8").replace( + " system_prompt: agents/planner.md", + " system_prompt: agents/planner.md\n temperature: low", + 1, + ), + encoding="utf-8", + ) + + with self.assertRaisesRegex(ConfigError, "temperature"): + load_config(config_path) + def test_non_command_stage_cannot_define_commands(self) -> None: with tempfile.TemporaryDirectory() as directory: root = Path(directory) diff --git a/tests/test_pipeline.py b/tests/test_pipeline.py index c66c555..be4aa60 100644 --- a/tests/test_pipeline.py +++ b/tests/test_pipeline.py @@ -230,6 +230,79 @@ Acceptance Criteria: self.assertEqual(result.status, "failed") self.assertEqual(result.task_results[0].status, "blocked") + def test_run_writes_operational_log(self) -> None: + with tempfile.TemporaryDirectory() as directory: + root = Path(directory) + _write_common_files(root) + stages = (StageConfig(id="plan", type="agent", agent="planner", output="plan.md"),) + artifacts = ArtifactStore(root, ".nightshift", run_id="test-run") + config = make_config(root, stages) + runner = PipelineRunner(config, artifacts) + task = parse_tasks(TASK_MD)[0] + + runner.run_task(task) + + log = (root / ".nightshift" / "runs" / "test-run" / "run.log").read_text(encoding="utf-8") + self.assertIn("task.start", log) + self.assertIn("stage.start", log) + self.assertIn("agent.finish", log) + + def test_planner_lookup_requests_write_files_inspected_and_rerun(self) -> None: + with tempfile.TemporaryDirectory() as directory: + root = Path(directory) + _write_common_files(root) + (root / "target.py").write_text("VALUE = 1\n", encoding="utf-8") + (root / "fake_planner.py").write_text( + "\n".join( + [ + "import sys", + "prompt = sys.stdin.read()", + "if 'repo_lookup_results' in prompt:", + " print('final plan with context')", + "else:", + " print('lookup_requests:')", + " print('- tool: read_file')", + " print(' path: target.py')", + ] + ), + encoding="utf-8", + ) + stages = 
(StageConfig(id="plan", type="agent", agent="planner", output="plan.md"),) + config = make_config(root, stages) + config.agents["planner"] = AgentConfig( + id="planner", + backend="command", + command="python fake_planner.py", + system_prompt=Path("planner.md"), + ) + runner = PipelineRunner(config, ArtifactStore(root, ".nightshift", run_id="test-run")) + task = parse_tasks(TASK_MD)[0] + + result = runner.run_task(task) + + task_dir = root / ".nightshift" / "runs" / "test-run" / "tasks" / task.id + self.assertEqual(result.status, "complete") + self.assertTrue((task_dir / "files-inspected.md").exists()) + self.assertIn("1: VALUE = 1", (task_dir / "files-inspected.md").read_text(encoding="utf-8")) + self.assertIn("final plan with context", (task_dir / "plan.md").read_text(encoding="utf-8")) + + def test_repo_context_stage_writes_context_pack(self) -> None: + with tempfile.TemporaryDirectory() as directory: + root = Path(directory) + _write_common_files(root) + (root / "app.py").write_text("def run_pipeline():\n return True\n", encoding="utf-8") + stages = (StageConfig(id="context", type="repo_context", output="context-pack.md"),) + config = make_config(root, stages) + runner = PipelineRunner(config, ArtifactStore(root, ".nightshift", run_id="test-run")) + task = parse_tasks(TASK_MD)[0] + + result = runner.run_task(task) + + pack = root / ".nightshift" / "runs" / "test-run" / "tasks" / task.id / "context-pack.md" + self.assertEqual(result.status, "complete") + self.assertIn("Context Pack", pack.read_text(encoding="utf-8")) + self.assertIn("app.py", pack.read_text(encoding="utf-8")) + def _write_common_files(root: Path) -> None: (root / "nightshift.yaml").write_text("project:\n name: test\n", encoding="utf-8") diff --git a/tests/test_repo_tools.py b/tests/test_repo_tools.py new file mode 100644 index 0000000..73e84a0 --- /dev/null +++ b/tests/test_repo_tools.py @@ -0,0 +1,47 @@ +from pathlib import Path +import tempfile +import unittest + +from nightshift.artifacts 
import ArtifactStore +from nightshift.config import SafetyConfig +from nightshift.repo_tools import RepoTools, parse_lookup_requests + + +class RepoToolsTests(unittest.TestCase): + def test_repo_tools_are_scoped_and_line_numbered(self) -> None: + with tempfile.TemporaryDirectory() as directory: + root = Path(directory) + (root / "src").mkdir() + (root / "src" / "app.py").write_text("def hello():\n return 'hi'\n", encoding="utf-8") + safety = SafetyConfig( + require_clean_worktree=False, + scoped_paths=("src",), + allowed_commands=(), + forbidden_commands=(), + ) + tools = RepoTools(root, safety, ArtifactStore(root, ".nightshift", run_id="test-run")) + + self.assertIn("src/app.py", tools.list_files("src", "*.py")) + self.assertIn("1: def hello():", tools.read_file("src/app.py")) + self.assertIn("src/app.py:1", tools.grep("hello", "src")) + + def test_parse_lookup_requests(self) -> None: + output = """Plan needs context. + +lookup_requests: +- tool: read_file + path: nightshift/pipeline.py +- tool: grep + path: nightshift + pattern: PipelineRunner +""" + + requests = parse_lookup_requests(output) + + self.assertEqual([request.name for request in requests], ["read_file", "grep"]) + self.assertEqual(requests[0].arguments["path"], "nightshift/pipeline.py") + self.assertEqual(requests[1].arguments["pattern"], "PipelineRunner") + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_web.py b/tests/test_web.py index 8c09395..83ab6b4 100644 --- a/tests/test_web.py +++ b/tests/test_web.py @@ -19,14 +19,23 @@ class WebDashboardTests(unittest.TestCase): artifacts = ArtifactStore(root, ".nightshift", run_id="test-run") artifacts.initialize_run() artifacts.run_summary_path.write_text("# Summary\n\nok", encoding="utf-8") + artifacts.run_log_path.write_text( + "\n".join(f"line {index}" for index in range(120)), + encoding="utf-8", + ) runs = list_runs(root / ".nightshift") content = read_artifact(root / ".nightshift" / "runs" / "test-run", "run-summary.md") 
escaped = read_artifact(root / ".nightshift" / "runs" / "test-run", "../project-context.md") + dashboard = render_dashboard(root / ".nightshift") self.assertEqual(len(runs), 1) + self.assertEqual(len(runs[0].log_tail), 100) self.assertIn("ok", content) self.assertIn("escapes", escaped) + self.assertIn("Log Tail", dashboard) + self.assertIn("line 119", dashboard) + self.assertNotIn("line 19\n", dashboard) if __name__ == "__main__":