From 646c655314c329d776f288b12edf62f33e5278b0 Mon Sep 17 00:00:00 2001
From: "K. Hodges" <khodges42@gmail.com>
Date: Sun, 17 May 2026 09:56:28 -0700
Subject: [PATCH] Repo Lookup, Request Context, Planner, Context Stage, QoL
 improvements

  - Added operational run logging via nightshift/runlog.py.
  - CLI now streams progress during run / run --all.
  - Runs write .nightshift/runs/<run-id>/run.log and aggregate .nightshift/nightshift.log.
  - Web dashboard now shows the last 100 run log lines.
  - Added agent temperature config.
  - Added minimal openai_compatible backend and temperature passing for it.
  - Added Ollama temperature handling.
  - Added scoped repo lookup tools in nightshift/repo_tools.py: list_files, read_file, grep.
  - Planner agents can request lookup context with lookup_requests; NightShift saves files-inspected.md and reruns the planner with retrieved context.
  - Added repo_context stage type that writes context-pack.md.
  - Marked phases 23-27 complete in docs/design.md:990.
---
 docs/design.md           |  64 +++++-----
 nightshift/agents.py     | 138 ++++++++++++++++++++-
 nightshift/artifacts.py  |   2 +
 nightshift/cli.py        |   3 +-
 nightshift/commands.py   |  28 ++++-
 nightshift/config.py     |  40 ++++++-
 nightshift/pipeline.py   | 235 +++++++++++++++++++++++++++++++++++-
 nightshift/repo_tools.py | 250 +++++++++++++++++++++++++++++++++++++++
 nightshift/runlog.py     |  91 ++++++++++++++
 nightshift/web.py        |  15 ++-
 tests/test_agents.py     |  39 +++++-
 tests/test_config.py     |  53 +++++++++
 tests/test_pipeline.py   |  73 ++++++++++++
 tests/test_repo_tools.py |  47 ++++++++
 tests/test_web.py        |   9 ++
 15 files changed, 1042 insertions(+), 45 deletions(-)
 create mode 100644 nightshift/repo_tools.py
 create mode 100644 nightshift/runlog.py
 create mode 100644 tests/test_repo_tools.py
diff --git a/docs/design.md b/docs/design.md
index 388f9c5..95d636a 100644
--- a/docs/design.md
+++ b/docs/design.md
@@ -989,18 +989,18 @@ NightShift should make active runs easier to observe from both the CLI and the w
 
 Implementation tasks:
 
-* [ ] Add a small logging module with structured operational events.
-* [ ] Stream human-readable progress to the CLI during `run` and `run --all`.
-* [ ] Include run id, task id, stage id, agent/backend, command index, retry count, status, duration, and artifact path where available.
-* [ ] Write a per-run log file such as `.nightshift/runs/<run-id>/run.log`.
-* [ ] Optionally write or rotate an aggregate `.nightshift/nightshift.log` for cross-run troubleshooting.
-* [ ] Keep logs operational; do not duplicate full prompts, full model responses, or full command output that already lives in artifacts.
-* [ ] Redact or avoid secrets from logged environment/config values.
-* [ ] Add dashboard support for viewing the latest log tail.
-* [ ] Cap the dashboard log view to the last 100 lines by default.
-* [ ] Keep the full per-run log file available as an artifact unless a later size cap is configured.
-* [ ] Auto-refresh the dashboard log view with the existing dashboard refresh model.
-* [ ] Add tests for log writing, CLI progress hooks, dashboard log rendering, missing log files, and the 100-line cap.
+* [x] Add a small logging module with structured operational events.
+* [x] Stream human-readable progress to the CLI during `run` and `run --all`.
+* [x] Include run id, task id, stage id, agent/backend, command index, retry count, status, duration, and artifact path where available.
+* [x] Write a per-run log file such as `.nightshift/runs/<run-id>/run.log`.
+* [x] Optionally write or rotate an aggregate `.nightshift/nightshift.log` for cross-run troubleshooting.
+* [x] Keep logs operational; do not duplicate full prompts, full model responses, or full command output that already lives in artifacts.
+* [x] Redact or avoid secrets from logged environment/config values.
+* [x] Add dashboard support for viewing the latest log tail.
+* [x] Cap the dashboard log view to the last 100 lines by default.
+* [x] Keep the full per-run log file available as an artifact unless a later size cap is configured.
+* [x] Auto-refresh the dashboard log view with the existing dashboard refresh model.
+* [x] Add tests for log writing, CLI progress hooks, dashboard log rendering, missing log files, and the 100-line cap.
 
 Acceptance Criteria:
 
@@ -1019,35 +1019,35 @@ Notes:
 
 ## Phase 24: Per-Agent Model Parameters
 
-- [ ] Add `temperature` to agent config.
-- [ ] Pass temperature to Ollama/OpenAI-compatible backends.
-- [ ] Default safely if omitted.
-- [ ] Add config validation tests.
+- [x] Add `temperature` to agent config.
+- [x] Pass temperature to Ollama/OpenAI-compatible backends.
+- [x] Default safely if omitted.
+- [x] Add config validation tests.
 
 ## Phase 25: Repo Lookup Tools MVP
 
-- [ ] Add tool interface for repo operations.
-- [ ] Implement scoped `list_files`.
-- [ ] Implement scoped `read_file`.
-- [ ] Implement scoped `grep`.
-- [ ] Enforce existing path safety rules.
-- [ ] Log tool calls as artifacts.
+- [x] Add tool interface for repo operations.
+- [x] Implement scoped `list_files`.
+- [x] Implement scoped `read_file`.
+- [x] Implement scoped `grep`.
+- [x] Enforce existing path safety rules.
+- [x] Log tool calls as artifacts.
 
 ## Phase 26: Planner Code-Discovery Support
 
-- [ ] Teach planner prompt to request needed code context.
-- [ ] Add structured planner output for lookup requests.
-- [ ] Execute requested lookup tools.
-- [ ] Save `files-inspected.md`.
-- [ ] Re-run planner with retrieved context.
+- [x] Teach planner prompt to request needed code context.
+- [x] Add structured planner output for lookup requests.
+- [x] Execute requested lookup tools.
+- [x] Save `files-inspected.md`.
+- [x] Re-run planner with retrieved context.
 
 ## Phase 27: Context Pack Builder
 
-- [ ] Add `repo_context` stage.
-- [ ] Generate `context-pack.md`.
-- [ ] Include task, acceptance criteria, relevant files, snippets, and constraints.
-- [ ] Add line-numbered excerpts.
-- [ ] Add context-size caps.
+- [x] Add `repo_context` stage.
+- [x] Generate `context-pack.md`.
+- [x] Include task, acceptance criteria, relevant files, snippets, and constraints.
+- [x] Add line-numbered excerpts.
+- [x] Add context-size caps.
 
 ## Phase 28: Project Context Chart MVP
 
diff --git a/nightshift/agents.py b/nightshift/agents.py
index fdc652a..69618b3 100644
--- a/nightshift/agents.py
+++ b/nightshift/agents.py
@@ -3,13 +3,18 @@
 from __future__ import annotations
 
 from dataclasses import dataclass
+import json
+import os
 from pathlib import Path
 import subprocess
 import time
+from urllib import request
+from urllib.error import URLError
 
 from .artifacts import ArtifactStore
 from .config import AgentConfig, StageConfig
 from .errors import AgentError, SafetyError
+from .runlog import NullRunLogger, RunLogger
 from .safety import resolve_inside_root, resolve_project_root
 from .stages import StageResult, StageStatus
 from .tasks import Task
@@ -43,11 +48,13 @@ class AgentExecutor:
         agents: dict[str, AgentConfig],
         artifacts: ArtifactStore,
         timeout_seconds: int = DEFAULT_AGENT_TIMEOUT_SECONDS,
+        logger: RunLogger | None = None,
     ) -> None:
         self.project_root = resolve_project_root(project_root)
         self.agents = agents
         self.artifacts = artifacts
         self.timeout_seconds = timeout_seconds
+        self.logger = logger or NullRunLogger()
 
     def run_stage(
         self,
@@ -64,7 +71,7 @@ class AgentExecutor:
         agent = self.agents.get(stage.agent)
         if agent is None:
             raise AgentError(f"Agent error: unknown agent '{stage.agent}' for stage '{stage.id}'.")
-        if agent.backend not in {"command", "ollama"}:
+        if agent.backend not in {"command", "ollama", "openai_compatible"}:
             raise AgentError(
                 f"Agent error: agent '{agent.id}' uses unsupported backend '{agent.backend}'."
             )
@@ -72,6 +79,10 @@ class AgentExecutor:
             raise AgentError(f"Agent error: command backend agent '{agent.id}' has no command.")
         if agent.backend == "ollama" and not agent.model:
             raise AgentError(f"Agent error: ollama backend agent '{agent.id}' has no model.")
+        if agent.backend == "openai_compatible" and not agent.model:
+            raise AgentError(f"Agent error: openai_compatible backend agent '{agent.id}' has no model.")
+        if agent.backend == "openai_compatible" and not agent.base_url:
+            raise AgentError(f"Agent error: openai_compatible backend agent '{agent.id}' has no base_url.")
 
         system_prompt = self._read_system_prompt(agent)
         prompt = build_prompt_bundle(
@@ -84,10 +95,37 @@ class AgentExecutor:
             retry_notes=retry_notes or [],
             retry_context=retry_context,
         )
+        self.logger.event(
+            "agent.start",
+            "Starting agent",
+            stage_id=stage.id,
+            agent_id=agent.id,
+            backend=agent.backend,
+            model=agent.model,
+            temperature=agent.temperature,
+        )
         invocation = self._invoke(agent, prompt)
+        self.logger.event(
+            "agent.finish",
+            "Finished agent",
+            stage_id=stage.id,
+            agent_id=agent.id,
+            backend=agent.backend,
+            exit_code=invocation.exit_code,
+            duration=f"{invocation.duration_seconds:.3f}s",
+            timed_out=str(invocation.timed_out).lower(),
+        )
         output_filename = stage.output or f"{stage.id}.md"
         output = format_agent_invocation(stage.id, invocation)
         output_path = self.artifacts.write_stage_output(task.id, output_filename, output)
+        self.logger.event(
+            "artifact.write",
+            "Wrote agent artifact",
+            stage_id=stage.id,
+            task_id=task.id,
+            agent_id=agent.id,
+            artifact_path=output_path.relative_to(self.project_root),
+        )
 
         if invocation.timed_out:
             status: StageStatus = "fail"
@@ -135,6 +173,8 @@ class AgentExecutor:
     def _invoke(self, agent: AgentConfig, prompt: str) -> AgentInvocation:
         if agent.backend == "ollama":
             return self._invoke_ollama(agent, prompt)
+        if agent.backend == "openai_compatible":
+            return self._invoke_openai_compatible(agent, prompt)
         return self._invoke_command(agent, prompt)
 
     def _invoke_command(self, agent: AgentConfig, prompt: str) -> AgentInvocation:
@@ -180,12 +220,15 @@ class AgentExecutor:
         if not agent.model:
             raise AgentError(f"Agent error: ollama backend agent '{agent.id}' has no model.")
         command = f"ollama run {agent.model}"
+        prompt_input = prompt
+        if agent.temperature is not None:
+            prompt_input = f"/set parameter temperature {agent.temperature}\n{prompt}"
         started = time.monotonic()
         try:
             completed = subprocess.run(
                 ["ollama", "run", agent.model],
                 cwd=self.project_root,
-                input=prompt,
+                input=prompt_input,
                 capture_output=True,
                 text=True,
                 encoding="utf-8",
@@ -196,7 +239,7 @@ class AgentExecutor:
             return AgentInvocation(
                 agent_id=agent.id,
                 command=command,
-                prompt=prompt,
+                prompt=prompt_input,
                 exit_code=completed.returncode,
                 stdout=_coerce_output(completed.stdout),
                 stderr=_coerce_output(completed.stderr),
@@ -207,7 +250,7 @@ class AgentExecutor:
             return AgentInvocation(
                 agent_id=agent.id,
                 command=command,
-                prompt=prompt,
+                prompt=prompt_input,
                 exit_code=127,
                 stdout="",
                 stderr=str(exc),
@@ -218,7 +261,7 @@ class AgentExecutor:
             return AgentInvocation(
                 agent_id=agent.id,
                 command=command,
-                prompt=prompt,
+                prompt=prompt_input,
                 exit_code=-1,
                 stdout=_coerce_output(exc.stdout),
                 stderr=_coerce_output(exc.stderr),
@@ -226,6 +269,63 @@ class AgentExecutor:
                 timed_out=True,
             )
 
+    def _invoke_openai_compatible(self, agent: AgentConfig, prompt: str) -> AgentInvocation:
+        """POST *prompt* to the agent's chat-completions endpoint.
+
+        Raises AgentError when the agent lacks a model or base_url. Network
+        failures and timeouts are not raised; they come back as a non-zero
+        ``exit_code`` (with ``timed_out`` set for timeouts).
+        """
+        if not agent.model or not agent.base_url:
+            raise AgentError(f"Agent error: openai_compatible backend agent '{agent.id}' is incomplete.")
+        url = agent.base_url.rstrip("/") + "/chat/completions"
+        command = f"POST {url}"
+        body: dict[str, object] = {
+            "model": agent.model,
+            "messages": [{"role": "user", "content": prompt}],
+        }
+        if agent.temperature is not None:
+            body["temperature"] = agent.temperature
+        headers = {"Content-Type": "application/json"}
+        # Send an Authorization header only when the configured env var is set.
+        api_key_env = agent.api_key_env or "OPENAI_API_KEY"
+        api_key = os.environ.get(api_key_env)
+        if api_key:
+            headers["Authorization"] = f"Bearer {api_key}"
+
+        started = time.monotonic()
+        try:
+            payload = json.dumps(body).encode("utf-8")
+            req = request.Request(url, data=payload, headers=headers, method="POST")
+            with request.urlopen(req, timeout=self.timeout_seconds) as response:
+                raw = response.read().decode("utf-8", errors="replace")
+        except (OSError, URLError) as exc:
+            # urllib wraps timeouts raised while connecting/sending in URLError,
+            # so a bare ``except TimeoutError`` misses them; check ``reason`` too.
+            timed_out = isinstance(exc, TimeoutError) or isinstance(
+                getattr(exc, "reason", None), TimeoutError
+            )
+            return AgentInvocation(
+                agent_id=agent.id,
+                command=command,
+                prompt=prompt,
+                exit_code=-1 if timed_out else 1,
+                stdout="",
+                stderr="Request timed out." if timed_out else str(exc),
+                duration_seconds=time.monotonic() - started,
+                timed_out=timed_out,
+            )
+        duration = time.monotonic() - started
+        return AgentInvocation(
+            agent_id=agent.id,
+            command=command,
+            prompt=prompt,
+            exit_code=0,
+            stdout=_extract_openai_content(raw),
+            stderr="",
+            duration_seconds=duration,
+        )
+
 
 def build_prompt_bundle(
     system_prompt: str,
@@ -294,6 +394,20 @@ def _coerce_output(value: str | bytes | None) -> str:
     return value
 
 
+def _extract_openai_content(raw: str) -> str:
+    """Return the first choice's message content from a chat-completions body.
+
+    Falls back to *raw* unchanged when the body is not JSON, lacks the
+    expected ``choices[0].message.content`` path, or the content is not text.
+    """
+    try:
+        content = json.loads(raw)["choices"][0]["message"]["content"]
+    except (ValueError, LookupError, TypeError):
+        # ValueError: invalid JSON; LookupError/TypeError: unexpected shape.
+        return raw
+    return content if isinstance(content, str) else raw
+
+
 def output_contract_for(stage: StageConfig) -> str:
     if stage.type in {"agent_review", "review"}:
         return "\n".join(
@@ -305,6 +419,20 @@ def output_contract_for(stage: StageConfig) -> str:
                 "context_update: <compact useful note>",
             ]
         )
+    if stage.type == "agent" and ("plan" in stage.id.lower() or stage.agent == "planner"):
+        return "\n".join(
+            [
+                "Write the requested stage output in concise markdown.",
+                "",
+                "If you need repository context before finalizing the plan, include:",
+                "lookup_requests:",
+                "- tool: list_files | read_file | grep",
+                "  path: <relative path>",
+                "  pattern: <glob for list_files or regex for grep>",
+                "",
+                "NightShift will run these read-only lookup tools, save files-inspected.md, and re-run this planner stage with the retrieved context.",
+            ]
+        )
     return "Write the requested stage output in concise markdown."
 
 
diff --git a/nightshift/artifacts.py b/nightshift/artifacts.py
index c9cba75..2bdda65 100644
--- a/nightshift/artifacts.py
+++ b/nightshift/artifacts.py
@@ -39,6 +39,8 @@ class ArtifactStore:
         self.project_context_path = self.artifact_root / "project-context.md"
         self.run_summary_path = self.run_dir / "run-summary.md"
         self.config_snapshot_path = self.run_dir / "config.snapshot.yaml"
+        self.run_log_path = self.run_dir / "run.log"
+        self.aggregate_log_path = self.artifact_root / "nightshift.log"
 
     @classmethod
     def from_config(cls, config: NightShiftConfig, run_id: str | None = None) -> "ArtifactStore":
diff --git a/nightshift/cli.py b/nightshift/cli.py
index 38d474c..dccb2a7 100644
--- a/nightshift/cli.py
+++ b/nightshift/cli.py
@@ -10,6 +10,7 @@ from .config import validate_config
 from .errors import NightShiftError
 from .init import init_project
 from .pipeline import PipelineRunner
+from .runlog import RunLogger
 from .status import build_status, format_status
 from .tasks import (
     ensure_dependencies_satisfied,
@@ -80,7 +81,7 @@ def main(argv: list[str] | None = None) -> int:
             validate_task_dependencies(tasks)
             if args.all and args.task:
                 parser.error("run accepts either --all or --task, not both.")
-            runner = PipelineRunner(config)
+            runner = PipelineRunner(config, logger=RunLogger(console=print))
             if args.all:
                 selected = [task for task in tasks if not task.completed]
                 result = runner.run_tasks(selected)
diff --git a/nightshift/commands.py b/nightshift/commands.py
index 267b611..4949064 100644
--- a/nightshift/commands.py
+++ b/nightshift/commands.py
@@ -12,6 +12,7 @@ import time
 from .artifacts import ArtifactStore
 from .config import SafetyConfig, StageConfig
 from .errors import CommandError, SafetyError
+from .runlog import NullRunLogger, RunLogger
 from .safety import ensure_command_allowed, resolve_inside_root, resolve_project_root
 from .stages import StageResult
 
@@ -38,11 +39,13 @@ class CommandExecutor:
         safety: SafetyConfig,
         artifacts: ArtifactStore,
         timeout_seconds: int = DEFAULT_COMMAND_TIMEOUT_SECONDS,
+        logger: RunLogger | None = None,
     ) -> None:
         self.project_root = resolve_project_root(project_root)
         self.safety = safety
         self.artifacts = artifacts
         self.timeout_seconds = timeout_seconds
+        self.logger = logger or NullRunLogger()
 
     def run_stage(self, stage: StageConfig, task_id: str) -> StageResult:
         if stage.type != "command":
@@ -56,7 +59,14 @@ class CommandExecutor:
         status = "pass"
         reason = "All commands passed."
 
-        for command in stage.commands:
+        for index, command in enumerate(stage.commands, start=1):
+            self.logger.event(
+                "command.start",
+                "Starting command",
+                stage_id=stage.id,
+                command_index=index,
+                command=command,
+            )
             run = self.run_command(
                 command,
                 shell=stage.shell,
@@ -64,6 +74,15 @@ class CommandExecutor:
                 working_dir=stage.working_dir,
             )
             runs.append(run)
+            self.logger.event(
+                "command.finish",
+                "Finished command",
+                stage_id=stage.id,
+                command_index=index,
+                exit_code=run.exit_code,
+                duration=f"{run.duration_seconds:.3f}s",
+                timed_out=str(run.timed_out).lower(),
+            )
             if run.timed_out:
                 status = "fail"
                 timeout = stage.timeout_seconds or self.timeout_seconds
@@ -80,6 +99,13 @@ class CommandExecutor:
             output_filename,
             format_command_runs(stage.id, runs),
         )
+        self.logger.event(
+            "artifact.write",
+            "Wrote command artifact",
+            stage_id=stage.id,
+            task_id=task_id,
+            artifact_path=output_path.relative_to(self.project_root),
+        )
         return StageResult(
             stage_id=stage.id,
             status=status,  # type: ignore[arg-type]
diff --git a/nightshift/config.py b/nightshift/config.py
index 90f3cf6..7d70d86 100644
--- a/nightshift/config.py
+++ b/nightshift/config.py
@@ -43,6 +43,9 @@ class AgentConfig:
     system_prompt: Path
     model: str | None = None
     role: str | None = None
+    temperature: float | None = None
+    base_url: str | None = None
+    api_key_env: str | None = None
 
 
 @dataclass(frozen=True)
@@ -83,7 +86,7 @@ class NightShiftConfig:
 
 AGENT_STAGE_TYPES = {"agent", "agent_review", "review"}
 COMMAND_STAGE_TYPES = {"command"}
-SUPPORTED_STAGE_TYPES = AGENT_STAGE_TYPES | COMMAND_STAGE_TYPES | {"summarize"}
+SUPPORTED_STAGE_TYPES = AGENT_STAGE_TYPES | COMMAND_STAGE_TYPES | {"repo_context", "summarize"}
 
 
 def load_config(path: str | Path = "nightshift.yaml") -> NightShiftConfig:
@@ -181,10 +184,20 @@ def parse_config(raw: dict[str, Any], config_path: Path) -> NightShiftConfig:
         backend = _require_string(agent_raw, "backend", f"agents.{agent_id}")
         command = _optional_string(agent_raw.get("command"), f"agents.{agent_id}.command")
         model = _optional_string(agent_raw.get("model"), f"agents.{agent_id}.model")
-        if backend not in {"command", "ollama"}:
+        base_url = _optional_string(agent_raw.get("base_url"), f"agents.{agent_id}.base_url")
+        api_key_env = _optional_string(agent_raw.get("api_key_env"), f"agents.{agent_id}.api_key_env")
+        temperature = _optional_float_or_none(
+            agent_raw.get("temperature"),
+            f"agents.{agent_id}.temperature",
+        )
+        if temperature is not None and temperature < 0:
+            raise ConfigError(
+                f"Config error: agents.{agent_id}.temperature must be zero or greater."
+            )
+        if backend not in {"command", "ollama", "openai_compatible"}:
             raise ConfigError(
                 f"Config error: agent '{agent_id}' uses unsupported backend '{backend}'. "
-                "Supported backends: command, ollama."
+                "Supported backends: command, ollama, openai_compatible."
             )
         if backend == "command" and command is None:
             raise ConfigError(
@@ -194,6 +207,14 @@ def parse_config(raw: dict[str, Any], config_path: Path) -> NightShiftConfig:
             raise ConfigError(
                 f"Config error: ollama backend agent '{agent_id}' must define model."
             )
+        if backend == "openai_compatible" and model is None:
+            raise ConfigError(
+                f"Config error: openai_compatible backend agent '{agent_id}' must define model."
+            )
+        if backend == "openai_compatible" and base_url is None:
+            raise ConfigError(
+                f"Config error: openai_compatible backend agent '{agent_id}' must define base_url."
+            )
         system_prompt = Path(_require_string(agent_raw, "system_prompt", f"agents.{agent_id}"))
         agents[str(agent_id)] = AgentConfig(
             id=str(agent_id),
@@ -202,6 +223,9 @@ def parse_config(raw: dict[str, Any], config_path: Path) -> NightShiftConfig:
             system_prompt=system_prompt,
             model=model,
             role=_optional_string(agent_raw.get("role"), f"agents.{agent_id}.role"),
+            temperature=temperature,
+            base_url=base_url,
+            api_key_env=api_key_env,
         )
 
     experiment_raw = raw.get("experiment", {})
@@ -444,6 +468,8 @@ def _parse_scalar(value: str) -> Any:
         return None
     if re.fullmatch(r"-?\d+", value):
         return int(value)
+    if re.fullmatch(r"-?(\d+\.\d*|\d*\.\d+)", value):
+        return float(value)
     if (value.startswith('"') and value.endswith('"')) or (
         value.startswith("'") and value.endswith("'")
     ):
@@ -492,6 +518,14 @@ def _optional_int_or_none(value: Any, context: str) -> int | None:
     return _optional_int(value, context)
 
 
+def _optional_float_or_none(value: Any, context: str) -> float | None:
+    """Coerce an optional numeric config value to float; bools are rejected."""
+    is_number = isinstance(value, (int, float)) and not isinstance(value, bool)
+    if value is not None and not is_number:
+        raise ConfigError(f"Config error: '{context}' must be a number when set.")
+    return None if value is None else float(value)
+
+
 def _string_tuple(value: Any, context: str) -> tuple[str, ...]:
     if value is None:
         return ()
diff --git a/nightshift/pipeline.py b/nightshift/pipeline.py
index 2f305b2..39d0e96 100644
--- a/nightshift/pipeline.py
+++ b/nightshift/pipeline.py
@@ -4,6 +4,7 @@ from __future__ import annotations
 
 from dataclasses import dataclass
 from pathlib import Path
+import re
 
 from .agents import AgentExecutor
 from .artifacts import ArtifactStore
@@ -14,6 +15,8 @@ from .errors import PipelineError
 from .errors import NightShiftError
 from .git import ensure_clean_worktree, write_diff_artifact, write_git_artifacts
 from .reports import ReportGenerator
+from .repo_tools import RepoTools, extract_agent_stdout, parse_lookup_requests
+from .runlog import RunLogger
 from .stages import StageResult
 from .tasks import Task, mark_task_completed
 
@@ -46,9 +49,11 @@ class PipelineRunner:
         artifacts: ArtifactStore | None = None,
         agent_timeout_seconds: int = 600,
         command_timeout_seconds: int = 300,
+        logger: RunLogger | None = None,
     ) -> None:
         self.config = config
         self.artifacts = artifacts or ArtifactStore.from_config(config)
+        self.logger = logger or RunLogger()
         self.context = ContextManager(self.artifacts)
         self.reports = ReportGenerator(
             config.project.root,
@@ -61,17 +66,33 @@ class PipelineRunner:
             config.agents,
             self.artifacts,
             timeout_seconds=agent_timeout_seconds,
+            logger=self.logger,
         )
         self.command_executor = CommandExecutor(
             config.project.root,
             config.safety,
             self.artifacts,
             timeout_seconds=command_timeout_seconds,
+            logger=self.logger,
+        )
+        self.repo_tools = RepoTools(
+            config.project.root,
+            config.safety,
+            self.artifacts,
+            logger=self.logger,
         )
 
     def run_task(self, task: Task) -> PipelineResult:
         ensure_clean_worktree(self.config.project.root, self.config.safety.require_clean_worktree)
         self.artifacts.initialize_run()
+        self.logger.bind(self.artifacts)
+        self.logger.event(
+            "task.start",
+            "Starting task",
+            run_id=self.artifacts.run_id,
+            task_id=task.id,
+            task_title=task.title,
+        )
         self.artifacts.write_config_snapshot(self.config.path)
         self.artifacts.write_prompt_snapshots(
             {
@@ -97,6 +118,15 @@ class PipelineRunner:
 
         while index < len(stages):
             stage = stages[index]
+            self.logger.event(
+                "stage.start",
+                "Starting stage",
+                run_id=self.artifacts.run_id,
+                task_id=task.id,
+                stage_id=stage.id,
+                stage_type=stage.type,
+                retry_count=retry_count,
+            )
             try:
                 result = self._run_stage(stage, task, previous_outputs, retry_notes)
             except NightShiftError as exc:
@@ -113,6 +143,16 @@ class PipelineRunner:
                 )
             stage_results.append(result)
             previous_outputs[stage.id] = self._read_output(result.output_path)
+            self.logger.event(
+                "stage.finish",
+                "Finished stage",
+                run_id=self.artifacts.run_id,
+                task_id=task.id,
+                stage_id=stage.id,
+                status=result.status,
+                reason=result.reason,
+                artifact_path=result.output_path,
+            )
             if result.context_update:
                 retry_notes.append(f"Context update from '{stage.id}': {result.context_update}")
 
@@ -135,6 +175,16 @@ class PipelineRunner:
                     )
                     break
                 retry_count += 1
+                self.logger.event(
+                    "stage.retry",
+                    "Redirecting after stage result",
+                    run_id=self.artifacts.run_id,
+                    task_id=task.id,
+                    stage_id=stage.id,
+                    status=result.status,
+                    retry_count=retry_count,
+                    next_stage=target_stage,
+                )
                 retry_notes.append(
                     f"Retry {retry_count}: stage '{stage.id}' returned "
                     f"{result.status} ({result.reason}); redirecting to '{target_stage}'."
@@ -179,6 +229,16 @@ class PipelineRunner:
             stage_results,
             context_out_path=context_out_path,
         )
+        self.logger.event(
+            "task.finish",
+            "Finished task",
+            run_id=self.artifacts.run_id,
+            task_id=task.id,
+            status=final_status,
+            retry_count=retry_count,
+            reason=final_reason,
+            artifact_path=self.artifacts.create_task_dir(task.id).directory.relative_to(self.config.project.root),
+        )
 
         return PipelineResult(
             task_id=task.id,
@@ -191,6 +251,8 @@ class PipelineRunner:
 
     def run_tasks(self, tasks: list[Task] | tuple[Task, ...]) -> MultiTaskResult:
         self.artifacts.initialize_run()
+        self.logger.bind(self.artifacts)
+        self.logger.event("run.start", "Starting multi-task run", run_id=self.artifacts.run_id)
         results: list[PipelineResult] = []
         known_ids = {task.id for task in tasks}
         completed_ids = {task.id for task in tasks if task.completed}
@@ -216,6 +278,13 @@ class PipelineRunner:
                     reason="Task blocked by " + "; ".join(reason_parts),
                 )
                 results.append(blocked)
+                self.logger.event(
+                    "task.blocked",
+                    "Task blocked by dependencies",
+                    run_id=self.artifacts.run_id,
+                    task_id=task.id,
+                    reason=blocked.reason,
+                )
                 if not self.config.pipeline.continue_on_task_failure:
                     break
                 continue
@@ -234,6 +303,14 @@ class PipelineRunner:
             format_aggregate_run_summary(results, status, reason),
             encoding="utf-8",
         )
+        self.logger.event(
+            "run.finish",
+            "Finished multi-task run",
+            run_id=self.artifacts.run_id,
+            status=status,
+            completed_count=completed_count,
+            failed_count=failed_count,
+        )
         return MultiTaskResult(
             status=status,
             task_results=tuple(results),
@@ -251,7 +328,7 @@ class PipelineRunner:
     ) -> StageResult:
         if stage.type in {"agent", "agent_review", "review"}:
             context = self.context.read_context(task, retry_notes)
-            return self.agent_executor.run_stage(
+            result = self.agent_executor.run_stage(
                 stage,
                 task,
                 previous_outputs,
@@ -260,8 +337,39 @@ class PipelineRunner:
                 task_context=context.task_context,
                 retry_context=context.retry_context,
             )
+            if stage.type == "agent":
+                return self._maybe_rerun_agent_with_repo_lookup(
+                    stage,
+                    task,
+                    result,
+                    previous_outputs,
+                    retry_notes,
+                    context.project_context,
+                    context.task_context,
+                    context.retry_context,
+                )
+            return result
         if stage.type in COMMAND_STAGE_TYPES:
             return self.command_executor.run_stage(stage, task.id)
+        if stage.type == "repo_context":
+            output_path = self.artifacts.write_stage_output(
+                task.id,
+                stage.output or "context-pack.md",
+                self._build_context_pack(task),
+            )
+            self.logger.event(
+                "artifact.write",
+                "Wrote context pack",
+                stage_id=stage.id,
+                task_id=task.id,
+                artifact_path=output_path.relative_to(self.config.project.root),
+            )
+            return StageResult(
+                stage_id=stage.id,
+                status="pass",
+                reason="Context pack written.",
+                output_path=str(output_path.relative_to(self.config.project.root)),
+            )
         if stage.type == "summarize":
             output_path = self.artifacts.write_stage_output(
                 task.id,
@@ -276,6 +384,103 @@ class PipelineRunner:
             )
         raise PipelineError(f"Pipeline error: unsupported stage type '{stage.type}'.")
 
+    def _maybe_rerun_agent_with_repo_lookup(
+        self,
+        stage: StageConfig,
+        task: Task,
+        result: StageResult,
+        previous_outputs: dict[str, str],
+        retry_notes: list[str],
+        project_context: str,
+        task_context: str,
+        retry_context: str | None,
+    ) -> StageResult:
+        if result.status != "pass" or result.output_path is None:
+            return result
+        output_text = self._read_output(result.output_path)
+        requests = parse_lookup_requests(extract_agent_stdout(output_text))
+        if not requests:
+            return result
+        lookup_context = self.repo_tools.execute_requests(
+            task.id,
+            requests,
+            filename="files-inspected.md",
+        )
+        self.logger.event(
+            "agent.rerun",
+            "Re-running agent with repo lookup context",
+            stage_id=stage.id,
+            task_id=task.id,
+            lookup_count=len(requests),
+        )
+        rerun_outputs = dict(previous_outputs)
+        rerun_outputs["repo_lookup_results"] = lookup_context
+        rerun_result = self.agent_executor.run_stage(
+            stage,
+            task,
+            rerun_outputs,
+            retry_notes,
+            project_context=project_context,
+            task_context=task_context,
+            retry_context=retry_context,
+        )
+        return StageResult(
+            stage_id=rerun_result.stage_id,
+            status=rerun_result.status,
+            reason=(
+                "Agent completed after repo lookup."
+                if rerun_result.status == "pass"
+                else rerun_result.reason
+            ),
+            output_path=rerun_result.output_path,
+            next_stage=rerun_result.next_stage,
+            context_update=rerun_result.context_update,
+        )
+
+    def _build_context_pack(self, task: Task) -> str:
+        """Render the markdown context pack for ``task``.
+
+        The pack lists up to 80 ``*.py`` files and greps the repository for at
+        most five search terms derived from the task. ``_task_search_terms``
+        lowercases the words it extracts, so each search ignores case; a
+        case-sensitive search would never match mixed-case identifiers.
+        """
+        terms = _task_search_terms(task)
+        files = self.repo_tools.list_files(".", pattern="*.py", max_files=80)
+        grep_sections: list[str] = []
+        for term in terms[:5]:
+            # The term is escaped, so the only regex syntax in play is the flag.
+            matches = self.repo_tools.grep("(?i)" + re.escape(term), ".", max_matches=20)
+            grep_sections.extend([f"### Search: {term}", "", "```text", matches, "```", ""])
+        return "\n".join(
+            [
+                "# Context Pack",
+                "",
+                f"Task: `{task.id}`",
+                f"Title: {task.title}",
+                "",
+                "## Acceptance Criteria",
+                "",
+                "\n".join(f"- {item}" for item in task.acceptance_criteria) or "- None",
+                "",
+                "## Constraints",
+                "",
+                f"- Scoped paths: {', '.join(self.config.safety.scoped_paths) or '.'}",
+                "- Repository lookups are read-only.",
+                "- Excerpts are line-numbered where files are read directly.",
+                "",
+                "## Relevant Files",
+                "",
+                "```text",
+                files,
+                "```",
+                "",
+                "## Search Results",
+                "",
+                *grep_sections,
+            ]
+        )
+
     def _read_output(self, output_path: str | None) -> str:
         if output_path is None:
             return ""
@@ -365,9 +570,37 @@ def format_run_metadata(config: NightShiftConfig) -> str:
                 "",
                 f"- Backend: {agent.backend}",
                 f"- Model: {agent.model or ''}",
+                f"- Temperature: {agent.temperature if agent.temperature is not None else ''}",
+                f"- Base URL: {agent.base_url or ''}",
                 f"- Command: {agent.command or ''}",
                 f"- System prompt: {agent.system_prompt}",
                 "",
             ]
         )
     return "\n".join(lines)
+
+
+def _task_search_terms(task: Task) -> list[str]:
+    source = " ".join([task.id, task.title, *task.acceptance_criteria])
+    words = re.findall(r"[A-Za-z_][A-Za-z0-9_]{2,}", source)
+    ignored = {
+        "the",
+        "and",
+        "for",
+        "with",
+        "that",
+        "this",
+        "task",
+        "add",
+        "use",
+        "can",
+        "should",
+        "must",
+    }
+    terms: list[str] = []
+    for word in words:
+        lowered = word.lower()
+        if lowered in ignored or lowered in terms:
+            continue
+        terms.append(lowered)
+    return terms or [task.id]
diff --git a/nightshift/repo_tools.py b/nightshift/repo_tools.py
new file mode 100644
index 0000000..6b94e0f
--- /dev/null
+++ b/nightshift/repo_tools.py
@@ -0,0 +1,250 @@
+"""Scoped repository lookup tools."""
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+from pathlib import Path
+import fnmatch
+import re
+
+from .artifacts import ArtifactStore
+from .config import SafetyConfig
+from .errors import SafetyError
+from .runlog import NullRunLogger, RunLogger
+from .safety import resolve_inside_root, resolve_project_root, validate_scoped_paths
+
+
+DEFAULT_MAX_BYTES = 20_000
+DEFAULT_MAX_MATCHES = 100
+
+
+@dataclass(frozen=True)
+class ToolCall:
+    name: str
+    arguments: dict[str, str]
+    output: str
+
+
+class RepoTools:
+    """Read-only repo tools constrained to configured project scope."""
+
+    def __init__(
+        self,
+        project_root: str | Path,
+        safety: SafetyConfig,
+        artifacts: ArtifactStore,
+        logger: RunLogger | None = None,
+    ) -> None:
+        self.project_root = resolve_project_root(project_root)
+        self.safety = safety
+        self.artifacts = artifacts
+        self.logger = logger or NullRunLogger()
+        self.scoped_roots = validate_scoped_paths(
+            self.project_root,
+            safety.scoped_paths or (".",),
+        )
+
+    def list_files(self, path: str = ".", pattern: str = "*", max_files: int = 200) -> str:
+        root = self._resolve_scoped(path, "list_files path")
+        if not root.exists():
+            return f"Path not found: {path}"
+        if root.is_file():
+            candidates = [root]
+        else:
+            candidates = [item for item in root.rglob("*") if item.is_file()]
+        relative_files = [
+            _relative(item, self.project_root)
+            for item in sorted(candidates)
+            if fnmatch.fnmatch(item.name, pattern)
+        ]
+        lines = relative_files[:max_files]
+        if len(relative_files) > max_files:
+            lines.append(f"... truncated {len(relative_files) - max_files} files")
+        return "\n".join(lines) or "No files found."
+
+    def read_file(self, path: str, max_bytes: int = DEFAULT_MAX_BYTES) -> str:
+        """Return ``path`` as line-numbered text, capped at ``max_bytes`` bytes."""
+        file_path = self._resolve_scoped(path, "read_file path")
+        if not file_path.is_file():
+            return f"File not found: {path}"
+        with file_path.open("rb") as handle:
+            # One byte past the cap reveals truncation without loading the whole file.
+            data = handle.read(max_bytes + 1)
+        numbered = _line_number(data[:max_bytes].decode("utf-8", errors="replace"))
+        suffix = "\n... truncated" if len(data) > max_bytes else ""
+        return numbered + suffix
+
+    def grep(self, pattern: str, path: str = ".", max_matches: int = DEFAULT_MAX_MATCHES) -> str:
+        """Return ``file:line: text`` rows for lines under ``path`` matching regex ``pattern``."""
+        root = self._resolve_scoped(path, "grep path")
+        regex = re.compile(pattern)
+        if not root.exists():
+            # Report a missing path the way list_files does, not as "no matches".
+            return f"Path not found: {path}"
+        files = [root] if root.is_file() else [item for item in root.rglob("*") if item.is_file()]
+        matches: list[str] = []
+        for file_path in sorted(files):
+            try:
+                text = file_path.read_text(encoding="utf-8", errors="replace")
+            except OSError:
+                # Unreadable files are skipped rather than failing the whole search.
+                continue
+            for line_number, line in enumerate(text.splitlines(), start=1):
+                if regex.search(line):
+                    matches.append(f"{_relative(file_path, self.project_root)}:{line_number}: {line}")
+                    if len(matches) >= max_matches:
+                        matches.append("... truncated")
+                        return "\n".join(matches)
+        return "\n".join(matches) or "No matches found."
+
+    def write_tool_artifact(self, task_id: str, calls: list[ToolCall], filename: str = "repo-tools.md") -> Path:
+        content = format_tool_calls(calls)
+        path = self.artifacts.write_stage_output(task_id, filename, content)
+        self.logger.event(
+            "artifact.write",
+            "Wrote repo tool artifact",
+            task_id=task_id,
+            artifact_path=path.relative_to(self.project_root),
+        )
+        return path
+
+    def execute_requests(self, task_id: str, requests: list[ToolCall], filename: str = "repo-tools.md") -> str:
+        """Run each lookup request, save the transcript as ``filename`` and return it."""
+        completed: list[ToolCall] = []
+        for request in requests:
+            # Argument names come from model output; prefix them so a key such as
+            # "task_id" or "message" cannot clash with event()'s own parameters.
+            fields = {f"arg_{key}": value for key, value in request.arguments.items()}
+            self.logger.event("tool.call", "Running repo lookup tool", task_id=task_id, tool=request.name, **fields)
+            try:
+                output = self._execute_request(request)
+            except (SafetyError, re.error, OSError) as exc:
+                # A failed lookup becomes that tool's output so the agent sees the
+                # error text and the remaining requests still run.
+                output = str(exc)
+            completed.append(ToolCall(request.name, request.arguments, output))
+        self.write_tool_artifact(task_id, completed, filename=filename)
+        return format_tool_calls(completed)
+
+    def _execute_request(self, request: ToolCall) -> str:
+        if request.name == "list_files":
+            return self.list_files(
+                path=request.arguments.get("path", "."),
+                pattern=request.arguments.get("pattern", "*"),
+            )
+        if request.name == "read_file":
+            path = request.arguments.get("path")
+            if not path:
+                return "Missing required argument: path"
+            return self.read_file(path)
+        if request.name == "grep":
+            pattern = request.arguments.get("pattern")
+            if not pattern:
+                return "Missing required argument: pattern"
+            return self.grep(pattern, path=request.arguments.get("path", "."))
+        return f"Unsupported repo lookup tool: {request.name}"
+
+    def _resolve_scoped(self, path: str, context: str) -> Path:
+        resolved = resolve_inside_root(self.project_root, path, context)
+        for scoped_root in self.scoped_roots:
+            try:
+                resolved.relative_to(scoped_root)
+                return resolved
+            except ValueError:
+                continue
+        scopes = ", ".join(_relative(item, self.project_root) for item in self.scoped_roots)
+        raise SafetyError(f"Safety error: {context} is outside configured scoped paths: {path}. Scopes: {scopes}")
+
+
+def format_tool_calls(calls: list[ToolCall]) -> str:
+    lines = ["# Repo Tool Calls", ""]
+    if not calls:
+        lines.append("No tool calls.")
+        return "\n".join(lines)
+    for index, call in enumerate(calls, start=1):
+        lines.extend(
+            [
+                f"## {index}. {call.name}",
+                "",
+                "Arguments:",
+            ]
+        )
+        for key, value in sorted(call.arguments.items()):
+            lines.append(f"- {key}: `{value}`")
+        lines.extend(["", "Output:", "", "```text", call.output.rstrip(), "```", ""])
+    return "\n".join(lines)
+
+
+def parse_lookup_requests(text: str) -> list[ToolCall]:
+    """Parse a small YAML-like lookup request list from model output."""
+
+    lines = text.splitlines()
+    in_section = False
+    current: dict[str, str] = {}
+    requests: list[ToolCall] = []
+
+    def flush() -> None:
+        nonlocal current
+        if not current:
+            return
+        name = current.pop("tool", "").strip()
+        if name:
+            requests.append(ToolCall(name=name, arguments=dict(current), output=""))
+        current = {}
+
+    for raw_line in lines:
+        stripped = raw_line.strip()
+        if stripped in {"lookup_requests:", "repo_lookup:", "repo_lookups:"}:
+            in_section = True
+            continue
+        if not in_section:
+            continue
+        if not stripped:
+            continue
+        if not raw_line.startswith((" ", "-", "\t")) and not stripped.endswith(":"):
+            break
+        if stripped.startswith("- "):
+            flush()
+            stripped = stripped[2:].strip()
+        if ":" not in stripped:
+            continue
+        key, value = stripped.split(":", 1)
+        key = key.strip()
+        value = value.strip().strip('"').strip("'")
+        if key == "tool" and current:
+            flush()
+        current[key] = value
+    flush()
+    return requests
+
+
+def extract_agent_stdout(artifact_text: str) -> str:
+    lines = artifact_text.splitlines()
+    for index, line in enumerate(lines):
+        if line.strip() != "## stdout":
+            continue
+        start = None
+        for cursor in range(index + 1, len(lines)):
+            if lines[cursor].strip().startswith("```"):
+                start = cursor + 1
+                break
+        if start is None:
+            return ""
+        end = len(lines)
+        for cursor in range(start, len(lines)):
+            if lines[cursor].strip().startswith("```"):
+                end = cursor
+                break
+        return "\n".join(lines[start:end])
+    return artifact_text
+
+
+def _line_number(text: str) -> str:
+    return "\n".join(f"{index}: {line}" for index, line in enumerate(text.splitlines(), start=1))
+
+
+def _relative(path: Path, root: Path) -> str:
+    try:
+        return path.relative_to(root).as_posix()
+    except ValueError:
+        return path.as_posix()
diff --git a/nightshift/runlog.py b/nightshift/runlog.py
new file mode 100644
index 0000000..b8a7014
--- /dev/null
+++ b/nightshift/runlog.py
@@ -0,0 +1,91 @@
+"""Operational run logging for NightShift."""
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import Callable
+
+from .artifacts import ArtifactStore
+
+
+ConsoleWriter = Callable[[str], None]
+
+
+@dataclass(frozen=True)
+class LogEvent:
+    event: str
+    message: str
+    fields: dict[str, object]
+
+
+class RunLogger:
+    """Write concise operational events to CLI and run log artifacts."""
+
+    def __init__(self, console: ConsoleWriter | None = None) -> None:
+        self.console = console
+        self._run_log_path: Path | None = None
+        self._aggregate_log_path: Path | None = None
+
+    def bind(self, artifacts: ArtifactStore) -> None:
+        artifacts.initialize_run()
+        self._run_log_path = artifacts.run_log_path
+        self._aggregate_log_path = artifacts.aggregate_log_path
+
+    def event(self, event: str, message: str, **fields: object) -> None:
+        safe_fields = _redact_fields(fields)
+        line = format_log_line(LogEvent(event=event, message=message, fields=safe_fields))
+        if self.console is not None:
+            self.console(line)
+        for path in (self._run_log_path, self._aggregate_log_path):
+            if path is None:
+                continue
+            path.parent.mkdir(parents=True, exist_ok=True)
+            with path.open("a", encoding="utf-8") as handle:
+                handle.write(line + "\n")
+
+
+class NullRunLogger(RunLogger):
+    def __init__(self) -> None:
+        super().__init__(console=None)
+
+    def bind(self, artifacts: ArtifactStore) -> None:
+        return None
+
+    def event(self, event: str, message: str, **fields: object) -> None:
+        return None
+
+
+def format_log_line(log_event: LogEvent) -> str:
+    timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
+    parts = [timestamp, log_event.event, log_event.message]
+    for key, value in sorted(log_event.fields.items()):
+        if value is None or value == "":
+            continue
+        parts.append(f"{key}={_format_value(value)}")
+    return " | ".join(parts)
+
+
+def tail_lines(path: Path, limit: int = 100) -> list[str]:
+    """Return at most the last ``limit`` lines of ``path``; empty if it is not a file."""
+    if limit <= 0 or not path.is_file():
+        return []
+    content = path.read_text(encoding="utf-8", errors="replace")
+    return content.splitlines()[-limit:]
+
+
+def _format_value(value: object) -> str:
+    """Render ``value`` as one line by turning LF and CR characters into spaces."""
+    return str(value).replace("\n", " ").replace("\r", " ")
+
+
+def _redact_fields(fields: dict[str, object]) -> dict[str, object]:
+    """Copy ``fields``, masking the values of keys that look sensitive.
+
+    A key is sensitive when its lowercased name contains "secret", "token",
+    "password" or "key"; its value is replaced with ``"<redacted>"``.
+    """
+    markers = ("secret", "token", "password", "key")
+    hidden = {name for name in fields if any(marker in name.lower() for marker in markers)}
+    return {name: "<redacted>" if name in hidden else value for name, value in fields.items()}
diff --git a/nightshift/web.py b/nightshift/web.py
index e3d7f98..bbea4d5 100644
--- a/nightshift/web.py
+++ b/nightshift/web.py
@@ -7,6 +7,7 @@ from html import escape
 from pathlib import Path
 
 from .errors import NightShiftError
+from .runlog import tail_lines
 
 
 @dataclass(frozen=True)
@@ -14,6 +15,7 @@ class RunInfo:
     name: str
     path: Path
     summary: str
+    log_tail: tuple[str, ...] = ()
 
 
 def list_runs(artifact_dir: str | Path) -> list[RunInfo]:
@@ -24,7 +26,14 @@ def list_runs(artifact_dir: str | Path) -> list[RunInfo]:
     for path in sorted((item for item in runs_dir.iterdir() if item.is_dir()), reverse=True):
         summary_path = path / "run-summary.md"
         summary = summary_path.read_text(encoding="utf-8") if summary_path.exists() else "No run summary yet."
-        runs.append(RunInfo(name=path.name, path=path, summary=summary))
+        runs.append(
+            RunInfo(
+                name=path.name,
+                path=path,
+                summary=summary,
+                log_tail=tuple(tail_lines(path / "run.log", limit=100)),
+            )
+        )
     return runs
 
 
@@ -51,6 +60,10 @@ def render_dashboard(artifact_dir: str | Path) -> str:
                 "<pre>",
                 escape(run.summary),
                 "</pre>",
+                "<h3>Log Tail</h3>",
+                "<pre>",
+                escape("\n".join(run.log_tail) if run.log_tail else "No run log yet."),
+                "</pre>",
                 "</section>",
             ]
         )
diff --git a/tests/test_agents.py b/tests/test_agents.py
index ad45546..6b73c3e 100644
--- a/tests/test_agents.py
+++ b/tests/test_agents.py
@@ -1,7 +1,7 @@
 from pathlib import Path
 import tempfile
 import unittest
-from unittest.mock import patch
+from unittest.mock import MagicMock, patch
 
 from nightshift.agents import AgentExecutor, build_prompt_bundle, parse_review_output
 from nightshift.agents import AgentInvocation, format_agent_invocation
@@ -132,6 +132,43 @@ class AgentExecutorTests(unittest.TestCase):
             output = (root / result.output_path).read_text(encoding="utf-8")
             self.assertIn("ollama run tiny-model", output)
 
+    def test_openai_compatible_agent_sends_temperature(self) -> None:
+        with tempfile.TemporaryDirectory() as directory:
+            root = Path(directory)
+            prompt_path = root / "planner.md"
+            prompt_path.write_text("Plan carefully.", encoding="utf-8")
+            artifacts = ArtifactStore(root, ".nightshift", run_id="test-run")
+            executor = AgentExecutor(
+                root,
+                {
+                    "planner": AgentConfig(
+                        id="planner",
+                        backend="openai_compatible",
+                        command=None,
+                        model="tiny-model",
+                        base_url="http://localhost:11434/v1",
+                        temperature=0.2,
+                        system_prompt=Path("planner.md"),
+                    )
+                },
+                artifacts,
+            )
+            task = parse_tasks(TASK_MD)[0]
+            stage = StageConfig(id="plan", type="agent", agent="planner", output="plan.md")
+            response = MagicMock()
+            response.__enter__.return_value.read.return_value = (
+                b'{"choices":[{"message":{"content":"api output"}}]}'
+            )
+
+            with patch("nightshift.agents.request.urlopen", return_value=response) as urlopen:
+                result = executor.run_stage(stage, task)
+
+            self.assertEqual(result.status, "pass")
+            request_obj = urlopen.call_args.args[0]
+            body = request_obj.data.decode("utf-8")
+            self.assertIn('"temperature": 0.2', body)
+            self.assertIn("api output", (root / result.output_path).read_text(encoding="utf-8"))
+
     def test_agent_artifact_format_tolerates_missing_streams(self) -> None:
         invocation = AgentInvocation(
             agent_id="planner",
diff --git a/tests/test_config.py b/tests/test_config.py
index 4522dce..41b6156 100644
--- a/tests/test_config.py
+++ b/tests/test_config.py
@@ -167,6 +167,24 @@ class ConfigTests(unittest.TestCase):
             self.assertEqual(config.agents["planner"].model, "qwen2.5-coder:14b")
             self.assertEqual(config.experiment.label, "local-test")
 
+    def test_openai_compatible_backend_loads(self) -> None:
+        with tempfile.TemporaryDirectory() as directory:
+            root = Path(directory)
+            init_project(root)
+            config_path = root / "nightshift.yaml"
+            text = config_path.read_text(encoding="utf-8").replace(
+                "backend: command\n    command: echo",
+                "backend: openai_compatible\n    model: local-model\n    base_url: http://localhost:11434/v1\n    temperature: 0.1",
+                1,
+            )
+            config_path.write_text(text, encoding="utf-8")
+
+            config = load_config(config_path)
+
+            self.assertEqual(config.agents["planner"].backend, "openai_compatible")
+            self.assertEqual(config.agents["planner"].base_url, "http://localhost:11434/v1")
+            self.assertEqual(config.agents["planner"].temperature, 0.1)
+
     def test_command_stage_options_load(self) -> None:
         with tempfile.TemporaryDirectory() as directory:
             root = Path(directory)
@@ -188,6 +206,41 @@ class ConfigTests(unittest.TestCase):
             self.assertEqual(test_stage.timeout_seconds, 30)
             self.assertEqual(test_stage.working_dir, Path("."))
 
+    def test_agent_temperature_loads(self) -> None:
+        with tempfile.TemporaryDirectory() as directory:
+            root = Path(directory)
+            init_project(root)
+            config_path = root / "nightshift.yaml"
+            config_path.write_text(
+                config_path.read_text(encoding="utf-8").replace(
+                    "    system_prompt: agents/planner.md",
+                    "    system_prompt: agents/planner.md\n    temperature: 0.2",
+                    1,
+                ),
+                encoding="utf-8",
+            )
+
+            config = load_config(config_path)
+
+            self.assertEqual(config.agents["planner"].temperature, 0.2)
+
+    def test_agent_temperature_must_be_number(self) -> None:
+        with tempfile.TemporaryDirectory() as directory:
+            root = Path(directory)
+            init_project(root)
+            config_path = root / "nightshift.yaml"
+            config_path.write_text(
+                config_path.read_text(encoding="utf-8").replace(
+                    "    system_prompt: agents/planner.md",
+                    "    system_prompt: agents/planner.md\n    temperature: low",
+                    1,
+                ),
+                encoding="utf-8",
+            )
+
+            with self.assertRaisesRegex(ConfigError, "temperature"):
+                load_config(config_path)
+
     def test_non_command_stage_cannot_define_commands(self) -> None:
         with tempfile.TemporaryDirectory() as directory:
             root = Path(directory)
diff --git a/tests/test_pipeline.py b/tests/test_pipeline.py
index c66c555..be4aa60 100644
--- a/tests/test_pipeline.py
+++ b/tests/test_pipeline.py
@@ -230,6 +230,79 @@ Acceptance Criteria:
             self.assertEqual(result.status, "failed")
             self.assertEqual(result.task_results[0].status, "blocked")
 
+    def test_run_writes_operational_log(self) -> None:
+        with tempfile.TemporaryDirectory() as directory:
+            root = Path(directory)
+            _write_common_files(root)
+            stages = (StageConfig(id="plan", type="agent", agent="planner", output="plan.md"),)
+            artifacts = ArtifactStore(root, ".nightshift", run_id="test-run")
+            config = make_config(root, stages)
+            runner = PipelineRunner(config, artifacts)
+            task = parse_tasks(TASK_MD)[0]
+
+            runner.run_task(task)
+
+            log = (root / ".nightshift" / "runs" / "test-run" / "run.log").read_text(encoding="utf-8")
+            self.assertIn("task.start", log)
+            self.assertIn("stage.start", log)
+            self.assertIn("agent.finish", log)
+
+    def test_planner_lookup_requests_write_files_inspected_and_rerun(self) -> None:
+        with tempfile.TemporaryDirectory() as directory:
+            root = Path(directory)
+            _write_common_files(root)
+            (root / "target.py").write_text("VALUE = 1\n", encoding="utf-8")
+            (root / "fake_planner.py").write_text(
+                "\n".join(
+                    [
+                        "import sys",
+                        "prompt = sys.stdin.read()",
+                        "if 'repo_lookup_results' in prompt:",
+                        "    print('final plan with context')",
+                        "else:",
+                        "    print('lookup_requests:')",
+                        "    print('- tool: read_file')",
+                        "    print('  path: target.py')",
+                    ]
+                ),
+                encoding="utf-8",
+            )
+            stages = (StageConfig(id="plan", type="agent", agent="planner", output="plan.md"),)
+            config = make_config(root, stages)
+            config.agents["planner"] = AgentConfig(
+                id="planner",
+                backend="command",
+                command="python fake_planner.py",
+                system_prompt=Path("planner.md"),
+            )
+            runner = PipelineRunner(config, ArtifactStore(root, ".nightshift", run_id="test-run"))
+            task = parse_tasks(TASK_MD)[0]
+
+            result = runner.run_task(task)
+
+            task_dir = root / ".nightshift" / "runs" / "test-run" / "tasks" / task.id
+            self.assertEqual(result.status, "complete")
+            self.assertTrue((task_dir / "files-inspected.md").exists())
+            self.assertIn("1: VALUE = 1", (task_dir / "files-inspected.md").read_text(encoding="utf-8"))
+            self.assertIn("final plan with context", (task_dir / "plan.md").read_text(encoding="utf-8"))
+
+    def test_repo_context_stage_writes_context_pack(self) -> None:
+        with tempfile.TemporaryDirectory() as directory:
+            root = Path(directory)
+            _write_common_files(root)
+            (root / "app.py").write_text("def run_pipeline():\n    return True\n", encoding="utf-8")
+            stages = (StageConfig(id="context", type="repo_context", output="context-pack.md"),)
+            config = make_config(root, stages)
+            runner = PipelineRunner(config, ArtifactStore(root, ".nightshift", run_id="test-run"))
+            task = parse_tasks(TASK_MD)[0]
+
+            result = runner.run_task(task)
+
+            pack = root / ".nightshift" / "runs" / "test-run" / "tasks" / task.id / "context-pack.md"
+            self.assertEqual(result.status, "complete")
+            self.assertIn("Context Pack", pack.read_text(encoding="utf-8"))
+            self.assertIn("app.py", pack.read_text(encoding="utf-8"))
+
 
 def _write_common_files(root: Path) -> None:
     (root / "nightshift.yaml").write_text("project:\n  name: test\n", encoding="utf-8")
diff --git a/tests/test_repo_tools.py b/tests/test_repo_tools.py
new file mode 100644
index 0000000..73e84a0
--- /dev/null
+++ b/tests/test_repo_tools.py
@@ -0,0 +1,47 @@
+from pathlib import Path
+import tempfile
+import unittest
+
+from nightshift.artifacts import ArtifactStore
+from nightshift.config import SafetyConfig
+from nightshift.repo_tools import RepoTools, parse_lookup_requests
+
+
+class RepoToolsTests(unittest.TestCase):
+    def test_repo_tools_are_scoped_and_line_numbered(self) -> None:
+        with tempfile.TemporaryDirectory() as directory:
+            root = Path(directory)
+            (root / "src").mkdir()
+            (root / "src" / "app.py").write_text("def hello():\n    return 'hi'\n", encoding="utf-8")
+            safety = SafetyConfig(
+                require_clean_worktree=False,
+                scoped_paths=("src",),
+                allowed_commands=(),
+                forbidden_commands=(),
+            )
+            tools = RepoTools(root, safety, ArtifactStore(root, ".nightshift", run_id="test-run"))
+
+            self.assertIn("src/app.py", tools.list_files("src", "*.py"))
+            self.assertIn("1: def hello():", tools.read_file("src/app.py"))
+            self.assertIn("src/app.py:1", tools.grep("hello", "src"))
+
+    def test_parse_lookup_requests(self) -> None:
+        output = """Plan needs context.
+
+lookup_requests:
+- tool: read_file
+  path: nightshift/pipeline.py
+- tool: grep
+  path: nightshift
+  pattern: PipelineRunner
+"""
+
+        requests = parse_lookup_requests(output)
+
+        self.assertEqual([request.name for request in requests], ["read_file", "grep"])
+        self.assertEqual(requests[0].arguments["path"], "nightshift/pipeline.py")
+        self.assertEqual(requests[1].arguments["pattern"], "PipelineRunner")
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/tests/test_web.py b/tests/test_web.py
index 8c09395..83ab6b4 100644
--- a/tests/test_web.py
+++ b/tests/test_web.py
@@ -19,14 +19,23 @@ class WebDashboardTests(unittest.TestCase):
             artifacts = ArtifactStore(root, ".nightshift", run_id="test-run")
             artifacts.initialize_run()
             artifacts.run_summary_path.write_text("# Summary\n\nok", encoding="utf-8")
+            artifacts.run_log_path.write_text(
+                "\n".join(f"line {index}" for index in range(120)),
+                encoding="utf-8",
+            )
 
             runs = list_runs(root / ".nightshift")
             content = read_artifact(root / ".nightshift" / "runs" / "test-run", "run-summary.md")
             escaped = read_artifact(root / ".nightshift" / "runs" / "test-run", "../project-context.md")
+            dashboard = render_dashboard(root / ".nightshift")
 
             self.assertEqual(len(runs), 1)
+            self.assertEqual(len(runs[0].log_tail), 100)
             self.assertIn("ok", content)
             self.assertIn("escapes", escaped)
+            self.assertIn("Log Tail", dashboard)
+            self.assertIn("line 119", dashboard)
+            self.assertNotIn("line 19\n", dashboard)
 
 
 if __name__ == "__main__":