documentation pass and hardening bugfixes

2026-06-14 18:18:36 +00:00 · 2026-05-17 00:49:17 -07:00 · 2026-05-17 00:49:17 -07:00 · 528c0ddeb5
commit 528c0ddeb5
parent d84d580671
17 changed files with 397 additions and 12 deletions
--- a/QUICKSTART.md
+++ b/QUICKSTART.md
@ -0,0 +1,82 @@
+# NightShift Quickstart
+
+This guide walks through running the current MVP with safe example files.
+
+## 1. Install for Development
+
+```bash
+pip install -e .
+```
+
+Or, from the repository root, run the CLI module directly without installing:
+
+```bash
+python -m nightshift.cli --help
+```
+
+## 2. Create Starter Files
+
+From a project directory:
+
+```bash
+nightshift init
+```
+
+This creates:
+
+```text
+nightshift.yaml
+tasks.md
+agents/
+```
+
+Existing starter files are not overwritten unless you pass `--force`.
+
+## 3. Validate
+
+```bash
+nightshift validate
+```
+
+Validation checks config structure, task parsing, prompt files, scoped paths, and command safety.
+
+## 4. Run One Task
+
+Run the next incomplete task:
+
+```bash
+nightshift run
+```
+
+Run a specific task:
+
+```bash
+nightshift run --task TASK-001
+```
+
+## 5. Review Artifacts
+
+After a run, inspect:
+
+```text
+.nightshift/runs/<run-id>/
+```
+
+Useful files:
+
+```text
+run-summary.md
+config.snapshot.yaml
+tasks/TASK-001/task.md
+tasks/TASK-001/context.md
+tasks/TASK-001/plan.md
+tasks/TASK-001/test-output.txt
+tasks/TASK-001/stage-results.md
+tasks/TASK-001/context-out.md
+tasks/TASK-001/final-notes.md
+```
+
+## Example Templates
+
+Example project files (`nightshift.yaml`, `tasks.md`, and the agent prompts in `agents/`) are available in `templates/`.
+They are safe starter examples and use command-backed fake agents.
--- a/README.md
+++ b/README.md
@ -1,5 +1,7 @@
 # NightShift

+![NightShift logo](docs/images/logo.png)
+
 Auditable local-first AI coding pipelines.

 NightShift is a deterministic pipeline runner for long-running AI-assisted coding workflows. It runs one markdown task at a time through a declarative YAML pipeline, records the important artifacts, and leaves the user with a reviewable work package.
--- a/docs/images/logo.png
+++ b/docs/images/logo.png
--- a/nightshift/agents.py
+++ b/nightshift/agents.py
@ -159,8 +159,8 @@ class AgentExecutor:
                command=agent.command,
                prompt=prompt,
                exit_code=-1,
-                stdout=exc.stdout or "",
-                stderr=exc.stderr or "",
+                stdout=_coerce_output(exc.stdout),
+                stderr=_coerce_output(exc.stderr),
                duration_seconds=duration,
                timed_out=True,
            )
@ -225,6 +225,14 @@ def build_prompt_bundle(
    )


+def _coerce_output(value: str | bytes | None) -> str:
+    if value is None:
+        return ""
+    if isinstance(value, bytes):
+        return value.decode("utf-8", errors="replace")
+    return value
+
+
 def output_contract_for(stage: StageConfig) -> str:
    if stage.type in {"agent_review", "review"}:
        return "\n".join(
--- a/nightshift/artifacts.py
+++ b/nightshift/artifacts.py
@ -6,6 +6,7 @@ from dataclasses import dataclass
 from datetime import datetime, timezone
 from pathlib import Path
 import shutil
+import re

 from .config import NightShiftConfig
 from .errors import ArtifactError, SafetyError
@ -32,7 +33,7 @@ class ArtifactStore:
        except SafetyError as exc:
            raise ArtifactError(str(exc)) from exc

-        self.run_id = run_id or default_run_id()
+        self.run_id = _safe_artifact_segment(run_id or default_run_id(), "run id")
        self.run_dir = self._artifact_path("runs", self.run_id)
        self.tasks_dir = self.run_dir / "tasks"
        self.project_context_path = self.artifact_root / "project-context.md"
@ -71,10 +72,11 @@ class ArtifactStore:
        """Create the artifact directory for one task."""

        self.initialize_run()
-        task_dir = self._artifact_path("runs", self.run_id, "tasks", task_id)
+        safe_task_id = _safe_artifact_segment(task_id, "task id")
+        task_dir = self._artifact_path("runs", self.run_id, "tasks", safe_task_id)
        task_dir.mkdir(parents=True, exist_ok=True)
        return TaskArtifactPaths(
-            task_id=task_id,
+            task_id=safe_task_id,
            directory=task_dir,
            task_snapshot=task_dir / "task.md",
        )
@ -122,3 +124,15 @@ def default_run_id(now: datetime | None = None) -> str:

    value = now or datetime.now(timezone.utc)
    return value.strftime("%Y%m%dT%H%M%SZ")
+
+
+def _safe_artifact_segment(value: str, context: str) -> str:
+    if not isinstance(value, str) or not value:
+        raise ArtifactError(f"Artifact error: {context} must be a non-empty string.")
+    if not re.fullmatch(r"[A-Za-z0-9_.-]+", value):
+        raise ArtifactError(
+            f"Artifact error: {context} contains unsafe characters: {value}"
+        )
+    if value in {".", ".."}:
+        raise ArtifactError(f"Artifact error: {context} cannot be '{value}'.")
+    return value
--- a/nightshift/commands.py
+++ b/nightshift/commands.py
@ -112,8 +112,8 @@ class CommandExecutor:
            return CommandRun(
                command=normalized,
                exit_code=-1,
-                stdout=exc.stdout or "",
-                stderr=exc.stderr or "",
+                stdout=_coerce_output(exc.stdout),
+                stderr=_coerce_output(exc.stderr),
                duration_seconds=duration,
                timed_out=True,
            )
@ -146,3 +146,11 @@ def format_command_runs(stage_id: str, runs: list[CommandRun]) -> str:
            ]
        )
    return "\n".join(lines)
+
+
+def _coerce_output(value: str | bytes | None) -> str:
+    if value is None:
+        return ""
+    if isinstance(value, bytes):
+        return value.decode("utf-8", errors="replace")
+    return value
--- a/nightshift/config.py
+++ b/nightshift/config.py
@ -143,7 +143,10 @@ def parse_config(raw: dict[str, Any], config_path: Path) -> NightShiftConfig:

    safety_raw = _require_mapping(raw["safety"], "safety")
    safety = SafetyConfig(
-        require_clean_worktree=bool(safety_raw.get("require_clean_worktree", False)),
+        require_clean_worktree=_optional_bool(
+            safety_raw.get("require_clean_worktree", False),
+            "safety.require_clean_worktree",
+        ),
        scoped_paths=_string_tuple(safety_raw.get("scoped_paths", []), "safety.scoped_paths"),
        allowed_commands=_string_tuple(safety_raw.get("allowed_commands", []), "safety.allowed_commands"),
        forbidden_commands=_string_tuple(
@ -159,6 +162,15 @@ def parse_config(raw: dict[str, Any], config_path: Path) -> NightShiftConfig:
        agent_raw = _require_mapping(agent_raw_value, f"agents.{agent_id}")
        backend = _require_string(agent_raw, "backend", f"agents.{agent_id}")
        command = _optional_string(agent_raw.get("command"), f"agents.{agent_id}.command")
+        if backend != "command":
+            raise ConfigError(
+                f"Config error: agent '{agent_id}' uses unsupported backend '{backend}'. "
+                "Supported backends: command."
+            )
+        if command is None:
+            raise ConfigError(
+                f"Config error: command backend agent '{agent_id}' must define command."
+            )
        system_prompt = Path(_require_string(agent_raw, "system_prompt", f"agents.{agent_id}"))
        agents[str(agent_id)] = AgentConfig(
            id=str(agent_id),
@ -170,7 +182,10 @@ def parse_config(raw: dict[str, Any], config_path: Path) -> NightShiftConfig:
        )

    pipeline_raw = _require_mapping(raw["pipeline"], "pipeline")
-    max_task_retries = int(pipeline_raw.get("max_task_retries", 0))
+    max_task_retries = _optional_int(
+        pipeline_raw.get("max_task_retries", 0),
+        "pipeline.max_task_retries",
+    )
    if max_task_retries < 0:
        raise ConfigError("Config error: pipeline.max_task_retries must be zero or greater.")

@ -211,6 +226,10 @@ def parse_config(raw: dict[str, Any], config_path: Path) -> NightShiftConfig:

        if stage_type in COMMAND_STAGE_TYPES and not commands:
            raise ConfigError(f"Config error: command stage '{stage_id}' must define commands.")
+        if stage_type not in COMMAND_STAGE_TYPES and commands:
+            raise ConfigError(
+                f"Config error: non-command stage '{stage_id}' must not define commands."
+            )

        stages.append(
            StageConfig(
@ -246,7 +265,10 @@ def _load_yaml_mapping(path: Path) -> dict[str, Any]:
    except ModuleNotFoundError:
        data = _parse_simple_yaml(text)
    else:
+        try:
            data = yaml.safe_load(text)
+        except yaml.YAMLError as exc:  # type: ignore[attr-defined]
+            raise ConfigError(f"Config error: invalid YAML in {path}: {exc}") from exc

    if data is None:
        data = {}
@ -399,6 +421,18 @@ def _optional_string(value: Any, context: str) -> str | None:
    return value


+def _optional_bool(value: Any, context: str) -> bool:
+    if isinstance(value, bool):
+        return value
+    raise ConfigError(f"Config error: '{context}' must be a boolean.")
+
+
+def _optional_int(value: Any, context: str) -> int:
+    if isinstance(value, bool) or not isinstance(value, int):
+        raise ConfigError(f"Config error: '{context}' must be an integer.")
+    return value
+
+
 def _string_tuple(value: Any, context: str) -> tuple[str, ...]:
    if value is None:
        return ()
--- a/nightshift/pipeline.py
+++ b/nightshift/pipeline.py
@ -11,6 +11,7 @@ from .commands import CommandExecutor
 from .config import COMMAND_STAGE_TYPES, NightShiftConfig, StageConfig
 from .context import ContextManager
 from .errors import PipelineError
+from .errors import NightShiftError
 from .reports import ReportGenerator
 from .stages import StageResult
 from .tasks import Task
@ -72,7 +73,20 @@ class PipelineRunner:

        while index < len(stages):
            stage = stages[index]
+            try:
                result = self._run_stage(stage, task, previous_outputs, retry_notes)
+            except NightShiftError as exc:
+                result = StageResult(
+                    stage_id=stage.id,
+                    status="fail",
+                    reason=str(exc),
+                )
+            except OSError as exc:
+                result = StageResult(
+                    stage_id=stage.id,
+                    status="fail",
+                    reason=f"Unexpected OS error while running stage: {exc}",
+                )
            stage_results.append(result)
            previous_outputs[stage.id] = self._read_output(result.output_path)
            if result.context_update:
@ -203,4 +217,3 @@ def format_summary_stage(
            "",
        ]
    )
-
--- a/templates/agents/implementer.md
+++ b/templates/agents/implementer.md
@ -0,0 +1,8 @@
+# Implementer
+
+Describe the smallest implementation steps for the task.
+
+Rules:
+- Stay inside the configured project root.
+- Keep notes concise.
+- Mention any test expectations.
--- a/templates/agents/planner.md
+++ b/templates/agents/planner.md
@ -0,0 +1,8 @@
+# Planner
+
+Create a concise plan for the task.
+
+Rules:
+- Do not edit files.
+- Map the task to acceptance criteria.
+- Keep output reviewable.
--- a/templates/agents/reviewer.md
+++ b/templates/agents/reviewer.md
@ -0,0 +1,10 @@
+# Reviewer
+
+Review the task result.
+
+For this fake-agent template, return a passing structured review:
+
+status: pass
+reason: example reviewer accepted the run
+next_stage:
+context_update:
--- a/templates/nightshift.yaml
+++ b/templates/nightshift.yaml
@ -0,0 +1,61 @@
+project:
+  name: nightshift-example
+  root: .
+  task_file: tasks.md
+  artifact_dir: .nightshift
+
+safety:
+  require_clean_worktree: false
+  scoped_paths:
+    - .
+  allowed_commands:
+    - python -c "print('template command stage ok')"
+  forbidden_commands:
+    - rm -rf
+    - git push
+    - curl | bash
+
+agents:
+  planner:
+    backend: command
+    command: python -c "print('Plan generated by template planner.')"
+    system_prompt: agents/planner.md
+
+  implementer:
+    backend: command
+    command: python -c "print('Implementation notes generated by template implementer.')"
+    system_prompt: agents/implementer.md
+
+  reviewer:
+    backend: command
+    command: python -c "print('status: pass'); print('reason: template reviewer accepted the run')"
+    system_prompt: agents/reviewer.md
+
+pipeline:
+  max_task_retries: 1
+  stages:
+    - id: plan
+      type: agent
+      agent: planner
+      output: plan.md
+
+    - id: implement
+      type: agent
+      agent: implementer
+      output: implementation-log.md
+
+    - id: test
+      type: command
+      commands:
+        - python -c "print('template command stage ok')"
+      output: test-output.txt
+
+    - id: review
+      type: agent_review
+      agent: reviewer
+      on_fail: implement
+      output: review.md
+
+    - id: summarize
+      type: summarize
+      output: final-notes.md
--- a/templates/tasks.md
+++ b/templates/tasks.md
@ -0,0 +1,11 @@
+# Tasks
+
+- [ ] TASK-001: Run the example pipeline
+
+Description:
+Exercise the NightShift MVP with fake command-backed agents and a harmless test command.
+
+Acceptance Criteria:
+- The pipeline creates task artifacts
+- The command stage output is recorded
+- The final report explains the run status
--- a/tests/test_artifacts.py
+++ b/tests/test_artifacts.py
@ -51,6 +51,17 @@ class ArtifactStoreTests(unittest.TestCase):
            with self.assertRaisesRegex(ArtifactError, "escapes task directory"):
                store.write_stage_output("TASK-001", "../leak.txt", "nope")

+    def test_run_id_and_task_id_must_be_safe_path_segments(self) -> None:
+        with tempfile.TemporaryDirectory() as directory:
+            root = Path(directory)
+
+            with self.assertRaisesRegex(ArtifactError, "run id contains unsafe"):
+                ArtifactStore(root, ".nightshift", run_id="../run")
+
+            store = ArtifactStore(root, ".nightshift", run_id="safe-run")
+            with self.assertRaisesRegex(ArtifactError, "task id contains unsafe"):
+                store.create_task_dir("../TASK-001")
+

 if __name__ == "__main__":
    unittest.main()
--- a/tests/test_commands.py
+++ b/tests/test_commands.py
@ -89,6 +89,36 @@ class CommandExecutorTests(unittest.TestCase):
            with self.assertRaisesRegex(CommandError, "not allowlisted"):
                executor.run_command(FAILING_COMMAND)

+    def test_command_timeout_returns_failed_stage_and_writes_output(self) -> None:
+        slow_command = 'python -c "import time; print(\'start\'); time.sleep(2)"'
+        with tempfile.TemporaryDirectory() as directory:
+            root = Path(directory)
+            artifacts = ArtifactStore(root, ".nightshift", run_id="test-run")
+            executor = CommandExecutor(
+                root,
+                SafetyConfig(
+                    require_clean_worktree=False,
+                    scoped_paths=(".",),
+                    allowed_commands=(slow_command,),
+                    forbidden_commands=("rm -rf",),
+                ),
+                artifacts,
+                timeout_seconds=0.1,
+            )
+            stage = StageConfig(
+                id="test",
+                type="command",
+                commands=(slow_command,),
+                output="test-output.txt",
+            )
+
+            result = executor.run_stage(stage, "TASK-001")
+
+            self.assertEqual(result.status, "fail")
+            self.assertIn("timed out", result.reason)
+            output = (root / result.output_path).read_text(encoding="utf-8")
+            self.assertIn("Timed out: true", output)
+

 if __name__ == "__main__":
    unittest.main()
--- a/tests/test_config.py
+++ b/tests/test_config.py
@ -79,6 +79,72 @@ class ConfigTests(unittest.TestCase):
            with self.assertRaisesRegex(ConfigError, "not allowlisted"):
                validate_config(config_path)

+    def test_max_task_retries_must_be_integer(self) -> None:
+        with tempfile.TemporaryDirectory() as directory:
+            root = Path(directory)
+            init_project(root)
+            config_path = root / "nightshift.yaml"
+            config_path.write_text(
+                config_path.read_text(encoding="utf-8").replace(
+                    "max_task_retries: 3",
+                    "max_task_retries: three",
+                ),
+                encoding="utf-8",
+            )
+
+            with self.assertRaisesRegex(ConfigError, "pipeline.max_task_retries"):
+                load_config(config_path)
+
+    def test_require_clean_worktree_must_be_boolean(self) -> None:
+        with tempfile.TemporaryDirectory() as directory:
+            root = Path(directory)
+            init_project(root)
+            config_path = root / "nightshift.yaml"
+            config_path.write_text(
+                config_path.read_text(encoding="utf-8").replace(
+                    "require_clean_worktree: false",
+                    "require_clean_worktree: no-thanks",
+                ),
+                encoding="utf-8",
+            )
+
+            with self.assertRaisesRegex(ConfigError, "safety.require_clean_worktree"):
+                load_config(config_path)
+
+    def test_command_backend_agent_requires_command(self) -> None:
+        with tempfile.TemporaryDirectory() as directory:
+            root = Path(directory)
+            init_project(root)
+            config_path = root / "nightshift.yaml"
+            config_path.write_text(
+                config_path.read_text(encoding="utf-8").replace(
+                    "    command: echo\n    system_prompt: agents/planner.md",
+                    "    system_prompt: agents/planner.md",
+                    1,
+                ),
+                encoding="utf-8",
+            )
+
+            with self.assertRaisesRegex(ConfigError, "must define command"):
+                load_config(config_path)
+
+    def test_non_command_stage_cannot_define_commands(self) -> None:
+        with tempfile.TemporaryDirectory() as directory:
+            root = Path(directory)
+            init_project(root)
+            config_path = root / "nightshift.yaml"
+            config_path.write_text(
+                config_path.read_text(encoding="utf-8").replace(
+                    "      output: plan.md",
+                    "      output: plan.md\n      commands:\n        - python -m unittest",
+                    1,
+                ),
+                encoding="utf-8",
+            )
+
+            with self.assertRaisesRegex(ConfigError, "non-command stage 'plan'"):
+                load_config(config_path)
+

 if __name__ == "__main__":
    unittest.main()
--- a/tests/test_pipeline.py
+++ b/tests/test_pipeline.py
@ -126,6 +126,25 @@ class PipelineRunnerTests(unittest.TestCase):
            self.assertIn("Retry limit reached", result.reason)
            self.assertEqual([item.stage_id for item in result.stage_results], ["implement", "review", "implement", "review", "implement", "review"])

+    def test_stage_error_is_reported_as_failed_result(self) -> None:
+        with tempfile.TemporaryDirectory() as directory:
+            root = Path(directory)
+            _write_common_files(root)
+            stages = (
+                StageConfig(id="plan", type="agent", agent="planner", output="../bad.md"),
+            )
+            config = make_config(root, stages)
+            runner = PipelineRunner(config, ArtifactStore(root, ".nightshift", run_id="test-run"))
+            task = parse_tasks(TASK_MD)[0]
+
+            result = runner.run_task(task)
+
+            self.assertEqual(result.status, "failed")
+            self.assertEqual(result.stage_results[0].status, "fail")
+            self.assertTrue(
+                (root / ".nightshift" / "runs" / "test-run" / "tasks" / task.id / "final-notes.md").exists()
+            )
+

 def _write_common_files(root: Path) -> None:
    (root / "nightshift.yaml").write_text("project:\n  name: test\n", encoding="utf-8")