mirror of
https://github.com/khodges42/nightShift.git
synced 2026-06-14 18:18:36 +00:00
documentation pass and hardening bugfixes
This commit is contained in:
parent
d84d580671
commit
528c0ddeb5
82
QUICKSTART.md
Normal file
82
QUICKSTART.md
Normal file
|
|
@ -0,0 +1,82 @@
|
|||
# NightShift Quickstart
|
||||
|
||||
This guide runs the current MVP with safe example files.
|
||||
|
||||
## 1. Install for Development
|
||||
|
||||
```bash
|
||||
pip install -e .
|
||||
```
|
||||
|
||||
Or run the module directly:
|
||||
|
||||
```bash
|
||||
python -m nightshift.cli --help
|
||||
```
|
||||
|
||||
## 2. Create Starter Files
|
||||
|
||||
From a project directory:
|
||||
|
||||
```bash
|
||||
nightshift init
|
||||
```
|
||||
|
||||
This creates:
|
||||
|
||||
```text
|
||||
nightshift.yaml
|
||||
tasks.md
|
||||
agents/
|
||||
```
|
||||
|
||||
Existing starter files are not overwritten unless you pass `--force`.
|
||||
|
||||
## 3. Validate
|
||||
|
||||
```bash
|
||||
nightshift validate
|
||||
```
|
||||
|
||||
Validation checks config structure, task parsing, prompt files, scoped paths, and command safety.
|
||||
|
||||
## 4. Run One Task
|
||||
|
||||
Run the next incomplete task:
|
||||
|
||||
```bash
|
||||
nightshift run
|
||||
```
|
||||
|
||||
Run a specific task:
|
||||
|
||||
```bash
|
||||
nightshift run --task TASK-001
|
||||
```
|
||||
|
||||
## 5. Review Artifacts
|
||||
|
||||
After a run, inspect:
|
||||
|
||||
```text
|
||||
.nightshift/runs/<run-id>/
|
||||
```
|
||||
|
||||
Useful files:
|
||||
|
||||
```text
|
||||
run-summary.md
|
||||
config.snapshot.yaml
|
||||
tasks/TASK-001/task.md
|
||||
tasks/TASK-001/context.md
|
||||
tasks/TASK-001/plan.md
|
||||
tasks/TASK-001/test-output.txt
|
||||
tasks/TASK-001/stage-results.md
|
||||
tasks/TASK-001/context-out.md
|
||||
tasks/TASK-001/final-notes.md
|
||||
```
|
||||
|
||||
## Example Templates
|
||||
|
||||
Example run files are available in `templates/`.
|
||||
They are safe starter examples and use command-backed fake agents.
|
||||
|
|
@ -1,5 +1,7 @@
|
|||
# NightShift
|
||||
|
||||

|
||||
|
||||
Auditable local-first AI coding pipelines.
|
||||
|
||||
NightShift is a deterministic pipeline runner for long-running AI-assisted coding workflows. It runs one markdown task at a time through a declarative YAML pipeline, records the important artifacts, and leaves the user with a reviewable work package.
|
||||
|
|
|
|||
BIN
docs/images/logo.png
Normal file
BIN
docs/images/logo.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 1.3 MiB |
|
|
@ -159,8 +159,8 @@ class AgentExecutor:
|
|||
command=agent.command,
|
||||
prompt=prompt,
|
||||
exit_code=-1,
|
||||
stdout=exc.stdout or "",
|
||||
stderr=exc.stderr or "",
|
||||
stdout=_coerce_output(exc.stdout),
|
||||
stderr=_coerce_output(exc.stderr),
|
||||
duration_seconds=duration,
|
||||
timed_out=True,
|
||||
)
|
||||
|
|
@ -225,6 +225,14 @@ def build_prompt_bundle(
|
|||
)
|
||||
|
||||
|
||||
def _coerce_output(value: str | bytes | None) -> str:
|
||||
if value is None:
|
||||
return ""
|
||||
if isinstance(value, bytes):
|
||||
return value.decode("utf-8", errors="replace")
|
||||
return value
|
||||
|
||||
|
||||
def output_contract_for(stage: StageConfig) -> str:
|
||||
if stage.type in {"agent_review", "review"}:
|
||||
return "\n".join(
|
||||
|
|
|
|||
|
|
@ -6,6 +6,7 @@ from dataclasses import dataclass
|
|||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
import shutil
|
||||
import re
|
||||
|
||||
from .config import NightShiftConfig
|
||||
from .errors import ArtifactError, SafetyError
|
||||
|
|
@ -32,7 +33,7 @@ class ArtifactStore:
|
|||
except SafetyError as exc:
|
||||
raise ArtifactError(str(exc)) from exc
|
||||
|
||||
self.run_id = run_id or default_run_id()
|
||||
self.run_id = _safe_artifact_segment(run_id or default_run_id(), "run id")
|
||||
self.run_dir = self._artifact_path("runs", self.run_id)
|
||||
self.tasks_dir = self.run_dir / "tasks"
|
||||
self.project_context_path = self.artifact_root / "project-context.md"
|
||||
|
|
@ -71,10 +72,11 @@ class ArtifactStore:
|
|||
"""Create the artifact directory for one task."""
|
||||
|
||||
self.initialize_run()
|
||||
task_dir = self._artifact_path("runs", self.run_id, "tasks", task_id)
|
||||
safe_task_id = _safe_artifact_segment(task_id, "task id")
|
||||
task_dir = self._artifact_path("runs", self.run_id, "tasks", safe_task_id)
|
||||
task_dir.mkdir(parents=True, exist_ok=True)
|
||||
return TaskArtifactPaths(
|
||||
task_id=task_id,
|
||||
task_id=safe_task_id,
|
||||
directory=task_dir,
|
||||
task_snapshot=task_dir / "task.md",
|
||||
)
|
||||
|
|
@ -122,3 +124,15 @@ def default_run_id(now: datetime | None = None) -> str:
|
|||
|
||||
value = now or datetime.now(timezone.utc)
|
||||
return value.strftime("%Y%m%dT%H%M%SZ")
|
||||
|
||||
|
||||
def _safe_artifact_segment(value: str, context: str) -> str:
|
||||
if not isinstance(value, str) or not value:
|
||||
raise ArtifactError(f"Artifact error: {context} must be a non-empty string.")
|
||||
if not re.fullmatch(r"[A-Za-z0-9_.-]+", value):
|
||||
raise ArtifactError(
|
||||
f"Artifact error: {context} contains unsafe characters: {value}"
|
||||
)
|
||||
if value in {".", ".."}:
|
||||
raise ArtifactError(f"Artifact error: {context} cannot be '{value}'.")
|
||||
return value
|
||||
|
|
|
|||
|
|
@ -112,8 +112,8 @@ class CommandExecutor:
|
|||
return CommandRun(
|
||||
command=normalized,
|
||||
exit_code=-1,
|
||||
stdout=exc.stdout or "",
|
||||
stderr=exc.stderr or "",
|
||||
stdout=_coerce_output(exc.stdout),
|
||||
stderr=_coerce_output(exc.stderr),
|
||||
duration_seconds=duration,
|
||||
timed_out=True,
|
||||
)
|
||||
|
|
@ -146,3 +146,11 @@ def format_command_runs(stage_id: str, runs: list[CommandRun]) -> str:
|
|||
]
|
||||
)
|
||||
return "\n".join(lines)
|
||||
|
||||
|
||||
def _coerce_output(value: str | bytes | None) -> str:
|
||||
if value is None:
|
||||
return ""
|
||||
if isinstance(value, bytes):
|
||||
return value.decode("utf-8", errors="replace")
|
||||
return value
|
||||
|
|
|
|||
|
|
@ -143,7 +143,10 @@ def parse_config(raw: dict[str, Any], config_path: Path) -> NightShiftConfig:
|
|||
|
||||
safety_raw = _require_mapping(raw["safety"], "safety")
|
||||
safety = SafetyConfig(
|
||||
require_clean_worktree=bool(safety_raw.get("require_clean_worktree", False)),
|
||||
require_clean_worktree=_optional_bool(
|
||||
safety_raw.get("require_clean_worktree", False),
|
||||
"safety.require_clean_worktree",
|
||||
),
|
||||
scoped_paths=_string_tuple(safety_raw.get("scoped_paths", []), "safety.scoped_paths"),
|
||||
allowed_commands=_string_tuple(safety_raw.get("allowed_commands", []), "safety.allowed_commands"),
|
||||
forbidden_commands=_string_tuple(
|
||||
|
|
@ -159,6 +162,15 @@ def parse_config(raw: dict[str, Any], config_path: Path) -> NightShiftConfig:
|
|||
agent_raw = _require_mapping(agent_raw_value, f"agents.{agent_id}")
|
||||
backend = _require_string(agent_raw, "backend", f"agents.{agent_id}")
|
||||
command = _optional_string(agent_raw.get("command"), f"agents.{agent_id}.command")
|
||||
if backend != "command":
|
||||
raise ConfigError(
|
||||
f"Config error: agent '{agent_id}' uses unsupported backend '{backend}'. "
|
||||
"Supported backends: command."
|
||||
)
|
||||
if command is None:
|
||||
raise ConfigError(
|
||||
f"Config error: command backend agent '{agent_id}' must define command."
|
||||
)
|
||||
system_prompt = Path(_require_string(agent_raw, "system_prompt", f"agents.{agent_id}"))
|
||||
agents[str(agent_id)] = AgentConfig(
|
||||
id=str(agent_id),
|
||||
|
|
@ -170,7 +182,10 @@ def parse_config(raw: dict[str, Any], config_path: Path) -> NightShiftConfig:
|
|||
)
|
||||
|
||||
pipeline_raw = _require_mapping(raw["pipeline"], "pipeline")
|
||||
max_task_retries = int(pipeline_raw.get("max_task_retries", 0))
|
||||
max_task_retries = _optional_int(
|
||||
pipeline_raw.get("max_task_retries", 0),
|
||||
"pipeline.max_task_retries",
|
||||
)
|
||||
if max_task_retries < 0:
|
||||
raise ConfigError("Config error: pipeline.max_task_retries must be zero or greater.")
|
||||
|
||||
|
|
@ -211,6 +226,10 @@ def parse_config(raw: dict[str, Any], config_path: Path) -> NightShiftConfig:
|
|||
|
||||
if stage_type in COMMAND_STAGE_TYPES and not commands:
|
||||
raise ConfigError(f"Config error: command stage '{stage_id}' must define commands.")
|
||||
if stage_type not in COMMAND_STAGE_TYPES and commands:
|
||||
raise ConfigError(
|
||||
f"Config error: non-command stage '{stage_id}' must not define commands."
|
||||
)
|
||||
|
||||
stages.append(
|
||||
StageConfig(
|
||||
|
|
@ -246,7 +265,10 @@ def _load_yaml_mapping(path: Path) -> dict[str, Any]:
|
|||
except ModuleNotFoundError:
|
||||
data = _parse_simple_yaml(text)
|
||||
else:
|
||||
try:
|
||||
data = yaml.safe_load(text)
|
||||
except yaml.YAMLError as exc: # type: ignore[attr-defined]
|
||||
raise ConfigError(f"Config error: invalid YAML in {path}: {exc}") from exc
|
||||
|
||||
if data is None:
|
||||
data = {}
|
||||
|
|
@ -399,6 +421,18 @@ def _optional_string(value: Any, context: str) -> str | None:
|
|||
return value
|
||||
|
||||
|
||||
def _optional_bool(value: Any, context: str) -> bool:
|
||||
if isinstance(value, bool):
|
||||
return value
|
||||
raise ConfigError(f"Config error: '{context}' must be a boolean.")
|
||||
|
||||
|
||||
def _optional_int(value: Any, context: str) -> int:
|
||||
if isinstance(value, bool) or not isinstance(value, int):
|
||||
raise ConfigError(f"Config error: '{context}' must be an integer.")
|
||||
return value
|
||||
|
||||
|
||||
def _string_tuple(value: Any, context: str) -> tuple[str, ...]:
|
||||
if value is None:
|
||||
return ()
|
||||
|
|
|
|||
|
|
@ -11,6 +11,7 @@ from .commands import CommandExecutor
|
|||
from .config import COMMAND_STAGE_TYPES, NightShiftConfig, StageConfig
|
||||
from .context import ContextManager
|
||||
from .errors import PipelineError
|
||||
from .errors import NightShiftError
|
||||
from .reports import ReportGenerator
|
||||
from .stages import StageResult
|
||||
from .tasks import Task
|
||||
|
|
@ -72,7 +73,20 @@ class PipelineRunner:
|
|||
|
||||
while index < len(stages):
|
||||
stage = stages[index]
|
||||
try:
|
||||
result = self._run_stage(stage, task, previous_outputs, retry_notes)
|
||||
except NightShiftError as exc:
|
||||
result = StageResult(
|
||||
stage_id=stage.id,
|
||||
status="fail",
|
||||
reason=str(exc),
|
||||
)
|
||||
except OSError as exc:
|
||||
result = StageResult(
|
||||
stage_id=stage.id,
|
||||
status="fail",
|
||||
reason=f"Unexpected OS error while running stage: {exc}",
|
||||
)
|
||||
stage_results.append(result)
|
||||
previous_outputs[stage.id] = self._read_output(result.output_path)
|
||||
if result.context_update:
|
||||
|
|
@ -203,4 +217,3 @@ def format_summary_stage(
|
|||
"",
|
||||
]
|
||||
)
|
||||
|
||||
|
|
|
|||
8
templates/agents/implementer.md
Normal file
8
templates/agents/implementer.md
Normal file
|
|
@ -0,0 +1,8 @@
|
|||
# Implementer
|
||||
|
||||
Describe the smallest implementation steps for the task.
|
||||
|
||||
Rules:
|
||||
- Stay inside the configured project root.
|
||||
- Keep notes concise.
|
||||
- Mention any test expectations.
|
||||
8
templates/agents/planner.md
Normal file
8
templates/agents/planner.md
Normal file
|
|
@ -0,0 +1,8 @@
|
|||
# Planner
|
||||
|
||||
Create a concise plan for the task.
|
||||
|
||||
Rules:
|
||||
- Do not edit files.
|
||||
- Map the task to acceptance criteria.
|
||||
- Keep output reviewable.
|
||||
10
templates/agents/reviewer.md
Normal file
10
templates/agents/reviewer.md
Normal file
|
|
@ -0,0 +1,10 @@
|
|||
# Reviewer
|
||||
|
||||
Review the task result.
|
||||
|
||||
For this fake-agent template, return a passing structured review:
|
||||
|
||||
status: pass
|
||||
reason: example reviewer accepted the run
|
||||
next_stage:
|
||||
context_update:
|
||||
61
templates/nightshift.yaml
Normal file
61
templates/nightshift.yaml
Normal file
|
|
@ -0,0 +1,61 @@
|
|||
project:
|
||||
name: nightshift-example
|
||||
root: .
|
||||
task_file: tasks.md
|
||||
artifact_dir: .nightshift
|
||||
|
||||
safety:
|
||||
require_clean_worktree: false
|
||||
scoped_paths:
|
||||
- .
|
||||
allowed_commands:
|
||||
- python -c "print('template command stage ok')"
|
||||
forbidden_commands:
|
||||
- rm -rf
|
||||
- git push
|
||||
- curl | bash
|
||||
|
||||
agents:
|
||||
planner:
|
||||
backend: command
|
||||
command: python -c "print('Plan generated by template planner.')"
|
||||
system_prompt: agents/planner.md
|
||||
|
||||
implementer:
|
||||
backend: command
|
||||
command: python -c "print('Implementation notes generated by template implementer.')"
|
||||
system_prompt: agents/implementer.md
|
||||
|
||||
reviewer:
|
||||
backend: command
|
||||
command: python -c "print('status: pass'); print('reason: template reviewer accepted the run')"
|
||||
system_prompt: agents/reviewer.md
|
||||
|
||||
pipeline:
|
||||
max_task_retries: 1
|
||||
stages:
|
||||
- id: plan
|
||||
type: agent
|
||||
agent: planner
|
||||
output: plan.md
|
||||
|
||||
- id: implement
|
||||
type: agent
|
||||
agent: implementer
|
||||
output: implementation-log.md
|
||||
|
||||
- id: test
|
||||
type: command
|
||||
commands:
|
||||
- python -c "print('template command stage ok')"
|
||||
output: test-output.txt
|
||||
|
||||
- id: review
|
||||
type: agent_review
|
||||
agent: reviewer
|
||||
on_fail: implement
|
||||
output: review.md
|
||||
|
||||
- id: summarize
|
||||
type: summarize
|
||||
output: final-notes.md
|
||||
11
templates/tasks.md
Normal file
11
templates/tasks.md
Normal file
|
|
@ -0,0 +1,11 @@
|
|||
# Tasks
|
||||
|
||||
- [ ] TASK-001: Run the example pipeline
|
||||
|
||||
Description:
|
||||
Exercise the NightShift MVP with fake command-backed agents and a harmless test command.
|
||||
|
||||
Acceptance Criteria:
|
||||
- The pipeline creates task artifacts
|
||||
- The command stage output is recorded
|
||||
- The final report explains the run status
|
||||
|
|
@ -51,6 +51,17 @@ class ArtifactStoreTests(unittest.TestCase):
|
|||
with self.assertRaisesRegex(ArtifactError, "escapes task directory"):
|
||||
store.write_stage_output("TASK-001", "../leak.txt", "nope")
|
||||
|
||||
def test_run_id_and_task_id_must_be_safe_path_segments(self) -> None:
    """Run ids and task ids containing ``../`` are rejected with ArtifactError."""
    with tempfile.TemporaryDirectory() as tmp:
        project_root = Path(tmp)

        # An unsafe run id must be refused when the store is constructed.
        self.assertRaisesRegex(
            ArtifactError,
            "run id contains unsafe",
            ArtifactStore,
            project_root,
            ".nightshift",
            run_id="../run",
        )

        # With a safe run id, an unsafe task id must still be refused.
        safe_store = ArtifactStore(project_root, ".nightshift", run_id="safe-run")
        self.assertRaisesRegex(
            ArtifactError,
            "task id contains unsafe",
            safe_store.create_task_dir,
            "../TASK-001",
        )
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
|
|
|
|||
|
|
@ -89,6 +89,36 @@ class CommandExecutorTests(unittest.TestCase):
|
|||
with self.assertRaisesRegex(CommandError, "not allowlisted"):
|
||||
executor.run_command(FAILING_COMMAND)
|
||||
|
||||
def test_command_timeout_returns_failed_stage_and_writes_output(self) -> None:
    """A command that outlives the executor timeout fails the stage.

    The stage result must have status ``fail`` with a reason mentioning the
    timeout, and the written stage output must record ``Timed out: true``.
    """
    slow_command = 'python -c "import time; print(\'start\'); time.sleep(2)"'
    safety = SafetyConfig(
        require_clean_worktree=False,
        scoped_paths=(".",),
        allowed_commands=(slow_command,),
        forbidden_commands=("rm -rf",),
    )
    timeout_stage = StageConfig(
        id="test",
        type="command",
        commands=(slow_command,),
        output="test-output.txt",
    )

    with tempfile.TemporaryDirectory() as tmp:
        project_root = Path(tmp)
        store = ArtifactStore(project_root, ".nightshift", run_id="test-run")
        # The 0.1s timeout is far shorter than the command's 2s sleep.
        executor = CommandExecutor(
            project_root,
            safety,
            store,
            timeout_seconds=0.1,
        )

        outcome = executor.run_stage(timeout_stage, "TASK-001")

        self.assertEqual(outcome.status, "fail")
        self.assertIn("timed out", outcome.reason)
        recorded_path = project_root / outcome.output_path
        recorded = recorded_path.read_text(encoding="utf-8")
        self.assertIn("Timed out: true", recorded)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
|
|
|
|||
|
|
@ -79,6 +79,72 @@ class ConfigTests(unittest.TestCase):
|
|||
with self.assertRaisesRegex(ConfigError, "not allowlisted"):
|
||||
validate_config(config_path)
|
||||
|
||||
def test_max_task_retries_must_be_integer(self) -> None:
    """Loading a config whose max_task_retries is not an integer raises ConfigError."""
    with tempfile.TemporaryDirectory() as tmp:
        project_root = Path(tmp)
        init_project(project_root)
        config_path = project_root / "nightshift.yaml"

        # Swap the numeric retry count in the starter config for a word.
        starter_text = config_path.read_text(encoding="utf-8")
        broken_text = starter_text.replace(
            "max_task_retries: 3",
            "max_task_retries: three",
        )
        config_path.write_text(broken_text, encoding="utf-8")

        with self.assertRaisesRegex(ConfigError, "pipeline.max_task_retries"):
            load_config(config_path)
|
||||
|
||||
def test_require_clean_worktree_must_be_boolean(self) -> None:
    """Loading a config with a non-boolean require_clean_worktree raises ConfigError."""
    with tempfile.TemporaryDirectory() as tmp:
        project_root = Path(tmp)
        init_project(project_root)
        config_path = project_root / "nightshift.yaml"

        # Replace the boolean flag in the starter config with free text.
        starter_text = config_path.read_text(encoding="utf-8")
        broken_text = starter_text.replace(
            "require_clean_worktree: false",
            "require_clean_worktree: no-thanks",
        )
        config_path.write_text(broken_text, encoding="utf-8")

        with self.assertRaisesRegex(ConfigError, "safety.require_clean_worktree"):
            load_config(config_path)
|
||||
|
||||
def test_command_backend_agent_requires_command(self) -> None:
    """A command-backend agent with no ``command`` entry is rejected on load."""
    with tempfile.TemporaryDirectory() as tmp:
        project_root = Path(tmp)
        init_project(project_root)
        config_path = project_root / "nightshift.yaml"

        # Drop the command line from the first matching agent only (count=1).
        starter_text = config_path.read_text(encoding="utf-8")
        broken_text = starter_text.replace(
            " command: echo\n system_prompt: agents/planner.md",
            " system_prompt: agents/planner.md",
            1,
        )
        config_path.write_text(broken_text, encoding="utf-8")

        with self.assertRaisesRegex(ConfigError, "must define command"):
            load_config(config_path)
|
||||
|
||||
def test_non_command_stage_cannot_define_commands(self) -> None:
    """A non-command stage that declares ``commands`` is rejected on load."""
    with tempfile.TemporaryDirectory() as tmp:
        project_root = Path(tmp)
        init_project(project_root)
        config_path = project_root / "nightshift.yaml"

        # Append a commands list after the first plan.md output entry (count=1).
        starter_text = config_path.read_text(encoding="utf-8")
        broken_text = starter_text.replace(
            " output: plan.md",
            " output: plan.md\n commands:\n - python -m unittest",
            1,
        )
        config_path.write_text(broken_text, encoding="utf-8")

        with self.assertRaisesRegex(ConfigError, "non-command stage 'plan'"):
            load_config(config_path)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
|
|
|
|||
|
|
@ -126,6 +126,25 @@ class PipelineRunnerTests(unittest.TestCase):
|
|||
self.assertIn("Retry limit reached", result.reason)
|
||||
self.assertEqual([item.stage_id for item in result.stage_results], ["implement", "review", "implement", "review", "implement", "review"])
|
||||
|
||||
def test_stage_error_is_reported_as_failed_result(self) -> None:
    """A stage that errors yields a failed task result and still writes final notes.

    The single configured stage uses the output path ``../bad.md``; the test
    expects the task status ``failed``, the first stage status ``fail``, and
    a ``final-notes.md`` file in the task's artifact directory.
    """
    with tempfile.TemporaryDirectory() as tmp:
        project_root = Path(tmp)
        _write_common_files(project_root)

        bad_stage = StageConfig(id="plan", type="agent", agent="planner", output="../bad.md")
        config = make_config(project_root, (bad_stage,))
        store = ArtifactStore(project_root, ".nightshift", run_id="test-run")
        runner = PipelineRunner(config, store)
        task = parse_tasks(TASK_MD)[0]

        outcome = runner.run_task(task)

        self.assertEqual(outcome.status, "failed")
        self.assertEqual(outcome.stage_results[0].status, "fail")
        final_notes = (
            project_root / ".nightshift" / "runs" / "test-run" / "tasks" / task.id / "final-notes.md"
        )
        self.assertTrue(final_notes.exists())
|
||||
|
||||
|
||||
def _write_common_files(root: Path) -> None:
|
||||
(root / "nightshift.yaml").write_text("project:\n name: test\n", encoding="utf-8")
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user