mirror of
https://github.com/khodges42/nightShift.git
synced 2026-06-14 18:18:36 +00:00
Merge PR #1 from RJS
This commit is contained in:
commit
03438e1e8a
|
|
@ -122,6 +122,24 @@ Semantic context stage:
|
||||||
|
|
||||||
This stage builds a lightweight repository index of files, Python symbols, imports, and tests, then writes compact relevant snippets for the current task. It is keyword based with symbol-aware scoring, so it works without a vector database or network dependency.
|
This stage builds a lightweight repository index of files, Python symbols, imports, and tests, then writes compact relevant snippets for the current task. It is keyword based with symbol-aware scoring, so it works without a vector database or network dependency.
|
||||||
|
|
||||||
|
### `on_status` Stage Routing
|
||||||
|
|
||||||
|
Instead of a single `on_fail` catch-all, use `on_status` to route each review status to a different stage:
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
- id: review
|
||||||
|
type: agent_review
|
||||||
|
agent: reviewer
|
||||||
|
output: review.md
|
||||||
|
on_status:
|
||||||
|
pass: summarize
|
||||||
|
retry: implement
|
||||||
|
fail: plan
|
||||||
|
escalate: human
|
||||||
|
```
|
||||||
|
|
||||||
|
`on_status` supports `pass`, `fail`, `retry`, and `escalate` keys. For `pass`, it overrides sequential progression and any agent-supplied `next_stage`. For non-pass statuses, the lookup order is: `on_status[status]` → `on_fail` → `next_stage` (agent output).
|
||||||
|
|
||||||
## Failure, Retry, and Resource Artifacts
|
## Failure, Retry, and Resource Artifacts
|
||||||
|
|
||||||
Failed command and validation stages write deterministic diagnostics under the task artifact directory:
|
Failed command and validation stages write deterministic diagnostics under the task artifact directory:
|
||||||
|
|
|
||||||
|
|
@ -33,6 +33,7 @@ class SafetyConfig:
|
||||||
allowed_commands: tuple[str, ...]
|
allowed_commands: tuple[str, ...]
|
||||||
forbidden_commands: tuple[str, ...]
|
forbidden_commands: tuple[str, ...]
|
||||||
allowed_env: tuple[str, ...] = ()
|
allowed_env: tuple[str, ...] = ()
|
||||||
|
skip_repo_parts: tuple[str, ...] = ()
|
||||||
|
|
||||||
|
|
||||||
@dataclass(frozen=True)
|
@dataclass(frozen=True)
|
||||||
|
|
@ -61,7 +62,7 @@ class StageConfig:
|
||||||
commands: tuple[str, ...] = ()
|
commands: tuple[str, ...] = ()
|
||||||
output: str | None = None
|
output: str | None = None
|
||||||
on_fail: str | None = None
|
on_fail: str | None = None
|
||||||
on_pass: str | None = None
|
on_status: dict[str, str] | None = None
|
||||||
shell: bool = True
|
shell: bool = True
|
||||||
timeout_seconds: int | None = None
|
timeout_seconds: int | None = None
|
||||||
working_dir: Path | None = None
|
working_dir: Path | None = None
|
||||||
|
|
@ -184,6 +185,9 @@ def parse_config(raw: dict[str, Any], config_path: Path) -> NightShiftConfig:
|
||||||
)
|
)
|
||||||
|
|
||||||
safety_raw = _require_mapping(raw["safety"], "safety")
|
safety_raw = _require_mapping(raw["safety"], "safety")
|
||||||
|
skip_repo_parts = _string_tuple(
|
||||||
|
safety_raw.get("skip_repo_parts", []), "safety.skip_repo_parts"
|
||||||
|
)
|
||||||
safety = SafetyConfig(
|
safety = SafetyConfig(
|
||||||
require_clean_worktree=_optional_bool(
|
require_clean_worktree=_optional_bool(
|
||||||
safety_raw.get("require_clean_worktree", False),
|
safety_raw.get("require_clean_worktree", False),
|
||||||
|
|
@ -195,6 +199,7 @@ def parse_config(raw: dict[str, Any], config_path: Path) -> NightShiftConfig:
|
||||||
safety_raw.get("forbidden_commands", []), "safety.forbidden_commands"
|
safety_raw.get("forbidden_commands", []), "safety.forbidden_commands"
|
||||||
),
|
),
|
||||||
allowed_env=_string_tuple(safety_raw.get("allowed_env", []), "safety.allowed_env"),
|
allowed_env=_string_tuple(safety_raw.get("allowed_env", []), "safety.allowed_env"),
|
||||||
|
skip_repo_parts=skip_repo_parts,
|
||||||
)
|
)
|
||||||
|
|
||||||
agents_raw = _require_mapping(raw["agents"], "agents")
|
agents_raw = _require_mapping(raw["agents"], "agents")
|
||||||
|
|
@ -393,7 +398,7 @@ def parse_config(raw: dict[str, Any], config_path: Path) -> NightShiftConfig:
|
||||||
commands=commands,
|
commands=commands,
|
||||||
output=_optional_string(stage_raw.get("output"), f"{stage_context}.output"),
|
output=_optional_string(stage_raw.get("output"), f"{stage_context}.output"),
|
||||||
on_fail=_optional_string(stage_raw.get("on_fail"), f"{stage_context}.on_fail"),
|
on_fail=_optional_string(stage_raw.get("on_fail"), f"{stage_context}.on_fail"),
|
||||||
on_pass=_optional_string(stage_raw.get("on_pass"), f"{stage_context}.on_pass"),
|
on_status=_parse_on_status(stage_raw, stage_context),
|
||||||
shell=_optional_bool(stage_raw.get("shell", True), f"{stage_context}.shell"),
|
shell=_optional_bool(stage_raw.get("shell", True), f"{stage_context}.shell"),
|
||||||
timeout_seconds=timeout_seconds,
|
timeout_seconds=timeout_seconds,
|
||||||
working_dir=Path(working_dir_raw) if working_dir_raw else None,
|
working_dir=Path(working_dir_raw) if working_dir_raw else None,
|
||||||
|
|
@ -418,9 +423,12 @@ def parse_config(raw: dict[str, Any], config_path: Path) -> NightShiftConfig:
|
||||||
raise ConfigError(
|
raise ConfigError(
|
||||||
f"Config error: stage '{stage.id}' on_fail references unknown stage '{stage.on_fail}'."
|
f"Config error: stage '{stage.id}' on_fail references unknown stage '{stage.on_fail}'."
|
||||||
)
|
)
|
||||||
if stage.on_pass and stage.on_pass not in stage_ids:
|
if stage.on_status:
|
||||||
|
for status_key, target in stage.on_status.items():
|
||||||
|
if target not in stage_ids:
|
||||||
raise ConfigError(
|
raise ConfigError(
|
||||||
f"Config error: stage '{stage.id}' on_pass references unknown stage '{stage.on_pass}'."
|
f"Config error: stage '{stage.id}' on_status.{status_key} "
|
||||||
|
f"references unknown stage '{target}'."
|
||||||
)
|
)
|
||||||
|
|
||||||
return NightShiftConfig(
|
return NightShiftConfig(
|
||||||
|
|
@ -635,3 +643,27 @@ def _string_tuple(value: Any, context: str) -> tuple[str, ...]:
|
||||||
if not isinstance(value, list) or not all(isinstance(item, str) and item for item in value):
|
if not isinstance(value, list) or not all(isinstance(item, str) and item for item in value):
|
||||||
raise ConfigError(f"Config error: '{context}' must be a list of non-empty strings.")
|
raise ConfigError(f"Config error: '{context}' must be a list of non-empty strings.")
|
||||||
return tuple(value)
|
return tuple(value)
|
||||||
|
|
||||||
|
|
||||||
|
VALID_STATUS_KEYS = frozenset({"pass", "fail", "retry", "escalate"})
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_on_status(raw: dict[str, Any], context: str) -> dict[str, str] | None:
    """Read and validate the optional ``on_status`` routing table of a stage.

    Args:
        raw: Raw stage mapping; only its ``"on_status"`` entry is inspected.
        context: Label for the stage, used as the prefix of error messages.

    Returns:
        A new dict mapping each status key to its configured target, or
        ``None`` when ``raw`` has no ``on_status`` entry (or it is ``None``).

    Raises:
        ConfigError: If ``on_status`` is not a mapping, if a key is not one
            of ``VALID_STATUS_KEYS``, or if a value is not a non-empty string.
    """
    routes_raw = raw.get("on_status")
    if routes_raw is None:
        return None
    if not isinstance(routes_raw, dict):
        raise ConfigError(f"Config error: {context}.on_status must be a mapping.")

    routes: dict[str, str] = {}
    for status, target in routes_raw.items():
        if status not in VALID_STATUS_KEYS:
            # Sorted so the list of accepted keys is stable across runs.
            valid = ", ".join(sorted(VALID_STATUS_KEYS))
            raise ConfigError(
                f"Config error: {context}.on_status invalid key '{status}'. "
                f"Valid keys: {valid}."
            )
        if not (isinstance(target, str) and target):
            raise ConfigError(
                f"Config error: {context}.on_status.{status} must be a non-empty string."
            )
        # Only the shape of the value is checked here; this function does not
        # verify that the target names an existing stage.
        routes[status] = target
    return routes
|
||||||
|
|
|
||||||
|
|
@ -106,32 +106,16 @@ def parse_file_updates(text: str) -> tuple[FileUpdate, ...]:
|
||||||
updates.append(FileUpdate(path=path, content=content))
|
updates.append(FileUpdate(path=path, content=content))
|
||||||
if not updates:
|
if not updates:
|
||||||
raise PipelineError(
|
raise PipelineError(
|
||||||
"File writer error: no file blocks found. Expected FILE: path with ---CONTENT---/---END--- or fenced blocks like ```file:path.py."
|
"File writer error: no file blocks found. Expected fenced blocks like ```file:path.to."
|
||||||
)
|
)
|
||||||
return tuple(updates)
|
return tuple(updates)
|
||||||
|
|
||||||
|
|
||||||
def _parse_delimited_file_updates(text: str) -> list[FileUpdate]:
|
def _parse_delimited_file_updates(text: str) -> list[FileUpdate]:
|
||||||
updates: list[FileUpdate] = []
|
|
||||||
header_pattern = re.compile(r"(?m)^FILE:\s*(?P<path>[^\n]+)\n---CONTENT---\n")
|
|
||||||
matches = list(header_pattern.finditer(text))
|
|
||||||
for index, match in enumerate(matches):
|
|
||||||
path = match.group("path").strip().strip("`")
|
|
||||||
content_start = match.end()
|
|
||||||
next_file_start = matches[index + 1].start() if index + 1 < len(matches) else len(text)
|
|
||||||
raw_content = text[content_start:next_file_start]
|
|
||||||
end_match = re.search(r"(?m)^---END---\s*$", raw_content)
|
|
||||||
if end_match:
|
|
||||||
raw_content = raw_content[: end_match.start()]
|
|
||||||
content = raw_content.rstrip("\r\n") + "\n"
|
|
||||||
if path:
|
|
||||||
updates.append(FileUpdate(path=path, content=content))
|
|
||||||
if updates:
|
|
||||||
return updates
|
|
||||||
|
|
||||||
pattern = re.compile(
|
pattern = re.compile(
|
||||||
r"(?ms)^FILE:\s*(?P<path>[^\n]+)\n---CONTENT---\n(?P<content>.*?)\n---END---\s*$"
|
r"(?ms)^FILE:\s*(?P<path>[^\n]+)\n---CONTENT---\n(?P<content>.*?)\n---END---\s*$"
|
||||||
)
|
)
|
||||||
|
updates: list[FileUpdate] = []
|
||||||
for match in pattern.finditer(text):
|
for match in pattern.finditer(text):
|
||||||
path = match.group("path").strip().strip("`")
|
path = match.group("path").strip().strip("`")
|
||||||
content = match.group("content")
|
content = match.group("content")
|
||||||
|
|
|
||||||
|
|
@ -200,8 +200,26 @@ class PipelineRunner:
|
||||||
retry_notes.append(f"Context update from '{stage.id}': {result.context_update}")
|
retry_notes.append(f"Context update from '{stage.id}': {result.context_update}")
|
||||||
|
|
||||||
if result.status == "pass":
|
if result.status == "pass":
|
||||||
pass_target_stage = result.next_stage or stage.on_pass
|
if stage.on_status and "pass" in stage.on_status:
|
||||||
if stage.type in {"agent_review", "review"} and result.next_stage:
|
target = stage.on_status["pass"]
|
||||||
|
if target not in stage_indexes:
|
||||||
|
final_status = "failed"
|
||||||
|
final_reason = (
|
||||||
|
f"Stage '{stage.id}' on_status.pass references unknown stage '{target}'."
|
||||||
|
)
|
||||||
|
break
|
||||||
|
self.logger.event(
|
||||||
|
"stage.next",
|
||||||
|
"Jumping via on_status.pass",
|
||||||
|
run_id=self.artifacts.run_id,
|
||||||
|
task_id=task.id,
|
||||||
|
stage_id=stage.id,
|
||||||
|
next_stage=target,
|
||||||
|
)
|
||||||
|
index = stage_indexes[target]
|
||||||
|
continue
|
||||||
|
if stage.type in {"agent_review", "review"}:
|
||||||
|
if result.next_stage:
|
||||||
self.logger.event(
|
self.logger.event(
|
||||||
"stage.next_ignored",
|
"stage.next_ignored",
|
||||||
"Ignoring next_stage from passing review",
|
"Ignoring next_stage from passing review",
|
||||||
|
|
@ -210,12 +228,13 @@ class PipelineRunner:
|
||||||
stage_id=stage.id,
|
stage_id=stage.id,
|
||||||
requested_next_stage=result.next_stage,
|
requested_next_stage=result.next_stage,
|
||||||
)
|
)
|
||||||
pass_target_stage = stage.on_pass
|
index += 1
|
||||||
if pass_target_stage:
|
continue
|
||||||
if pass_target_stage not in stage_indexes:
|
if result.next_stage:
|
||||||
|
if result.next_stage not in stage_indexes:
|
||||||
final_status = "failed"
|
final_status = "failed"
|
||||||
final_reason = (
|
final_reason = (
|
||||||
f"Stage '{stage.id}' requested unknown next stage '{pass_target_stage}'."
|
f"Stage '{stage.id}' requested unknown next stage '{result.next_stage}'."
|
||||||
)
|
)
|
||||||
break
|
break
|
||||||
self.logger.event(
|
self.logger.event(
|
||||||
|
|
@ -224,14 +243,14 @@ class PipelineRunner:
|
||||||
run_id=self.artifacts.run_id,
|
run_id=self.artifacts.run_id,
|
||||||
task_id=task.id,
|
task_id=task.id,
|
||||||
stage_id=stage.id,
|
stage_id=stage.id,
|
||||||
next_stage=pass_target_stage,
|
next_stage=result.next_stage,
|
||||||
)
|
)
|
||||||
index = stage_indexes[pass_target_stage]
|
index = stage_indexes[result.next_stage]
|
||||||
continue
|
continue
|
||||||
index += 1
|
index += 1
|
||||||
continue
|
continue
|
||||||
|
|
||||||
target_stage = _failure_target_stage(stage, result)
|
target_stage = _resolve_retry_target_stage(stage, result)
|
||||||
analysis_note = self._write_failure_diagnostics(stage, task, result, retry_count)
|
analysis_note = self._write_failure_diagnostics(stage, task, result, retry_count)
|
||||||
if analysis_note:
|
if analysis_note:
|
||||||
retry_notes.append(analysis_note)
|
retry_notes.append(analysis_note)
|
||||||
|
|
@ -1840,14 +1859,10 @@ def _is_malformed_review_result(result: StageResult) -> bool:
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def _failure_target_stage(stage: StageConfig, result: StageResult) -> str | None:
|
def _resolve_retry_target_stage(stage: StageConfig, result: StageResult) -> str | None:
|
||||||
if stage.type not in {"agent_review", "review"}:
|
if stage.type in {"agent_review", "review"} and _is_malformed_review_result(result):
|
||||||
return result.next_stage or stage.on_fail
|
|
||||||
if _is_malformed_review_result(result):
|
|
||||||
return None
|
return None
|
||||||
if result.next_stage and result.next_stage != stage.id:
|
return (stage.on_status or {}).get(result.status) or stage.on_fail or result.next_stage
|
||||||
return result.next_stage
|
|
||||||
return stage.on_fail
|
|
||||||
|
|
||||||
|
|
||||||
def _previous_continuity_review_passed(previous_outputs: dict[str, str]) -> bool:
|
def _previous_continuity_review_passed(previous_outputs: dict[str, str]) -> bool:
|
||||||
|
|
|
||||||
|
|
@ -17,7 +17,7 @@ from .safety import resolve_inside_root, resolve_project_root, validate_scoped_p
|
||||||
DEFAULT_MAX_BYTES = 20_000
|
DEFAULT_MAX_BYTES = 20_000
|
||||||
DEFAULT_MAX_MATCHES = 100
|
DEFAULT_MAX_MATCHES = 100
|
||||||
DEFAULT_MAX_LOOKUP_REQUESTS = 8
|
DEFAULT_MAX_LOOKUP_REQUESTS = 8
|
||||||
SKIPPED_REPO_PARTS = {".git", ".nightshift", "__pycache__", ".venv", "venv"}
|
DEFAULT_SKIPPED_REPO_PARTS = {".git", ".nightshift", "__pycache__", ".venv", "venv"}
|
||||||
|
|
||||||
|
|
||||||
@dataclass(frozen=True)
|
@dataclass(frozen=True)
|
||||||
|
|
@ -45,6 +45,7 @@ class RepoTools:
|
||||||
self.project_root,
|
self.project_root,
|
||||||
safety.scoped_paths or (".",),
|
safety.scoped_paths or (".",),
|
||||||
)
|
)
|
||||||
|
self.skipped_parts = DEFAULT_SKIPPED_REPO_PARTS | set(safety.skip_repo_parts)
|
||||||
|
|
||||||
def list_files(self, path: str = ".", pattern: str = "*", max_files: int = 200) -> str:
|
def list_files(self, path: str = ".", pattern: str = "*", max_files: int = 200) -> str:
|
||||||
root = self._resolve_scoped(path, "list_files path")
|
root = self._resolve_scoped(path, "list_files path")
|
||||||
|
|
@ -57,7 +58,7 @@ class RepoTools:
|
||||||
relative_files = [
|
relative_files = [
|
||||||
_relative(item, self.project_root)
|
_relative(item, self.project_root)
|
||||||
for item in sorted(candidates)
|
for item in sorted(candidates)
|
||||||
if fnmatch.fnmatch(item.name, pattern) and not _is_skipped_repo_path(item, self.project_root)
|
if fnmatch.fnmatch(item.name, pattern) and not _is_skipped_repo_path(item, self.project_root, self.skipped_parts)
|
||||||
]
|
]
|
||||||
lines = relative_files[:max_files]
|
lines = relative_files[:max_files]
|
||||||
if len(relative_files) > max_files:
|
if len(relative_files) > max_files:
|
||||||
|
|
@ -66,7 +67,7 @@ class RepoTools:
|
||||||
|
|
||||||
def read_file(self, path: str, max_bytes: int = DEFAULT_MAX_BYTES) -> str:
|
def read_file(self, path: str, max_bytes: int = DEFAULT_MAX_BYTES) -> str:
|
||||||
file_path = self._resolve_scoped(path, "read_file path")
|
file_path = self._resolve_scoped(path, "read_file path")
|
||||||
if _is_skipped_repo_path(file_path, self.project_root):
|
if _is_skipped_repo_path(file_path, self.project_root, self.skipped_parts):
|
||||||
return f"Path is skipped for repository lookup: {path}"
|
return f"Path is skipped for repository lookup: {path}"
|
||||||
if not file_path.exists() or not file_path.is_file():
|
if not file_path.exists() or not file_path.is_file():
|
||||||
return f"File not found: {path}"
|
return f"File not found: {path}"
|
||||||
|
|
@ -89,7 +90,7 @@ class RepoTools:
|
||||||
files = [root] if root.is_file() else [item for item in root.rglob("*") if item.is_file()]
|
files = [root] if root.is_file() else [item for item in root.rglob("*") if item.is_file()]
|
||||||
matches: list[str] = []
|
matches: list[str] = []
|
||||||
for file_path in sorted(files):
|
for file_path in sorted(files):
|
||||||
if _is_skipped_repo_path(file_path, self.project_root):
|
if _is_skipped_repo_path(file_path, self.project_root, self.skipped_parts):
|
||||||
continue
|
continue
|
||||||
try:
|
try:
|
||||||
text = file_path.read_text(encoding="utf-8", errors="replace")
|
text = file_path.read_text(encoding="utf-8", errors="replace")
|
||||||
|
|
@ -270,9 +271,9 @@ def _relative(path: Path, root: Path) -> str:
|
||||||
return path.as_posix()
|
return path.as_posix()
|
||||||
|
|
||||||
|
|
||||||
def _is_skipped_repo_path(path: Path, root: Path) -> bool:
|
def _is_skipped_repo_path(path: Path, root: Path, skipped_parts: set[str]) -> bool:
|
||||||
try:
|
try:
|
||||||
parts = set(path.relative_to(root).parts)
|
parts = set(path.relative_to(root).parts)
|
||||||
except ValueError:
|
except ValueError:
|
||||||
parts = set(path.parts)
|
parts = set(path.parts)
|
||||||
return bool(parts & SKIPPED_REPO_PARTS)
|
return bool(parts & skipped_parts)
|
||||||
|
|
|
||||||
|
|
@ -68,6 +68,12 @@ pipeline:
|
||||||
- id: review
|
- id: review
|
||||||
type: agent_review
|
type: agent_review
|
||||||
agent: reviewer
|
agent: reviewer
|
||||||
|
# on_fail: implement # catch-all for any non-pass status
|
||||||
|
# on_status: # per-status routing (takes priority over on_fail)
|
||||||
|
# pass: summarize
|
||||||
|
# retry: implement
|
||||||
|
# fail: plan
|
||||||
|
# escalate: human
|
||||||
on_fail: implement
|
on_fail: implement
|
||||||
output: review.md
|
output: review.md
|
||||||
|
|
||||||
|
|
@ -333,7 +339,7 @@ If you need repository context before planning, output lookup requests exactly l
|
||||||
|
|
||||||
lookup_requests:
|
lookup_requests:
|
||||||
- tool: read_file
|
- tool: read_file
|
||||||
path: relative/path.py
|
path: relative/path.to
|
||||||
- tool: grep
|
- tool: grep
|
||||||
path: .
|
path: .
|
||||||
pattern: search_regex
|
pattern: search_regex
|
||||||
|
|
@ -350,7 +356,7 @@ REAL_MODEL_IMPLEMENTER_PROMPT = """You are the implementation agent for NightShi
|
||||||
|
|
||||||
Output only complete file content blocks.
|
Output only complete file content blocks.
|
||||||
Use one fenced block per file with this exact opening form:
|
Use one fenced block per file with this exact opening form:
|
||||||
```file:relative/path.py
|
```file:relative/path.to
|
||||||
<complete file content>
|
<complete file content>
|
||||||
```
|
```
|
||||||
Do not include explanations before or after the file blocks.
|
Do not include explanations before or after the file blocks.
|
||||||
|
|
|
||||||
|
|
@ -55,39 +55,57 @@ class ConfigTests(unittest.TestCase):
|
||||||
with self.assertRaisesRegex(ConfigError, "on_fail references unknown stage"):
|
with self.assertRaisesRegex(ConfigError, "on_fail references unknown stage"):
|
||||||
load_config(config_path)
|
load_config(config_path)
|
||||||
|
|
||||||
def test_on_pass_must_reference_existing_stage(self) -> None:
|
def test_on_status_parses_correctly(self) -> None:
|
||||||
with tempfile.TemporaryDirectory() as directory:
|
with tempfile.TemporaryDirectory() as directory:
|
||||||
root = Path(directory)
|
root = Path(directory)
|
||||||
init_project(root)
|
init_project(root)
|
||||||
config_path = root / "nightshift.yaml"
|
config_path = root / "nightshift.yaml"
|
||||||
config_path.write_text(
|
text = config_path.read_text(encoding="utf-8")
|
||||||
config_path.read_text(encoding="utf-8").replace(
|
text = text.replace(
|
||||||
"on_fail: plan", "on_pass: missing_stage", 1
|
" on_fail: implement\n output: review.md",
|
||||||
),
|
" output: review.md\n on_status:\n pass: summarize\n retry: implement\n fail: plan",
|
||||||
encoding="utf-8",
|
|
||||||
)
|
|
||||||
|
|
||||||
with self.assertRaisesRegex(ConfigError, "on_pass references unknown stage"):
|
|
||||||
load_config(config_path)
|
|
||||||
|
|
||||||
def test_on_pass_loads(self) -> None:
|
|
||||||
with tempfile.TemporaryDirectory() as directory:
|
|
||||||
root = Path(directory)
|
|
||||||
init_project(root)
|
|
||||||
config_path = root / "nightshift.yaml"
|
|
||||||
config_path.write_text(
|
|
||||||
config_path.read_text(encoding="utf-8").replace(
|
|
||||||
" output: plan.md",
|
|
||||||
" output: plan.md\n on_pass: summarize",
|
|
||||||
1,
|
|
||||||
),
|
|
||||||
encoding="utf-8",
|
|
||||||
)
|
)
|
||||||
|
config_path.write_text(text, encoding="utf-8")
|
||||||
|
|
||||||
config = load_config(config_path)
|
config = load_config(config_path)
|
||||||
plan_stage = next(stage for stage in config.pipeline.stages if stage.id == "plan")
|
review_stage = next(s for s in config.pipeline.stages if s.id == "review")
|
||||||
|
|
||||||
self.assertEqual(plan_stage.on_pass, "summarize")
|
self.assertEqual(review_stage.on_status, {
|
||||||
|
"pass": "summarize",
|
||||||
|
"retry": "implement",
|
||||||
|
"fail": "plan",
|
||||||
|
})
|
||||||
|
self.assertIsNone(review_stage.on_fail)
|
||||||
|
|
||||||
|
def test_on_status_rejects_invalid_key(self) -> None:
|
||||||
|
with tempfile.TemporaryDirectory() as directory:
|
||||||
|
root = Path(directory)
|
||||||
|
init_project(root)
|
||||||
|
config_path = root / "nightshift.yaml"
|
||||||
|
text = config_path.read_text(encoding="utf-8")
|
||||||
|
text = text.replace(
|
||||||
|
" on_fail: implement\n output: review.md",
|
||||||
|
" output: review.md\n on_status:\n wat: broken",
|
||||||
|
)
|
||||||
|
config_path.write_text(text, encoding="utf-8")
|
||||||
|
|
||||||
|
with self.assertRaisesRegex(ConfigError, "on_status invalid key"):
|
||||||
|
load_config(config_path)
|
||||||
|
|
||||||
|
def test_on_status_references_unknown_stage(self) -> None:
|
||||||
|
with tempfile.TemporaryDirectory() as directory:
|
||||||
|
root = Path(directory)
|
||||||
|
init_project(root)
|
||||||
|
config_path = root / "nightshift.yaml"
|
||||||
|
text = config_path.read_text(encoding="utf-8")
|
||||||
|
text = text.replace(
|
||||||
|
" on_fail: implement\n output: review.md",
|
||||||
|
" output: review.md\n on_status:\n fail: missing_stage",
|
||||||
|
)
|
||||||
|
config_path.write_text(text, encoding="utf-8")
|
||||||
|
|
||||||
|
with self.assertRaisesRegex(ConfigError, "on_status.fail references unknown stage"):
|
||||||
|
load_config(config_path)
|
||||||
|
|
||||||
def test_validate_requires_prompt_files(self) -> None:
|
def test_validate_requires_prompt_files(self) -> None:
|
||||||
with tempfile.TemporaryDirectory() as directory:
|
with tempfile.TemporaryDirectory() as directory:
|
||||||
|
|
@ -371,6 +389,39 @@ class ConfigTests(unittest.TestCase):
|
||||||
with self.assertRaisesRegex(ConfigError, "non-command stage 'plan'"):
|
with self.assertRaisesRegex(ConfigError, "non-command stage 'plan'"):
|
||||||
load_config(config_path)
|
load_config(config_path)
|
||||||
|
|
||||||
|
def test_on_status_empty_key_fails(self) -> None:
|
||||||
|
with tempfile.TemporaryDirectory() as directory:
|
||||||
|
root = Path(directory)
|
||||||
|
init_project(root)
|
||||||
|
config_path = root / "nightshift.yaml"
|
||||||
|
text = config_path.read_text(encoding="utf-8")
|
||||||
|
text = text.replace(
|
||||||
|
" on_fail: implement\n output: review.md",
|
||||||
|
" output: review.md\n on_status:\n pass: ",
|
||||||
|
)
|
||||||
|
config_path.write_text(text, encoding="utf-8")
|
||||||
|
|
||||||
|
with self.assertRaisesRegex(ConfigError, "must be a non-empty string"):
|
||||||
|
load_config(config_path)
|
||||||
|
|
||||||
|
def test_on_fail_fallback_when_on_status_does_not_cover_status(self) -> None:
|
||||||
|
with tempfile.TemporaryDirectory() as directory:
|
||||||
|
root = Path(directory)
|
||||||
|
init_project(root)
|
||||||
|
config_path = root / "nightshift.yaml"
|
||||||
|
text = config_path.read_text(encoding="utf-8")
|
||||||
|
text = text.replace(
|
||||||
|
" on_fail: implement\n output: review.md",
|
||||||
|
" output: review.md\n on_status:\n pass: summarize\n on_fail: implement",
|
||||||
|
)
|
||||||
|
config_path.write_text(text, encoding="utf-8")
|
||||||
|
|
||||||
|
config = load_config(config_path)
|
||||||
|
review_stage = next(s for s in config.pipeline.stages if s.id == "review")
|
||||||
|
|
||||||
|
self.assertEqual(review_stage.on_status, {"pass": "summarize"})
|
||||||
|
self.assertEqual(review_stage.on_fail, "implement")
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
|
|
||||||
|
|
@ -105,29 +105,145 @@ class PipelineRunnerTests(unittest.TestCase):
|
||||||
)
|
)
|
||||||
self.assertIn("Modified Files", (root / ".nightshift" / "runs" / "test-run" / "run-summary.md").read_text(encoding="utf-8"))
|
self.assertIn("Modified Files", (root / ".nightshift" / "runs" / "test-run" / "run-summary.md").read_text(encoding="utf-8"))
|
||||||
|
|
||||||
def test_on_pass_jumps_to_configured_stage(self) -> None:
|
def test_on_status_routes_pass_to_target(self) -> None:
|
||||||
with tempfile.TemporaryDirectory() as directory:
|
with tempfile.TemporaryDirectory() as directory:
|
||||||
root = Path(directory)
|
root = Path(directory)
|
||||||
_write_common_files(root)
|
_write_common_files(root)
|
||||||
stages = (
|
stages = (
|
||||||
StageConfig(id="first", type="agent", agent="planner", output="first.md", on_pass="third"),
|
StageConfig(id="plan", type="agent", agent="planner", output="plan.md"),
|
||||||
StageConfig(
|
StageConfig(
|
||||||
id="second",
|
id="review",
|
||||||
type="command",
|
type="agent_review",
|
||||||
commands=('python -c "print(\'should not run\')"',),
|
agent="reviewer",
|
||||||
output="second-output.txt",
|
on_status={"pass": "summarize"},
|
||||||
|
output="review.md",
|
||||||
),
|
),
|
||||||
StageConfig(id="third", type="summarize", output="final-notes.md"),
|
StageConfig(id="implement", type="agent", agent="planner", output="impl.md"),
|
||||||
|
StageConfig(id="summarize", type="summarize", output="final-notes.md"),
|
||||||
)
|
)
|
||||||
config = make_config(root, stages)
|
config = make_config(root, stages)
|
||||||
runner = PipelineRunner(config, ArtifactStore(root, ".nightshift", run_id="test-run"))
|
runner = PipelineRunner(config, ArtifactStore(root, ".nightshift", run_id="test-run"))
|
||||||
|
task = parse_tasks(TASK_MD)[0]
|
||||||
|
|
||||||
result = runner.run_task(parse_tasks(TASK_MD)[0])
|
result = runner.run_task(task)
|
||||||
|
|
||||||
task_dir = root / ".nightshift" / "runs" / "test-run" / "tasks" / "TASK-001"
|
|
||||||
self.assertEqual(result.status, "complete")
|
self.assertEqual(result.status, "complete")
|
||||||
self.assertEqual([item.stage_id for item in result.stage_results], ["first", "third"])
|
self.assertEqual(result.retry_count, 0)
|
||||||
self.assertFalse((task_dir / "second-output.txt").exists())
|
self.assertEqual(
|
||||||
|
[r.stage_id for r in result.stage_results],
|
||||||
|
["plan", "review", "summarize"],
|
||||||
|
)
|
||||||
|
|
||||||
|
def test_on_status_routes_fail_to_target(self) -> None:
    # The reviewer command always prints "status: fail". With
    # on_status={"fail": "plan"} the run is expected to alternate between
    # plan and review and end as failed after two retries.
    reviewer_command = 'python -c "print(\'status: fail\\nreason: bad plan\')"'
    with tempfile.TemporaryDirectory() as directory:
        root = Path(directory)
        _write_common_files(root)
        plan_stage = StageConfig(id="plan", type="agent", agent="planner", output="plan.md")
        review_stage = StageConfig(
            id="review",
            type="agent_review",
            agent="reviewer",
            on_status={"fail": "plan"},
            output="review.md",
        )
        summarize_stage = StageConfig(id="summarize", type="summarize", output="final-notes.md")
        config = make_config(root, (plan_stage, review_stage, summarize_stage))
        # Swap in a reviewer backed by the always-failing command.
        config.agents["reviewer"] = AgentConfig(
            id="reviewer",
            backend="command",
            command=reviewer_command,
            system_prompt=Path("reviewer.md"),
        )
        store = ArtifactStore(root, ".nightshift", run_id="test-run")
        runner = PipelineRunner(config, store)

        result = runner.run_task(parse_tasks(TASK_MD)[0])

        visited = [item.stage_id for item in result.stage_results]
        self.assertEqual(result.status, "failed")
        self.assertEqual(result.retry_count, 2)
        self.assertEqual(
            visited,
            ["plan", "review", "plan", "review", "plan", "review"],
        )
|
||||||
|
|
||||||
|
def test_on_status_escalate_routes_to_human_not_on_fail(self) -> None:
    # The reviewer command always prints "status: escalate". The review stage
    # sets both on_status["escalate"] = "human" and on_fail = "plan"; the
    # expected stage order shows the on_status entry being taken.
    reviewer_command = 'python -c "print(\'status: escalate\\nreason: need human\')"'
    with tempfile.TemporaryDirectory() as directory:
        root = Path(directory)
        _write_common_files(root)
        routing = {
            "retry": "plan",
            "escalate": "human",
        }
        plan_stage = StageConfig(id="plan", type="agent", agent="planner", output="plan.md")
        review_stage = StageConfig(
            id="review",
            type="agent_review",
            agent="reviewer",
            on_status=routing,
            on_fail="plan",
            output="review.md",
        )
        human_stage = StageConfig(id="human", type="summarize", output="human-notes.md")
        summarize_stage = StageConfig(id="summarize", type="summarize", output="final-notes.md")
        config = make_config(root, (plan_stage, review_stage, human_stage, summarize_stage))
        # Swap in a reviewer backed by the always-escalating command.
        config.agents["reviewer"] = AgentConfig(
            id="reviewer",
            backend="command",
            command=reviewer_command,
            system_prompt=Path("reviewer.md"),
        )
        store = ArtifactStore(root, ".nightshift", run_id="test-run")
        runner = PipelineRunner(config, store)

        result = runner.run_task(parse_tasks(TASK_MD)[0])

        visited = [item.stage_id for item in result.stage_results]
        self.assertEqual(result.status, "complete")
        self.assertEqual(result.retry_count, 1)
        self.assertEqual(
            visited,
            ["plan", "review", "human", "summarize"],
        )
|
||||||
|
|
||||||
|
def test_on_fail_fallback_when_status_not_in_on_status(self) -> None:
    # The reviewer command always prints "status: fail", but on_status only
    # has a "retry" entry. The expected stage order shows the run going to
    # the on_fail target ("implement") instead.
    reviewer_command = 'python -c "print(\'status: fail\\nreason: bad\')"'
    with tempfile.TemporaryDirectory() as directory:
        root = Path(directory)
        _write_common_files(root)
        plan_stage = StageConfig(id="plan", type="agent", agent="planner", output="plan.md")
        review_stage = StageConfig(
            id="review",
            type="agent_review",
            agent="reviewer",
            on_status={"retry": "plan"},
            on_fail="implement",
            output="review.md",
        )
        implement_stage = StageConfig(id="implement", type="agent", agent="planner", output="impl.md")
        config = make_config(root, (plan_stage, review_stage, implement_stage))
        # Swap in a reviewer backed by the always-failing command.
        config.agents["reviewer"] = AgentConfig(
            id="reviewer",
            backend="command",
            command=reviewer_command,
            system_prompt=Path("reviewer.md"),
        )
        store = ArtifactStore(root, ".nightshift", run_id="test-run")
        runner = PipelineRunner(config, store)

        result = runner.run_task(parse_tasks(TASK_MD)[0])

        visited = [item.stage_id for item in result.stage_results]
        self.assertEqual(result.status, "failed")
        self.assertEqual(result.retry_count, 2)
        self.assertEqual(
            visited,
            ["plan", "review", "implement", "review", "implement", "review"],
        )
|
||||||
|
|
||||||
def test_task_preflight_fails_when_task_specific_test_file_is_missing(self) -> None:
|
def test_task_preflight_fails_when_task_specific_test_file_is_missing(self) -> None:
|
||||||
with tempfile.TemporaryDirectory() as directory:
|
with tempfile.TemporaryDirectory() as directory:
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue
Block a user