diff --git a/docs/config-reference.md b/docs/config-reference.md index b51658d..8788fb7 100644 --- a/docs/config-reference.md +++ b/docs/config-reference.md @@ -16,6 +16,7 @@ NightShift config is YAML. - `allowed_commands`: exact command-stage allowlist entries after whitespace normalization. - `forbidden_commands`: dangerous fragments blocked before allowlist acceptance. - `allowed_env`: optional environment variable names to pass to command stages. +- `skip_repo_parts`: optional directory or path-part names to exclude from repo lookup tools, in addition to built-in skips such as `.git`, `.nightshift`, `__pycache__`, `.venv`, and `venv`. ## `experiment` @@ -104,14 +105,23 @@ Writer stages: - `code_writer`: agent returns a unified diff directly. - `file_writer`: agent returns complete file content blocks; NightShift generates the unified diff deterministically. Prefer this for local models that wrap or miscount long patch hunks. -`file_writer` blocks use this form: +Code-oriented `file_writer` stages use fenced blocks: ````markdown -```file:relative/path.py +```file:relative/path.to ``` ```` +Writing-oriented `file_writer` stages for `story/chapters` and story state files use delimiter blocks: + +```text +FILE: story/chapters/chapter-001/scene-001.md +---CONTENT--- + +---END--- +``` + Semantic context stage: ```yaml @@ -124,7 +134,7 @@ This stage builds a lightweight repository index of files, Python symbols, impor ### `on_status` Stage Routing -Instead of a single `on_fail` catch-all, use `on_status` to route each review status to a different stage: +Use `on_status` to route stage statuses to different follow-up stages: ```yaml - id: review @@ -135,11 +145,13 @@ Instead of a single `on_fail` catch-all, use `on_status` to route each review st pass: summarize retry: implement fail: plan - escalate: human + escalate: summarize ``` `on_status` supports `pass`, `fail`, `retry`, and `escalate` keys. For `pass`, it overrides sequential progression and any agent-supplied `next_stage`. For non-pass statuses, the lookup order is: `on_status[status]` → `on_fail` → `next_stage` (agent output). +The older `on_pass` key remains supported as a deprecated alias for `on_status.pass`. + ## Failure, Retry, and Resource Artifacts Failed command and validation stages write deterministic diagnostics under the task artifact directory: diff --git a/docs/writer-and-coder.md b/docs/writer-and-coder.md index 52fdd88..749ccc3 100644 --- a/docs/writer-and-coder.md +++ b/docs/writer-and-coder.md @@ -20,7 +20,7 @@ After that correction, the automated test suite passes. - State update file-writer stages receive focused current state context. - Scene editor file-writer stages receive `current_scene_file`. - Agent invocations now write a sibling JSON artifact for reliable stdout/stderr extraction. -- Pipeline config now supports optional `on_pass` routing. +- Pipeline config now supports optional `on_status` routing. The older `on_pass` key remains as a deprecated alias for `on_status.pass`. ## Coding Impact Findings @@ -37,11 +37,11 @@ No non-novel project template files changed in the current diff: The new `editor` agent and review repair routing are only configured in `tutorial-novel/nightshift.yaml`. -### Finding 2: `on_pass` is inert for existing coding configs +### Finding 2: `on_status` is inert for existing coding configs -`on_pass` defaults to `None`, so existing coding templates keep their prior linear pass behavior unless they explicitly opt in. +`on_status` defaults to `None`, so existing coding templates keep their prior linear pass behavior unless they explicitly opt in. -Passing review stages still ignore model-provided `next_stage` values. This preserves the existing safety behavior where reviewers cannot jump around the pipeline on a pass unless the config has an explicit `on_pass`. +Passing review stages still ignore model-provided `next_stage` values. This preserves the existing safety behavior where reviewers cannot jump around the pipeline on a pass unless the config has an explicit `on_status.pass` or legacy `on_pass`. ### Finding 3: Code writer stages still use the same direct patch path @@ -131,6 +131,6 @@ Result: ## Conclusion -After the first-attempt file-writer context fix, I do not see evidence that the writer workflow changes degrade code generation. The shared changes are either opt-in (`on_pass`), artifact-reading improvements (JSON stdout), or narrowly gated to novel state/editor stages. +After the first-attempt file-writer context fix, I do not see evidence that the writer workflow changes degrade code generation. The shared changes are either opt-in (`on_status`), artifact-reading improvements (JSON stdout), or narrowly gated to novel state/editor stages. Remaining non-writer issue: several coding-oriented templates still reference a missing `debugger.md` prompt. That should be handled separately from this writer/coder compatibility pass. diff --git a/nightshift/config.py b/nightshift/config.py index ad71474..3d2023b 100644 --- a/nightshift/config.py +++ b/nightshift/config.py @@ -62,6 +62,7 @@ class StageConfig: commands: tuple[str, ...] = () output: str | None = None on_fail: str | None = None + on_pass: str | None = None on_status: dict[str, str] | None = None shell: bool = True timeout_seconds: int | None = None @@ -398,6 +399,7 @@ def parse_config(raw: dict[str, Any], config_path: Path) -> NightShiftConfig: commands=commands, output=_optional_string(stage_raw.get("output"), f"{stage_context}.output"), on_fail=_optional_string(stage_raw.get("on_fail"), f"{stage_context}.on_fail"), + on_pass=_optional_string(stage_raw.get("on_pass"), f"{stage_context}.on_pass"), on_status=_parse_on_status(stage_raw, stage_context), shell=_optional_bool(stage_raw.get("shell", True), f"{stage_context}.shell"), timeout_seconds=timeout_seconds, @@ -423,6 +425,10 @@ def parse_config(raw: dict[str, Any], config_path: Path) -> NightShiftConfig: raise ConfigError( f"Config error: stage '{stage.id}' on_fail references unknown stage '{stage.on_fail}'." ) + if stage.on_pass and stage.on_pass not in stage_ids: + raise ConfigError( + f"Config error: stage '{stage.id}' on_pass references unknown stage '{stage.on_pass}'." + ) if stage.on_status: for status_key, target in stage.on_status.items(): if target not in stage_ids: @@ -650,8 +656,11 @@ VALID_STATUS_KEYS = frozenset({"pass", "fail", "retry", "escalate"}) def _parse_on_status(raw: dict[str, Any], context: str) -> dict[str, str] | None: on_status_raw = raw.get("on_status") - if on_status_raw is None: + on_pass = _optional_string(raw.get("on_pass"), f"{context}.on_pass") + if on_status_raw is None and on_pass is None: return None + if on_status_raw is None: + return {"pass": on_pass} if on_pass is not None else None if not isinstance(on_status_raw, dict): raise ConfigError(f"Config error: {context}.on_status must be a mapping.") on_status: dict[str, str] = {} @@ -666,4 +675,11 @@ def _parse_on_status(raw: dict[str, Any], context: str) -> dict[str, str] | None f"Config error: {context}.on_status.{key} must be a non-empty string." ) on_status[key] = value + if on_pass is not None: + existing_pass = on_status.get("pass") + if existing_pass is not None and existing_pass != on_pass: + raise ConfigError( + f"Config error: {context}.on_pass conflicts with on_status.pass." + ) + on_status["pass"] = on_pass return on_status diff --git a/nightshift/patches.py b/nightshift/patches.py index b1d2898..396844a 100644 --- a/nightshift/patches.py +++ b/nightshift/patches.py @@ -90,10 +90,9 @@ def repair_hunk_counts(patch: str) -> str: def parse_file_updates(text: str) -> tuple[FileUpdate, ...]: - """Parse model-supplied complete file content blocks.""" + """Parse fenced model-supplied complete file content blocks.""" updates: list[FileUpdate] = [] - updates.extend(_parse_delimited_file_updates(text)) pattern = re.compile( r"```(?:file|path)[:=](?P[^\n`]+)\n(?P.*?)```", flags=re.DOTALL | re.IGNORECASE, @@ -106,22 +105,44 @@ def parse_file_updates(text: str) -> tuple[FileUpdate, ...]: updates.append(FileUpdate(path=path, content=content)) if not updates: raise PipelineError( - "File writer error: no file blocks found. Expected fenced blocks like ```file:path.to." + "File writer error: no fenced file blocks found. Expected fenced blocks like ```file:path.to." ) return tuple(updates) -def _parse_delimited_file_updates(text: str) -> list[FileUpdate]: +def parse_delimited_file_updates(text: str) -> tuple[FileUpdate, ...]: + """Parse delimiter file blocks used by prose and story-state writer stages.""" + + updates: list[FileUpdate] = [] + header_pattern = re.compile(r"(?m)^FILE:\s*(?P[^\n]+)\n---CONTENT---\n") + matches = list(header_pattern.finditer(text)) + for index, match in enumerate(matches): + path = match.group("path").strip().strip("`") + content_start = match.end() + next_file_start = matches[index + 1].start() if index + 1 < len(matches) else len(text) + raw_content = text[content_start:next_file_start] + end_match = re.search(r"(?m)^---END---\s*$", raw_content) + if end_match: + raw_content = raw_content[: end_match.start()] + content = raw_content.rstrip("\r\n") + "\n" + if path: + updates.append(FileUpdate(path=path, content=content)) + if updates: + return tuple(updates) + pattern = re.compile( r"(?ms)^FILE:\s*(?P[^\n]+)\n---CONTENT---\n(?P.*?)\n---END---\s*$" ) - updates: list[FileUpdate] = [] for match in pattern.finditer(text): path = match.group("path").strip().strip("`") content = match.group("content") if path: updates.append(FileUpdate(path=path, content=content + "\n")) - return updates + if not updates: + raise PipelineError( + "File writer error: no delimiter file blocks found. Expected FILE: path with ---CONTENT---/---END---." + ) + return tuple(updates) def generate_patch_from_file_updates( diff --git a/nightshift/pipeline.py b/nightshift/pipeline.py index afec69d..0b313c4 100644 --- a/nightshift/pipeline.py +++ b/nightshift/pipeline.py @@ -30,6 +30,7 @@ from .patches import ( format_validation_result, generate_patch_from_file_updates, normalize_patch_text, + parse_delimited_file_updates, parse_file_updates, validate_patch, ) @@ -200,41 +201,22 @@ class PipelineRunner: retry_notes.append(f"Context update from '{stage.id}': {result.context_update}") if result.status == "pass": - if stage.on_status and "pass" in stage.on_status: - target = stage.on_status["pass"] - if target not in stage_indexes: - final_status = "failed" - final_reason = ( - f"Stage '{stage.id}' on_status.pass references unknown stage '{target}'." - ) - break + pass_target_stage = (stage.on_status or {}).get("pass") or stage.on_pass or result.next_stage + if stage.type in {"agent_review", "review"} and result.next_stage: self.logger.event( - "stage.next", - "Jumping via on_status.pass", + "stage.next_ignored", + "Ignoring next_stage from passing review", run_id=self.artifacts.run_id, task_id=task.id, stage_id=stage.id, - next_stage=target, + requested_next_stage=result.next_stage, ) - index = stage_indexes[target] - continue - if stage.type in {"agent_review", "review"}: - if result.next_stage: - self.logger.event( - "stage.next_ignored", - "Ignoring next_stage from passing review", - run_id=self.artifacts.run_id, - task_id=task.id, - stage_id=stage.id, - requested_next_stage=result.next_stage, - ) - index += 1 - continue - if result.next_stage: - if result.next_stage not in stage_indexes: + pass_target_stage = (stage.on_status or {}).get("pass") or stage.on_pass + if pass_target_stage: + if pass_target_stage not in stage_indexes: final_status = "failed" final_reason = ( - f"Stage '{stage.id}' requested unknown next stage '{result.next_stage}'." + f"Stage '{stage.id}' requested unknown next stage '{pass_target_stage}'." ) break self.logger.event( @@ -243,9 +225,9 @@ class PipelineRunner: run_id=self.artifacts.run_id, task_id=task.id, stage_id=stage.id, - next_stage=result.next_stage, + next_stage=pass_target_stage, ) - index = stage_indexes[result.next_stage] + index = stage_indexes[pass_target_stage] continue index += 1 continue @@ -790,7 +772,7 @@ class PipelineRunner: while True: updates: tuple[FileUpdate, ...] = () try: - updates = parse_file_updates(stdout) + updates = _parse_file_writer_updates(stage, stdout) candidate_index_path = self._write_file_writer_candidates( task.id, stage, @@ -843,7 +825,7 @@ class PipelineRunner: ) break if ( - "no file blocks found" in str(exc) + "file blocks found" in str(exc) and "diff --git " not in stdout and not invalid_rerun_done ): @@ -1842,6 +1824,8 @@ def _invalid_file_writer_output_summary(output: str, reason: str, max_chars: int lowered = output.lower() if "```file:" in lowered or "```path:" in lowered: lines.append("The output started a file block, but NightShift could not parse a complete closed block.") + elif re.search(r"(?m)^FILE:\s*[^\n]+\n---CONTENT---\n", output): + lines.append("The output started a delimiter file block, but NightShift could not parse a complete block.") else: lines.append("The output did not contain a parseable file block.") excerpt = output.strip() @@ -1865,6 +1849,12 @@ def _resolve_retry_target_stage(stage: StageConfig, result: StageResult) -> str return (stage.on_status or {}).get(result.status) or stage.on_fail or result.next_stage +def _parse_file_writer_updates(stage: StageConfig, stdout: str) -> tuple[FileUpdate, ...]: + if _is_writing_file_writer_stage(stage): + return parse_delimited_file_updates(stdout) + return parse_file_updates(stdout) + + def _previous_continuity_review_passed(previous_outputs: dict[str, str]) -> bool: for name, output in previous_outputs.items(): if "continuity" in name and re.search(r"(?im)^status:\s*pass\s*$", output): @@ -1938,10 +1928,10 @@ def _filter_allowed_file_updates(updates: tuple[FileUpdate, ...], stage: StageCo def _file_writer_repair_format_note(stage: StageConfig) -> str: - if _is_state_update_stage(stage): + if _is_writing_file_writer_stage(stage): return ( "Use delimiter file blocks only: FILE: path, ---CONTENT---, complete file content, " - "---END---. Do not use markdown code fences for state update output." + "---END---. Do not use markdown code fences for writing output." ) return "Use complete fenced file blocks with both the opening ```file:path and closing ``` fence." diff --git a/nightshift/templates.py b/nightshift/templates.py index 1c6e586..577a0da 100644 --- a/nightshift/templates.py +++ b/nightshift/templates.py @@ -73,7 +73,7 @@ pipeline: # pass: summarize # retry: implement # fail: plan - # escalate: human + # escalate: summarize on_fail: implement output: review.md diff --git a/tests/test_config.py b/tests/test_config.py index 25d7f64..c35cb58 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -77,6 +77,26 @@ class ConfigTests(unittest.TestCase): }) self.assertIsNone(review_stage.on_fail) + def test_on_pass_loads_as_legacy_alias(self) -> None: + with tempfile.TemporaryDirectory() as directory: + root = Path(directory) + init_project(root) + config_path = root / "nightshift.yaml" + config_path.write_text( + config_path.read_text(encoding="utf-8").replace( + " output: plan.md", + " output: plan.md\n on_pass: summarize", + 1, + ), + encoding="utf-8", + ) + + config = load_config(config_path) + plan_stage = next(stage for stage in config.pipeline.stages if stage.id == "plan") + + self.assertEqual(plan_stage.on_pass, "summarize") + self.assertEqual(plan_stage.on_status, {"pass": "summarize"}) + def test_on_status_rejects_invalid_key(self) -> None: with tempfile.TemporaryDirectory() as directory: root = Path(directory) @@ -107,6 +127,24 @@ class ConfigTests(unittest.TestCase): with self.assertRaisesRegex(ConfigError, "on_status.fail references unknown stage"): load_config(config_path) + def test_skip_repo_parts_loads(self) -> None: + with tempfile.TemporaryDirectory() as directory: + root = Path(directory) + init_project(root) + config_path = root / "nightshift.yaml" + config_path.write_text( + config_path.read_text(encoding="utf-8").replace( + " allowed_commands:", + " skip_repo_parts:\n - dist\n allowed_commands:", + 1, + ), + encoding="utf-8", + ) + + config = load_config(config_path) + + self.assertEqual(config.safety.skip_repo_parts, ("dist",)) + def test_validate_requires_prompt_files(self) -> None: with tempfile.TemporaryDirectory() as directory: root = Path(directory) diff --git a/tests/test_patches.py b/tests/test_patches.py index 20a8a52..ee9bc02 100644 --- a/tests/test_patches.py +++ b/tests/test_patches.py @@ -7,6 +7,7 @@ from nightshift.errors import PipelineError from nightshift.patches import ( generate_patch_from_file_updates, normalize_patch_text, + parse_delimited_file_updates, parse_file_updates, repair_hunk_counts, validate_patch, @@ -248,7 +249,7 @@ test self.assertEqual(result.files, ("src/app.py", "src/test_app.py")) def test_file_updates_parse_explicit_delimiters(self) -> None: - updates = parse_file_updates( + updates = parse_delimited_file_updates( """FILE: story/chapters/chapter-001/scene-001.md ---CONTENT--- Sunlight did not belong here. @@ -261,7 +262,7 @@ Sunlight did not belong here. self.assertEqual(updates[0].content, "Sunlight did not belong here.\n") def test_file_updates_parse_delimiters_without_end_before_next_file(self) -> None: - updates = parse_file_updates( + updates = parse_delimited_file_updates( """Intro prose is ignored. FILE: story/plot-state.md @@ -285,7 +286,7 @@ FILE: story/timeline.md self.assertEqual(updates[1].content, "# Timeline\n\n- SCENE-002 complete.\n") def test_file_updates_parse_mixed_delimiter_end_and_next_file(self) -> None: - updates = parse_file_updates( + updates = parse_delimited_file_updates( """FILE: story/plot-state.md ---CONTENT--- first @@ -301,6 +302,16 @@ second self.assertEqual(updates[0].content, "first\n") self.assertEqual(updates[1].content, "second\n") + def test_code_file_updates_reject_delimiter_blocks(self) -> None: + with self.assertRaisesRegex(PipelineError, "no fenced file blocks"): + parse_file_updates( + """FILE: src/app.py +---CONTENT--- +print("hello") +---END--- +""" + ) + def test_file_updates_reject_duplicate_blocks(self) -> None: with tempfile.TemporaryDirectory() as directory: root = Path(directory) diff --git a/tests/test_pipeline.py b/tests/test_pipeline.py index 61dcae3..65f3768 100644 --- a/tests/test_pipeline.py +++ b/tests/test_pipeline.py @@ -238,11 +238,77 @@ class PipelineRunnerTests(unittest.TestCase): result = runner.run_task(task) - self.assertEqual(result.status, "failed") - self.assertEqual(result.retry_count, 2) + self.assertEqual(result.status, "complete") + self.assertEqual(result.retry_count, 1) self.assertEqual( [r.stage_id for r in result.stage_results], - ["plan", "review", "implement", "review", "implement", "review"], + ["plan", "review", "implement"], + ) + + def test_on_status_pass_jumps_to_configured_stage(self) -> None: + with tempfile.TemporaryDirectory() as directory: + root = Path(directory) + _write_common_files(root) + stages = ( + StageConfig(id="first", type="agent", agent="planner", output="first.md", on_status={"pass": "third"}), + StageConfig( + id="second", + type="command", + commands=('python -c "print(\'should not run\')"',), + output="second-output.txt", + ), + StageConfig(id="third", type="summarize", output="final-notes.md"), + ) + config = make_config(root, stages) + runner = PipelineRunner(config, ArtifactStore(root, ".nightshift", run_id="test-run")) + + result = runner.run_task(parse_tasks(TASK_MD)[0]) + + task_dir = root / ".nightshift" / "runs" / "test-run" / "tasks" / "TASK-001" + self.assertEqual(result.status, "complete") + self.assertEqual([item.stage_id for item in result.stage_results], ["first", "third"]) + self.assertFalse((task_dir / "second-output.txt").exists()) + + def test_on_status_failure_takes_priority_over_agent_next_stage(self) -> None: + with tempfile.TemporaryDirectory() as directory: + root = Path(directory) + _write_common_files(root) + config = make_config(root, (), max_retries=1) + config.agents["reviewer"] = AgentConfig( + id="reviewer", + backend="command", + command=( + "python -c \"print('status: retry\\nreason: needs plan repair\\n" + "next_stage: implement')\"" + ), + system_prompt=Path("reviewer.md"), + ) + config = replace( + config, + pipeline=PipelineConfig( + max_task_retries=1, + stages=( + StageConfig(id="plan", type="agent", agent="planner", output="plan.md"), + StageConfig(id="implement", type="agent", agent="planner", output="implementation-log.md"), + StageConfig( + id="review", + type="agent_review", + agent="reviewer", + on_status={"retry": "plan"}, + on_fail="implement", + output="review.md", + ), + ), + ), + ) + runner = PipelineRunner(config, ArtifactStore(root, ".nightshift", run_id="test-run")) + + result = runner.run_task(parse_tasks(TASK_MD)[0]) + + self.assertEqual(result.retry_count, 1) + self.assertEqual( + [item.stage_id for item in result.stage_results], + ["plan", "implement", "review", "plan", "implement", "review"], ) def test_task_preflight_fails_when_task_specific_test_file_is_missing(self) -> None: @@ -963,12 +1029,14 @@ Acceptance Criteria: (root / "fake_writer.py").write_text( "\n".join( [ - "print('```file:story/chapters/scene.md')", + "print('FILE: story/chapters/scene.md')", + "print('---CONTENT---')", "print('scene prose')", - "print('```')", - "print('```file:story/plot-state.md')", + "print('---END---')", + "print('FILE: story/plot-state.md')", + "print('---CONTENT---')", "print('state')", - "print('```')", + "print('---END---')", ] ), encoding="utf-8", diff --git a/tests/test_repo_tools.py b/tests/test_repo_tools.py index f5e0116..feba61a 100644 --- a/tests/test_repo_tools.py +++ b/tests/test_repo_tools.py @@ -82,6 +82,30 @@ lookup_requests: self.assertIn("src/app.py:1", matches) self.assertNotIn(".nightshift", matches) + def test_repo_tools_skip_configured_path_parts(self) -> None: + with tempfile.TemporaryDirectory() as directory: + root = Path(directory) + (root / "dist").mkdir() + (root / "dist" / "bundle.js").write_text("needle\n", encoding="utf-8") + (root / "src").mkdir() + (root / "src" / "app.js").write_text("needle\n", encoding="utf-8") + safety = SafetyConfig( + require_clean_worktree=False, + scoped_paths=(".",), + allowed_commands=(), + forbidden_commands=(), + skip_repo_parts=("dist",), + ) + tools = RepoTools(root, safety, ArtifactStore(root, ".nightshift", run_id="test-run")) + + files = tools.list_files(".") + matches = tools.grep("needle", ".") + + self.assertIn("src/app.js", files) + self.assertNotIn("dist/bundle.js", files) + self.assertIn("src/app.js:1", matches) + self.assertNotIn("dist/bundle.js", matches) + if __name__ == "__main__": unittest.main()