diff --git a/agents/implementer.md b/agents/implementer.md index f5eb644..74dc3fe 100644 --- a/agents/implementer.md +++ b/agents/implementer.md @@ -1,9 +1,13 @@ You are the implementation agent for NightShift. -Output only a unified diff. -Do not wrap the patch in markdown fences. +Output only complete file content blocks. +Use one fenced block per changed file: + +```file:relative/path.py + +``` + Do not include explanations before or after the patch. -Use diff --git headers. Include tests when needed. Keep the change as small as possible. -Only edit files needed for the task. \ No newline at end of file +Only edit files needed for the task. diff --git a/docs/config-reference.md b/docs/config-reference.md index 35884e7..bde3367 100644 --- a/docs/config-reference.md +++ b/docs/config-reference.md @@ -68,11 +68,12 @@ Patch validator stage options: - `max_lines`: max changed lines. - `forbidden_paths`: paths the patch must not touch. - Unified diff hunk line prefixes and hunk line counts are validated before patch apply. +- The patch normalizer recomputes hunk line counts from hunk bodies for direct unified diff output. Writer stages: - `code_writer`: agent returns a unified diff directly. -- `file_writer`: agent returns complete file content blocks; NightShift generates the unified diff deterministically. +- `file_writer`: agent returns complete file content blocks; NightShift generates the unified diff deterministically. Prefer this for local models that wrap or miscount long patch hunks. `file_writer` blocks use this form: diff --git a/docs/design.md b/docs/design.md index 0031a5f..5012ca4 100644 --- a/docs/design.md +++ b/docs/design.md @@ -874,7 +874,7 @@ NightShift currently provides: * Context pack generation * Unified diff code-writing contract * Deterministic diff generation from model-supplied complete file blocks -* Patch normalization, validation, dry-run, and apply modes +* Patch normalization, deterministic hunk-count repair, validation, dry-run, and apply modes * Per-attempt retry patch artifacts such as `repair-1.patch`, `normalized-1.patch`, and `patch-validation-1.md` * Test/static failure repair loops via bounded stage retries * Prompt bundle construction with project, task, retry, and previous-stage context @@ -1025,7 +1025,7 @@ The next important additions are: Continue improving per-attempt artifact preservation. Patch retries now preserve files such as `repair-1.patch`, `normalized-1.patch`, and `patch-validation-1.md`; future work should add richer latest-attempt indexes and dashboard navigation. 8. Patch repair stage - Add an explicit patch repair or strict normalizer stage that receives the invalid patch, validation error, and relevant source excerpts, then returns a complete replacement patch. This stage should remain bounded by strict validation and should not silently guess intent for arbitrary malformed hunks. + Hunk counts are now deterministically recomputed during normalization for direct unified diff output. Future work should add an explicit patch repair stage for malformed hunk bodies that receives the invalid patch, validation error, and relevant source excerpts, then returns a complete replacement patch. This stage should remain bounded by strict validation and should not silently guess intent for arbitrary malformed hunks. 9. Richer dashboard Add task/stage navigation, patch views, validation status, run log tail, and artifact links without adding mutation controls. diff --git a/docs/troubleshooting.md b/docs/troubleshooting.md index 7196fcb..85df589 100644 --- a/docs/troubleshooting.md +++ b/docs/troubleshooting.md @@ -20,6 +20,10 @@ If `require_clean_worktree: true`, NightShift blocks dirty repositories before c The `ollama` backend uses Ollama's local HTTP API, normally at `http://localhost:11434/api/generate`. Confirm Ollama is running and the configured model is available with `ollama list` or `ollama pull `. Tests do not require Ollama. +## Patch validation reports hunk count mismatch + +Use `file_writer` for local model runs when possible. It asks the model for complete file blocks and lets NightShift generate the unified diff. For direct `code_writer` patches, the normalizer now recomputes hunk counts before validation, but malformed hunk bodies still fail validation. + ## Flask dashboard fails Install Flask: diff --git a/nightshift.yaml b/nightshift.yaml index d3ae41a..a953a0a 100644 --- a/nightshift.yaml +++ b/nightshift.yaml @@ -48,7 +48,7 @@ pipeline: output: context-pack.md - id: implement - type: code_writer + type: file_writer agent: implementer output: proposed.patch @@ -86,4 +86,4 @@ pipeline: - id: summarize type: summarize - output: final-notes.md \ No newline at end of file + output: final-notes.md diff --git a/nightshift/patches.py b/nightshift/patches.py index 6dcc5e9..a8903fe 100644 --- a/nightshift/patches.py +++ b/nightshift/patches.py @@ -59,7 +59,34 @@ def normalize_patch_text(text: str) -> str: patch = extract_unified_diff(text) if "@@" not in patch: raise PipelineError("Patch error: unified diff has no hunks.") - return patch + return repair_hunk_counts(patch) + + +def repair_hunk_counts(patch: str) -> str: + """Rewrite unified diff hunk counts from the actual hunk body.""" + + lines = patch.splitlines() + repaired: list[str] = [] + index = 0 + while index < len(lines): + line = lines[index] + if not line.startswith("@@"): + repaired.append(line) + index += 1 + continue + + body: list[str] = [] + body_index = index + 1 + while body_index < len(lines): + next_line = lines[body_index] + if next_line.startswith("@@") or next_line.startswith("diff --git "): + break + body.append(next_line) + body_index += 1 + repaired.append(_format_hunk_header(line, body, index + 1)) + repaired.extend(body) + index = body_index + return "\n".join(repaired).rstrip() + "\n" def parse_file_updates(text: str) -> tuple[FileUpdate, ...]: @@ -290,9 +317,9 @@ def _validate_hunk_counts(patch: str) -> None: if line.startswith(" "): current["old_actual"] += 1 current["new_actual"] += 1 - elif line.startswith("-") and not line.startswith("---"): + elif line.startswith("-"): current["old_actual"] += 1 - elif line.startswith("+") and not line.startswith("+++"): + elif line.startswith("+"): current["new_actual"] += 1 flush(len(patch.splitlines()) + 1) @@ -318,6 +345,39 @@ def _parse_hunk_header(line: str, line_number: int) -> dict[str, int]: } +def _format_hunk_header(line: str, body: list[str], line_number: int) -> str: + match = re.match( + r"^@@ -(?P\d+)(?:,(?P\d+))? " + r"\+(?P\d+)(?:,(?P\d+))? @@(?P
.*)$", + line, + ) + if not match: + raise PipelineError( + f"Patch validation failed: malformed hunk header at line {line_number}." + ) + old_count = 0 + new_count = 0 + for body_line in body: + if body_line.startswith("\\"): + continue + if body_line.startswith(" "): + old_count += 1 + new_count += 1 + elif body_line.startswith("-"): + old_count += 1 + elif body_line.startswith("+"): + new_count += 1 + return ( + f"@@ -{match.group('old_start')}{_format_count(old_count)} " + f"+{match.group('new_start')}{_format_count(new_count)} @@" + f"{match.group('section')}" + ) + + +def _format_count(count: int) -> str: + return "" if count == 1 else f",{count}" + + def _validate_file_states(patch: str, root: Path) -> None: current_path: str | None = None current_is_new = False @@ -352,8 +412,15 @@ def _validate_file_states(patch: str, root: Path) -> None: def _changed_line_count(patch: str) -> int: count = 0 + in_hunk = False for line in patch.splitlines(): - if line.startswith(("+++", "---")): + if line.startswith("diff --git "): + in_hunk = False + continue + if line.startswith("@@"): + in_hunk = True + continue + if not in_hunk or line.startswith("\\"): continue if line.startswith(("+", "-")): count += 1 diff --git a/nightshift/pipeline.py b/nightshift/pipeline.py index 538a62a..3f2bbeb 100644 --- a/nightshift/pipeline.py +++ b/nightshift/pipeline.py @@ -2,7 +2,7 @@ from __future__ import annotations -from dataclasses import dataclass +from dataclasses import dataclass, replace from pathlib import Path import re @@ -202,10 +202,7 @@ class PipelineRunner: retry_count=retry_count, next_stage=target_stage, ) - retry_notes.append( - f"Retry {retry_count}: stage '{stage.id}' returned " - f"{result.status} ({result.reason}); redirecting to '{target_stage}'." - ) + retry_notes.append(self._format_retry_note(retry_count, stage, result, target_stage)) index = stage_indexes[target_stage] continue @@ -441,8 +438,9 @@ class PipelineRunner: if chart_path.exists(): enriched_outputs["project-context-chart.md"] = chart_path.read_text(encoding="utf-8", errors="replace") context = self.context.read_context(task, retry_notes) + agent_stage = self._writer_agent_stage(stage, retry_count) result = self.agent_executor.run_stage( - stage, + agent_stage, task, enriched_outputs, retry_notes, @@ -473,7 +471,7 @@ class PipelineRunner: "Repository lookup results have been provided. Return the unified diff now; do not request more lookups.", ] result = self.agent_executor.run_stage( - stage, + agent_stage, task, rerun_outputs, rerun_notes, @@ -538,8 +536,9 @@ class PipelineRunner: if chart_path.exists(): enriched_outputs["project-context-chart.md"] = chart_path.read_text(encoding="utf-8", errors="replace") context = self.context.read_context(task, retry_notes) + agent_stage = self._writer_agent_stage(stage, retry_count) result = self.agent_executor.run_stage( - stage, + agent_stage, task, enriched_outputs, retry_notes, @@ -570,7 +569,7 @@ class PipelineRunner: "Repository lookup results have been provided. Return complete file blocks now; do not request more lookups.", ] result = self.agent_executor.run_stage( - stage, + agent_stage, task, rerun_outputs, rerun_notes, @@ -588,14 +587,28 @@ class PipelineRunner: self.config.safety, forbidden_paths=stage.forbidden_paths or DEFAULT_FORBIDDEN_PATHS, ) + patch_reason = "Deterministic patch written from file blocks." + log_message = "Wrote deterministic patch from file blocks" except PipelineError as exc: - summary_filename = "implementation-summary.md" if retry_count == 0 else f"repair-summary-{retry_count}.md" - self.artifacts.write_stage_output( - task.id, - summary_filename, - f"# Implementation Summary\n\nStatus: fail\nReason: {exc}\n", - ) - return StageResult(stage.id, "fail", str(exc), output_path=result.output_path) + try: + patch = normalize_patch_text(stdout) + except PipelineError: + summary_filename = "implementation-summary.md" if retry_count == 0 else f"repair-summary-{retry_count}.md" + reason = str(exc) + if "generated patch has no changes" in reason and retry_count: + reason = ( + "File writer error: repair output produced no changes relative to " + "the current workspace. The previous patch was applied, tests failed, " + "and the repair attempt repeated the already-applied file content." + ) + self.artifacts.write_stage_output( + task.id, + summary_filename, + f"# Implementation Summary\n\nStatus: fail\nReason: {reason}\n", + ) + return StageResult(stage.id, "fail", reason, output_path=result.output_path) + patch_reason = "Fallback patch written from unified diff output." + log_message = "Wrote fallback patch from unified diff output" patch_filename = "repair-{0}.patch".format(retry_count) if retry_count else (stage.output or "proposed.patch") summary_filename = "implementation-summary.md" if retry_count == 0 else f"repair-summary-{retry_count}.md" proposed_path = self.artifacts.write_stage_output(task.id, patch_filename, patch) @@ -611,7 +624,7 @@ class PipelineRunner: ) self.logger.event( "artifact.write", - "Wrote deterministic patch from file blocks", + log_message, stage_id=stage.id, task_id=task.id, artifact_path=proposed_path.relative_to(self.config.project.root), @@ -619,11 +632,15 @@ class PipelineRunner: return StageResult( stage.id, "pass", - "Deterministic patch written from file blocks.", + patch_reason, output_path=str(proposed_path.relative_to(self.config.project.root)), context_update=f"Implementation summary: {summary_path.relative_to(self.config.project.root).as_posix()}", ) + def _writer_agent_stage(self, stage: StageConfig, retry_count: int) -> StageConfig: + suffix = f"-{retry_count}" if retry_count else "" + return replace(stage, output=f"{stage.id}-agent-output{suffix}.md") + def _run_patch_normalizer_stage( self, stage: StageConfig, @@ -888,6 +905,51 @@ class PipelineRunner: return "" return path.read_text(encoding="utf-8") + def _format_retry_note( + self, + retry_count: int, + stage: StageConfig, + result: StageResult, + target_stage: str, + ) -> str: + note = ( + f"Retry {retry_count}: stage '{stage.id}' returned " + f"{result.status} ({result.reason}); redirecting to '{target_stage}'." + ) + excerpt = self._failure_excerpt(result.output_path) + if not excerpt: + return note + return f"{note}\n\nRelevant failure output:\n```text\n{excerpt}\n```" + + def _failure_excerpt(self, output_path: str | None, max_chars: int = 3500) -> str: + content = self._read_output(output_path) + if not content.strip(): + return "" + patterns = ( + "error", + "fail", + "traceback", + "assertionerror", + "exception", + "exit code", + "stderr", + "stdout", + "timed out", + ) + lines = content.splitlines() + selected = [ + line + for line in lines + if any(pattern in line.lower() for pattern in patterns) + ] + excerpt = "\n".join(selected).strip() + if len(excerpt) < 400: + excerpt = content.strip() + excerpt = re.sub(r"\n{4,}", "\n\n\n", excerpt) + if len(excerpt) <= max_chars: + return excerpt + return excerpt[:max_chars].rstrip() + "\n... " + def format_summary_stage( task: Task, previous_outputs: dict[str, str], diff --git a/nightshift/reports.py b/nightshift/reports.py index 9a6e808..633a318 100644 --- a/nightshift/reports.py +++ b/nightshift/reports.py @@ -16,6 +16,7 @@ class TaskReport: final_notes_path: Path stage_results_path: Path run_summary_path: Path + devlog_path: Path class ReportGenerator: @@ -77,7 +78,19 @@ class ReportGenerator: ), encoding="utf-8", ) - return TaskReport(final_notes_path, stage_results_path, self.artifacts.run_summary_path) + devlog_path = self.artifacts.run_dir / "devlog.md" + devlog_path.write_text( + format_devlog( + task=task, + status=status, + reason=reason, + retry_count=retry_count, + stage_results=stage_results, + modified_files=modified_files, + ), + encoding="utf-8", + ) + return TaskReport(final_notes_path, stage_results_path, self.artifacts.run_summary_path, devlog_path) def format_stage_results( @@ -205,6 +218,99 @@ def format_run_summary( ) +def format_devlog( + task: Task, + status: str, + reason: str, + retry_count: int, + stage_results: list[StageResult], + modified_files: list[str], +) -> str: + lines = [ + "# Devlog", + "", + f"Task `{task.id}`: {task.title}", + "", + f"Status: {status.upper()}", + f"Retries: {retry_count}", + f"Outcome: {reason}", + "", + ] + stage_titles = { + "agent": "Agent", + "agent_review": "Reviewer", + "code_writer": "Implementer", + "file_writer": "Implementer", + "patch_normalizer": "Normalizer", + "patch_validator": "Patch validator", + "patch_apply": "Patch apply", + "command": "Command", + "repo_context": "Context builder", + "summarize": "Summarizer", + } + for result in stage_results: + label = _devlog_stage_label(result.stage_id, stage_titles) + verb = _devlog_verb(label, result.status) + lines.extend( + [ + f"## {label}", + "", + f"{verb}:", + f"- Status: {result.status}", + f"- Reason: {result.reason}", + ] + ) + if result.output_path: + lines.append(f"- Artifact: `{result.output_path}`") + if result.context_update: + lines.append(f"- Note: {result.context_update}") + lines.append("") + lines.extend( + [ + "## Modified Files", + "", + *([f"- `{path}`" for path in modified_files] if modified_files else ["- None detected"]), + "", + ] + ) + return "\n".join(lines) + + +def _devlog_stage_label(stage_id: str, stage_titles: dict[str, str]) -> str: + normalized = stage_id.lower() + if "plan" in normalized: + return "Planner" + if "implement" in normalized or "write" in normalized: + return "Implementer" + if "review" in normalized: + return "Reviewer" + if "test" in normalized: + return "Tests" + if "context" in normalized: + return "Context builder" + if "validate" in normalized: + return "Patch validator" + if "apply" in normalized: + return "Patch apply" + if "normalize" in normalized: + return "Normalizer" + return stage_titles.get(normalized, stage_id.replace("_", " ").title()) + + +def _devlog_verb(label: str, status: str) -> str: + if label == "Planner": + return "Planner proposed" + if label == "Implementer": + return "Implementer tried" + if label == "Reviewer": + return "Reviewer responded" + if label == "Tests": + return "Tests reported" + if status == "fail": + return f"{label} stopped" + return f"{label} completed" + + def collect_modified_files(project_root: Path) -> list[str]: try: completed = subprocess.run( diff --git a/nightshift/web.py b/nightshift/web.py index 6be1def..ba05b43 100644 --- a/nightshift/web.py +++ b/nightshift/web.py @@ -15,7 +15,10 @@ class RunInfo: name: str path: Path summary: str + devlog: str + status: str log_tail: tuple[str, ...] = () + artifacts: tuple[str, ...] = () def list_runs(artifact_dir: str | Path) -> list[RunInfo]: @@ -25,13 +28,18 @@ def list_runs(artifact_dir: str | Path) -> list[RunInfo]: runs: list[RunInfo] = [] for path in sorted((item for item in runs_dir.iterdir() if item.is_dir()), reverse=True): summary_path = path / "run-summary.md" + devlog_path = path / "devlog.md" summary = summary_path.read_text(encoding="utf-8") if summary_path.exists() else "No run summary yet." + devlog = devlog_path.read_text(encoding="utf-8") if devlog_path.exists() else "No devlog yet." runs.append( RunInfo( name=path.name, path=path, summary=summary, + devlog=devlog, + status=_status_from_summary(summary), log_tail=tuple(tail_lines(path / "run.log", limit=100)), + artifacts=tuple(_artifact_paths(path)), ) ) return runs @@ -51,28 +59,60 @@ def read_artifact(run_path: Path, relative_path: str) -> str: def render_dashboard(artifact_dir: str | Path) -> str: runs = list_runs(artifact_dir) body = [ - "

NightShift Dashboard

", '', - "

Showing artifact files from the newest run first. This page is read-only and refreshes every 5 seconds.

", + _style_block(), + '
', + '
', + '

Local artifact dashboard

NightShift

', + '
Read-only run review. Auto-refreshes every 5 seconds.
', + "
", ] if not runs: - body.append("

No runs found.

") + body.append('
No runs found.
') for index, run in enumerate(runs): title = "Latest Run" if index == 0 else "Older Run" + status_class = _status_class(run.status) + artifact_links = "\n".join( + f'{escape(path)}' + for path in run.artifacts[:18] + ) + artifact_body = artifact_links or 'No artifacts yet.' body.extend( [ - f"

{title}: {escape(run.name)}

", - "
",
+                '
', + '
', + f'

{title}

{escape(run.name)}

', + f'{escape(run.status.upper())}', + "
", + '
', + '
', + "

Devlog

", + '
',
+                escape(run.devlog),
+                "
", + "
", + '
', + "

Run Summary

", + '
',
                 escape(run.summary),
                 "
", + "
", + '
', "

Log Tail

", - "
",
+                '
',
                 escape("\n".join(run.log_tail) if run.log_tail else "No run log yet."),
                 "
", + "
", + '
', + "

Artifacts

", + f'
{artifact_body}
', + "
", + "
", "
", ] ) - return "\n".join(["", "", *body, ""]) + body.append("
") + return "\n".join(["", '', *body, ""]) def create_app(project_root: str | Path = ".", artifact_dir: str | Path = ".nightshift"): @@ -96,8 +136,152 @@ def create_app(project_root: str | Path = ".", artifact_dir: str | Path = ".nigh @app.get("/runs//") def artifact(run_id: str, artifact_path: str): content = read_artifact(artifacts / "runs" / run_id, artifact_path) - response = Response(f"
{escape(content)}
", mimetype="text/html") + response = Response( + "\n".join( + [ + "", + '', + _style_block(), + '
', + f'Back to dashboard', + f'

{escape(artifact_path)}

{escape(content)}
', + "
", + "", + ] + ), + mimetype="text/html", + ) response.headers["Cache-Control"] = "no-store, max-age=0" return response return app + + +def _artifact_paths(run_path: Path) -> list[str]: + if not run_path.exists(): + return [] + paths = [ + path.relative_to(run_path).as_posix() + for path in run_path.rglob("*") + if path.is_file() + ] + priority = { + "devlog.md": 0, + "run-summary.md": 1, + "run.log": 2, + } + return sorted(paths, key=lambda item: (priority.get(item, 10), item)) + + +def _status_from_summary(summary: str) -> str: + for line in summary.splitlines(): + normalized = line.strip().lower() + if normalized.startswith("- status:"): + return normalized.split(":", 1)[1].strip() or "running" + if normalized.startswith("status:"): + return normalized.split(":", 1)[1].strip() or "running" + return "running" + + +def _status_class(status: str) -> str: + normalized = status.lower() + if normalized in {"complete", "completed", "pass", "passed"}: + return "complete" + if normalized in {"failed", "fail", "error"}: + return "failed" + return "running" + + +def _style_block() -> str: + return """ + +""" diff --git a/tests/test_patches.py b/tests/test_patches.py index 2e5883e..541987a 100644 --- a/tests/test_patches.py +++ b/tests/test_patches.py @@ -8,6 +8,7 @@ from nightshift.patches import ( generate_patch_from_file_updates, normalize_patch_text, parse_file_updates, + repair_hunk_counts, validate_patch, ) @@ -122,6 +123,58 @@ new file mode 100644 with self.assertRaisesRegex(PipelineError, "new line count expected 2, got 1"): validate_patch(patch, root, safety) + def test_normalize_repairs_hunk_count_mismatch(self) -> None: + lines = "\n".join(f"+line {number}" for number in range(38)) + patch = f"""diff --git a/src/app.py b/src/app.py +--- /dev/null ++++ b/src/app.py +@@ -0,0 +1,40 @@ +{lines} +""" + + normalized = normalize_patch_text(patch) + + self.assertIn("@@ -0,0 +1,38 @@", normalized) + + def test_validate_patch_counts_hunk_lines_that_look_like_headers(self) -> None: + with tempfile.TemporaryDirectory() as directory: + root = Path(directory) + (root / "src").mkdir() + safety = SafetyConfig( + require_clean_worktree=False, + scoped_paths=("src",), + allowed_commands=(), + forbidden_commands=(), + ) + patch = """diff --git a/src/app.py b/src/app.py +--- a/src/app.py ++++ b/src/app.py +@@ -1,3 +1,3 @@ + context +--- +---- ++++ +++++ +""" + + result = validate_patch(patch, root, safety) + + self.assertEqual(result.changed_lines, 4) + + def test_repair_hunk_counts_counts_header_like_body_lines(self) -> None: + patch = """diff --git a/src/app.py b/src/app.py +--- a/src/app.py ++++ b/src/app.py +@@ -1 +1 @@ + context +--- ++++ +""" + + repaired = repair_hunk_counts(patch) + + self.assertIn("@@ -1,2 +1,2 @@", repaired) + def test_validate_patch_accepts_multiple_files(self) -> None: with tempfile.TemporaryDirectory() as directory: root = Path(directory) diff --git a/tests/test_pipeline.py b/tests/test_pipeline.py index f90dbd6..3a8ac02 100644 --- a/tests/test_pipeline.py +++ b/tests/test_pipeline.py @@ -455,10 +455,49 @@ Acceptance Criteria: result = runner.run_task(parse_tasks(TASK_MD)[0]) patch = root / ".nightshift" / "runs" / "test-run" / "tasks" / "TASK-001" / "proposed.patch" + agent_output = root / ".nightshift" / "runs" / "test-run" / "tasks" / "TASK-001" / "write-agent-output.md" self.assertEqual(result.status, "complete") + self.assertTrue(agent_output.exists()) self.assertIn("diff --git a/app.py b/app.py", patch.read_text(encoding="utf-8")) self.assertIn("diff --git a/tests/test_app.py b/tests/test_app.py", patch.read_text(encoding="utf-8")) + def test_file_writer_accepts_unified_diff_fallback(self) -> None: + with tempfile.TemporaryDirectory() as directory: + root = Path(directory) + _write_common_files(root) + (root / "app.py").write_text("old\n", encoding="utf-8") + (root / "fake_writer.py").write_text( + "\n".join( + [ + "print('diff --git a/app.py b/app.py')", + "print('--- a/app.py')", + "print('+++ b/app.py')", + "print('@@ -1 +1,4 @@')", + "print('-old')", + "print('+new')", + ] + ), + encoding="utf-8", + ) + stages = ( + StageConfig(id="write", type="file_writer", agent="writer"), + StageConfig(id="validate", type="patch_validator"), + ) + config = make_config(root, stages) + config.agents["writer"] = AgentConfig( + id="writer", + backend="command", + command="python fake_writer.py", + system_prompt=Path("planner.md"), + ) + runner = PipelineRunner(config, ArtifactStore(root, ".nightshift", run_id="test-run")) + + result = runner.run_task(parse_tasks(TASK_MD)[0]) + + patch = root / ".nightshift" / "runs" / "test-run" / "tasks" / "TASK-001" / "proposed.patch" + self.assertEqual(result.status, "complete") + self.assertIn("@@ -1 +1 @@", patch.read_text(encoding="utf-8")) + def test_patch_validator_rejects_unsafe_patch(self) -> None: with tempfile.TemporaryDirectory() as directory: root = Path(directory) @@ -615,7 +654,7 @@ Acceptance Criteria: ), encoding="utf-8", ) - test_command = 'python -c "from pathlib import Path; raise SystemExit(0 if Path(\'app.py\').read_text().strip() == \'new\' else 1)"' + test_command = 'python -c "from pathlib import Path; import sys; ok = Path(\'app.py\').read_text().strip() == \'new\'; sys.stderr.write(\'expected new\\n\' if not ok else \'\'); raise SystemExit(0 if ok else 1)"' stages = ( StageConfig(id="write", type="code_writer", agent="writer"), StageConfig(id="normalize", type="patch_normalizer"), @@ -659,6 +698,10 @@ Acceptance Criteria: self.assertEqual((root / "app.py").read_text(encoding="utf-8"), "new\n") self.assertTrue((task_dir / "repair-1.patch").exists()) self.assertTrue((task_dir / "repair-summary-1.md").exists()) + self.assertIn( + "expected new", + (task_dir / "write-agent-output-1.md").read_text(encoding="utf-8"), + ) self.assertTrue((task_dir / "normalized-1.patch").exists()) self.assertTrue((task_dir / "patch-validation-1.md").exists()) self.assertTrue((task_dir / "applied-1.patch").exists()) diff --git a/tests/test_reports.py b/tests/test_reports.py index 5d2fa54..a8f475c 100644 --- a/tests/test_reports.py +++ b/tests/test_reports.py @@ -49,9 +49,11 @@ class ReportGeneratorTests(unittest.TestCase): self.assertTrue(report.final_notes_path.exists()) self.assertTrue(report.stage_results_path.exists()) self.assertTrue(report.run_summary_path.exists()) + self.assertTrue(report.devlog_path.exists()) self.assertIn("Retry count: 1", report.final_notes_path.read_text(encoding="utf-8")) self.assertIn("test", report.stage_results_path.read_text(encoding="utf-8")) self.assertIn("Final notes", report.run_summary_path.read_text(encoding="utf-8")) + self.assertIn("Tests reported", report.devlog_path.read_text(encoding="utf-8")) if __name__ == "__main__": diff --git a/tests/test_web.py b/tests/test_web.py index 83ab6b4..4b9fa71 100644 --- a/tests/test_web.py +++ b/tests/test_web.py @@ -18,7 +18,8 @@ class WebDashboardTests(unittest.TestCase): root = Path(directory) artifacts = ArtifactStore(root, ".nightshift", run_id="test-run") artifacts.initialize_run() - artifacts.run_summary_path.write_text("# Summary\n\nok", encoding="utf-8") + artifacts.run_summary_path.write_text("# Summary\n\n- Status: failed\n\nok", encoding="utf-8") + (artifacts.run_dir / "devlog.md").write_text("# Devlog\n\nPlanner proposed:\n- do this", encoding="utf-8") artifacts.run_log_path.write_text( "\n".join(f"line {index}" for index in range(120)), encoding="utf-8", @@ -30,10 +31,15 @@ class WebDashboardTests(unittest.TestCase): dashboard = render_dashboard(root / ".nightshift") self.assertEqual(len(runs), 1) + self.assertEqual(runs[0].status, "failed") self.assertEqual(len(runs[0].log_tail), 100) + self.assertIn("devlog.md", runs[0].artifacts) self.assertIn("ok", content) self.assertIn("escapes", escaped) self.assertIn("Log Tail", dashboard) + self.assertIn("Planner proposed", dashboard) + self.assertIn("FAILED", dashboard) + self.assertIn("artifact-link", dashboard) self.assertIn("line 119", dashboard) self.assertNotIn("line 19\n", dashboard)