Add more rigorous responses to test failures

2026-06-14 10:08:37 +00:00 · 2026-05-17 19:05:27 -07:00 · 2026-05-17 19:05:27 -07:00 · a3e1be75fe
commit a3e1be75fe
parent 75a8646708
14 changed files with 73 additions and 1 deletions
--- a/.gitignore
+++ b/.gitignore
@ -51,6 +51,7 @@ coverage.xml
 .pytest_cache/
 cover/
 tiny-lisp-nightshift/
 nightshift-imageboard/
 # Translations
 *.mo
--- a/docs/bugfix_todo.md
+++ b/docs/bugfix_todo.md
@ -1,5 +1,11 @@
 # Bugfix TODO
 ## Some kind of tool install feature
 Continually fails on `flask_sqlalchemy` until I install it manually.
 ## Tutorial needs to include the `.` directory for imageboard
 ## Git status artifacts are noisy for non-git repositories
 Observed artifact:
--- a/examples/example-environment/agents/implementer.md
+++ b/examples/example-environment/agents/implementer.md
--- a/examples/example-environment/agents/planner.md
+++ b/examples/example-environment/agents/planner.md
--- a/examples/example-environment/agents/reviewer.md
+++ b/examples/example-environment/agents/reviewer.md
--- a/examples/example-environment/nightshift.yaml
+++ b/examples/example-environment/nightshift.yaml
--- a/examples/example-environment/tasks.md
+++ b/examples/example-environment/tasks.md
--- a/examples/tutorial/01-imageboard/README.md
+++ b/examples/tutorial/01-imageboard/README.md
@ -47,7 +47,7 @@ python -m pip install -e .
 Install target project dependencies:
 ```bash
-python -m pip install flask pillow pytest
+python -m pip install flask pillow pytest flask_sqlalchemy
 ```
 Install and start Ollama, then make sure the model is available:
--- a/nightshift/agents.py
+++ b/nightshift/agents.py
@ -454,6 +454,8 @@ def output_contract_for(stage: StageConfig) -> str:
                "Do not include prose outside file blocks.",
                "Include every file needed for the task, including tests.",
                "NightShift will generate the unified diff deterministically.",
                "On repair attempts, use the retry notes and failed stage output to diagnose the root cause before changing files.",
                "Do not repeat an unchanged solution unless the failure output shows the implementation is already correct.",
            ]
        )
    if stage.type == "patch_normalizer":
--- a/nightshift/pipeline.py
+++ b/nightshift/pipeline.py
@ -1015,6 +1015,9 @@ class PipelineRunner:
        content = self._read_output(output_path)
        if not content.strip():
            return ""
        cleaned_content = re.sub(r"\n{4,}", "\n\n\n", content.strip())
        if len(cleaned_content) <= max_chars:
            return cleaned_content
        patterns = (
            "error",
            "fail",
--- a/nightshift/project_templates/tutorial-imageboard/nightshift.yaml
+++ b/nightshift/project_templates/tutorial-imageboard/nightshift.yaml
@ -7,6 +7,7 @@ project:
 safety:
  require_clean_worktree: false
  scoped_paths:
    - .
    - src
    - tests
    - templates
--- a/nightshift/terminal.py
+++ b/nightshift/terminal.py
@ -73,6 +73,7 @@ def format_banner(stream: TextIO | None = None) -> str:
        "██║ ╚████║██║╚██████╔╝██║  ██║   ██║   ███████║██║  ██║██║██║        ██║   ",
        "╚═╝  ╚═══╝╚═╝ ╚═════╝ ╚═╝  ╚═╝   ╚═╝   ╚══════╝╚═╝  ╚═╝╚═╝╚═╝        ╚═╝   ",
        "",
        "      NightShift",
       f"      [ {quote} ]",
        "      [ planner | implementer | verifier | audit ]",
        "",
--- a/tests/test_agents.py
+++ b/tests/test_agents.py
@ -58,6 +58,20 @@ class AgentExecutorTests(unittest.TestCase):
        self.assertIn("Task context body", prompt)
        self.assertIn("- No retries", prompt)
    def test_file_writer_contract_mentions_repair_context(self) -> None:
        """Check that a ``file_writer`` prompt carries the repair guidance.

        Builds a prompt bundle for a stage of type ``file_writer`` with a
        single retry note, then asserts that the result contains the
        "On repair attempts" and "failed stage output" wording.
        """
        # Only the first task parsed from the shared TASK_MD fixture is used.
        task = parse_tasks(TASK_MD)[0]
        prompt = build_prompt_bundle(
            system_prompt="System rules",
            stage=StageConfig(id="write", type="file_writer", agent="writer"),
            task=task,
            project_context="Project context",
            previous_outputs={},
            retry_notes=["Retry note"],
        )
        # NOTE(review): both phrases presumably come from the file-writer
        # output contract text in nightshift/agents.py -- confirm.
        self.assertIn("On repair attempts", prompt)
        self.assertIn("failed stage output", prompt)
    def test_command_agent_writes_output_and_returns_pass(self) -> None:
        with tempfile.TemporaryDirectory() as directory:
            root = Path(directory)
--- a/tests/test_pipeline.py
+++ b/tests/test_pipeline.py
@ -13,6 +13,7 @@ from nightshift.config import (
    StageConfig,
 )
 from nightshift.pipeline import PipelineRunner
 from nightshift.stages import StageResult
 from nightshift.tasks import parse_tasks
@ -328,6 +329,49 @@ Acceptance Criteria:
            self.assertIn("cli.py", content)
            self.assertIn("main@L1", content)
    def test_retry_note_keeps_small_failure_output_unfiltered(self) -> None:
        """Check that a short failing test output reaches the retry note intact.

        Writes a small command-output artifact for a failed ``test`` stage,
        formats the retry note for the ``write`` stage, and asserts that the
        note still contains the test's own source lines rather than only the
        ``AssertionError`` line.
        """
        with tempfile.TemporaryDirectory() as directory:
            root = Path(directory)
            _write_common_files(root)
            artifacts = ArtifactStore(root, ".nightshift", run_id="test-run")
            config = make_config(root, ())
            runner = PipelineRunner(config, artifacts)
            # Stage output mimicking a short pytest failure: three lines of
            # test source followed by the assertion error.
            output_path = artifacts.write_stage_output(
                "TASK-001",
                "test-output.txt",
                "\n".join(
                    [
                        "# Command Output: test",
                        "",
                        "### stdout",
                        "",
                        "```text",
                        "def test_board_route(self):",
                        "    response = self.client.get('/board/general')",
                        "    self.assertEqual(response.status_code, 200)",
                        "E   AssertionError: 404 != 200",
                        "```",
                        "",
                    ]
                ),
            )
            # The stage result carries the output path relative to the project root.
            relative_output = str(output_path.relative_to(root))
            note = runner._format_retry_note(
                1,
                StageConfig(id="test", type="command", on_fail="write"),
                StageResult(
                    stage_id="test",
                    status="fail",
                    reason="Command exited with code 1: python -m pytest -q",
                    output_path=relative_output,
                ),
                "write",
            )
            # Neither asserted line contains "error" or "fail", so they would
            # presumably be dropped if the output were keyword-filtered --
            # TODO confirm against the filtering logic in PipelineRunner.
            self.assertIn("response = self.client.get('/board/general')", note)
            self.assertIn("self.assertEqual(response.status_code, 200)", note)
    def test_code_writer_normalizer_and_validator_pipeline(self) -> None:
        with tempfile.TemporaryDirectory() as directory:
            root = Path(directory)