Add more rigorous responses to test failures

This commit is contained in:
K. Hodges 2026-05-17 19:05:27 -07:00
parent 75a8646708
commit a3e1be75fe
14 changed files with 73 additions and 1 deletions

1
.gitignore vendored
View File

@ -51,6 +51,7 @@ coverage.xml
.pytest_cache/ .pytest_cache/
cover/ cover/
tiny-lisp-nightshift/ tiny-lisp-nightshift/
nightshift-imageboard/
# Translations # Translations
*.mo *.mo

View File

@ -1,5 +1,11 @@
# Bugfix TODO # Bugfix TODO
## Some kind of tool install feature
Continually fails on `flask_sqlalchemy` until I install it.
## Tutorial needs to include the `.` directory for imageboard
## Git status artifacts are noisy for non-git repositories ## Git status artifacts are noisy for non-git repositories
Observed artifact: Observed artifact:

View File

@ -47,7 +47,7 @@ python -m pip install -e .
Install target project dependencies: Install target project dependencies:
```bash ```bash
python -m pip install flask pillow pytest python -m pip install flask pillow pytest flask_sqlalchemy
``` ```
Install and start Ollama, then make sure the model is available: Install and start Ollama, then make sure the model is available:

View File

@ -454,6 +454,8 @@ def output_contract_for(stage: StageConfig) -> str:
"Do not include prose outside file blocks.", "Do not include prose outside file blocks.",
"Include every file needed for the task, including tests.", "Include every file needed for the task, including tests.",
"NightShift will generate the unified diff deterministically.", "NightShift will generate the unified diff deterministically.",
"On repair attempts, use the retry notes and failed stage output to diagnose the root cause before changing files.",
"Do not repeat an unchanged solution unless the failure output shows the implementation is already correct.",
] ]
) )
if stage.type == "patch_normalizer": if stage.type == "patch_normalizer":

View File

@ -1015,6 +1015,9 @@ class PipelineRunner:
content = self._read_output(output_path) content = self._read_output(output_path)
if not content.strip(): if not content.strip():
return "" return ""
cleaned_content = re.sub(r"\n{4,}", "\n\n\n", content.strip())
if len(cleaned_content) <= max_chars:
return cleaned_content
patterns = ( patterns = (
"error", "error",
"fail", "fail",

View File

@ -7,6 +7,7 @@ project:
safety: safety:
require_clean_worktree: false require_clean_worktree: false
scoped_paths: scoped_paths:
- .
- src - src
- tests - tests
- templates - templates

View File

@ -73,6 +73,7 @@ def format_banner(stream: TextIO | None = None) -> str:
"██║ ╚████║██║╚██████╔╝██║ ██║ ██║ ███████║██║ ██║██║██║ ██║ ", "██║ ╚████║██║╚██████╔╝██║ ██║ ██║ ███████║██║ ██║██║██║ ██║ ",
"╚═╝ ╚═══╝╚═╝ ╚═════╝ ╚═╝ ╚═╝ ╚═╝ ╚══════╝╚═╝ ╚═╝╚═╝╚═╝ ╚═╝ ", "╚═╝ ╚═══╝╚═╝ ╚═════╝ ╚═╝ ╚═╝ ╚═╝ ╚══════╝╚═╝ ╚═╝╚═╝╚═╝ ╚═╝ ",
"", "",
" NightShift",
f" [ {quote} ]", f" [ {quote} ]",
" [ planner | implementer | verifier | audit ]", " [ planner | implementer | verifier | audit ]",
"", "",

View File

@ -58,6 +58,20 @@ class AgentExecutorTests(unittest.TestCase):
self.assertIn("Task context body", prompt) self.assertIn("Task context body", prompt)
self.assertIn("- No retries", prompt) self.assertIn("- No retries", prompt)
def test_file_writer_contract_mentions_repair_context(self) -> None:
task = parse_tasks(TASK_MD)[0]
prompt = build_prompt_bundle(
system_prompt="System rules",
stage=StageConfig(id="write", type="file_writer", agent="writer"),
task=task,
project_context="Project context",
previous_outputs={},
retry_notes=["Retry note"],
)
self.assertIn("On repair attempts", prompt)
self.assertIn("failed stage output", prompt)
def test_command_agent_writes_output_and_returns_pass(self) -> None: def test_command_agent_writes_output_and_returns_pass(self) -> None:
with tempfile.TemporaryDirectory() as directory: with tempfile.TemporaryDirectory() as directory:
root = Path(directory) root = Path(directory)

View File

@ -13,6 +13,7 @@ from nightshift.config import (
StageConfig, StageConfig,
) )
from nightshift.pipeline import PipelineRunner from nightshift.pipeline import PipelineRunner
from nightshift.stages import StageResult
from nightshift.tasks import parse_tasks from nightshift.tasks import parse_tasks
@ -328,6 +329,49 @@ Acceptance Criteria:
self.assertIn("cli.py", content) self.assertIn("cli.py", content)
self.assertIn("main@L1", content) self.assertIn("main@L1", content)
def test_retry_note_keeps_small_failure_output_unfiltered(self) -> None:
with tempfile.TemporaryDirectory() as directory:
root = Path(directory)
_write_common_files(root)
artifacts = ArtifactStore(root, ".nightshift", run_id="test-run")
config = make_config(root, ())
runner = PipelineRunner(config, artifacts)
output_path = artifacts.write_stage_output(
"TASK-001",
"test-output.txt",
"\n".join(
[
"# Command Output: test",
"",
"### stdout",
"",
"```text",
"def test_board_route(self):",
" response = self.client.get('/board/general')",
" self.assertEqual(response.status_code, 200)",
"E AssertionError: 404 != 200",
"```",
"",
]
),
)
relative_output = str(output_path.relative_to(root))
note = runner._format_retry_note(
1,
StageConfig(id="test", type="command", on_fail="write"),
StageResult(
stage_id="test",
status="fail",
reason="Command exited with code 1: python -m pytest -q",
output_path=relative_output,
),
"write",
)
self.assertIn("response = self.client.get('/board/general')", note)
self.assertIn("self.assertEqual(response.status_code, 200)", note)
def test_code_writer_normalizer_and_validator_pipeline(self) -> None: def test_code_writer_normalizer_and_validator_pipeline(self) -> None:
with tempfile.TemporaryDirectory() as directory: with tempfile.TemporaryDirectory() as directory:
root = Path(directory) root = Path(directory)