Fixes around test run 2

This commit is contained in:
K. Hodges 2026-05-17 13:46:26 -07:00
parent a32142e9ea
commit ec9181eb64
7 changed files with 116 additions and 8 deletions

1
.gitignore vendored
View File

@ -50,6 +50,7 @@ coverage.xml
.hypothesis/ .hypothesis/
.pytest_cache/ .pytest_cache/
cover/ cover/
tiny-lisp-nightshift/
# Translations # Translations
*.mo *.mo

View File

@ -43,6 +43,15 @@ def get_git_status(project_root: Path) -> GitCommandResult:
return run_git(project_root, ["status", "--short"]) return run_git(project_root, ["status", "--short"])
def get_git_repository_state(project_root: Path) -> GitCommandResult:
return run_git(project_root, ["rev-parse", "--is-inside-work-tree"])
def is_git_repository(project_root: Path) -> bool:
state = get_git_repository_state(project_root)
return state.available and state.stdout.strip() == "true"
def ensure_clean_worktree(project_root: Path, require_clean: bool) -> None: def ensure_clean_worktree(project_root: Path, require_clean: bool) -> None:
if not require_clean: if not require_clean:
return return
@ -63,9 +72,14 @@ def write_git_artifacts(artifacts: ArtifactStore, task_id: str, when: str) -> Pa
def write_diff_artifact(artifacts: ArtifactStore, task_id: str) -> Path: def write_diff_artifact(artifacts: ArtifactStore, task_id: str) -> Path:
if not is_git_repository(artifacts.project_root):
content = "Git diff unavailable.\n\nReason: project root is not a git work tree.\n"
return artifacts.write_stage_output(task_id, "diff.patch", content)
diff = run_git(artifacts.project_root, ["diff", "--binary"], timeout_seconds=30) diff = run_git(artifacts.project_root, ["diff", "--binary"], timeout_seconds=30)
if not diff.available: if not diff.available:
content = "Git diff unavailable.\n\n" + (diff.stderr or "") details = (diff.stderr or "unknown git error").strip()
content = f"Git diff unavailable.\n\nReason: {details}\n"
elif diff.stdout: elif diff.stdout:
content = diff.stdout content = diff.stdout
else: else:

View File

@ -436,14 +436,46 @@ class PipelineRunner:
chart_path = self.artifacts.project_context_chart_path chart_path = self.artifacts.project_context_chart_path
if chart_path.exists(): if chart_path.exists():
enriched_outputs["project-context-chart.md"] = chart_path.read_text(encoding="utf-8", errors="replace") enriched_outputs["project-context-chart.md"] = chart_path.read_text(encoding="utf-8", errors="replace")
context = self.context.read_context(task, retry_notes)
result = self.agent_executor.run_stage( result = self.agent_executor.run_stage(
stage, stage,
task, task,
enriched_outputs, enriched_outputs,
retry_notes, retry_notes,
project_context=self.context.read_context(task, retry_notes).project_context, project_context=context.project_context,
task_context=self.context.read_context(task, retry_notes).task_context, task_context=context.task_context,
retry_context=self.context.read_context(task, retry_notes).retry_context, retry_context=context.retry_context,
)
raw_output = self._read_output(result.output_path)
stdout = extract_agent_stdout(raw_output)
lookup_requests = parse_lookup_requests(stdout)
if lookup_requests and "diff --git " not in stdout:
lookup_context = self.repo_tools.execute_requests(
task.id,
lookup_requests,
filename="implementation-files-inspected.md",
)
self.logger.event(
"agent.rerun",
"Re-running code writer with repo lookup context",
stage_id=stage.id,
task_id=task.id,
lookup_count=len(lookup_requests),
)
rerun_outputs = dict(enriched_outputs)
rerun_outputs["repo_lookup_results"] = lookup_context
rerun_notes = [
*retry_notes,
"Repository lookup results have been provided. Return the unified diff now; do not request more lookups.",
]
result = self.agent_executor.run_stage(
stage,
task,
rerun_outputs,
rerun_notes,
project_context=context.project_context,
task_context=context.task_context,
retry_context="\n".join(f"- {note}" for note in rerun_notes),
) )
raw_output = self._read_output(result.output_path) raw_output = self._read_output(result.output_path)
stdout = extract_agent_stdout(raw_output) stdout = extract_agent_stdout(raw_output)
@ -657,14 +689,18 @@ class PipelineRunner:
) )
rerun_outputs = dict(previous_outputs) rerun_outputs = dict(previous_outputs)
rerun_outputs["repo_lookup_results"] = lookup_context rerun_outputs["repo_lookup_results"] = lookup_context
rerun_retry_notes = [
*retry_notes,
"Repository lookup results have been provided. Write the final plan now; do not request more lookups.",
]
rerun_result = self.agent_executor.run_stage( rerun_result = self.agent_executor.run_stage(
stage, stage,
task, task,
rerun_outputs, rerun_outputs,
retry_notes, rerun_retry_notes,
project_context=project_context, project_context=project_context,
task_context=task_context, task_context=task_context,
retry_context=retry_context, retry_context="\n".join(f"- {note}" for note in rerun_retry_notes),
) )
return StageResult( return StageResult(
stage_id=rerun_result.stage_id, stage_id=rerun_result.stage_id,

View File

@ -217,7 +217,7 @@ def parse_lookup_requests(text: str, max_requests: int = DEFAULT_MAX_LOOKUP_REQU
continue continue
key, value = stripped.split(":", 1) key, value = stripped.split(":", 1)
key = key.strip() key = key.strip()
value = value.strip().strip('"').strip("'") value = value.strip().strip('"').strip("'").strip("`")
if key == "tool" and current: if key == "tool" and current:
flush() flush()
current[key] = value current[key] = value

View File

@ -38,6 +38,17 @@ class GitSafetyTests(unittest.TestCase):
self.assertTrue(diff_path.exists()) self.assertTrue(diff_path.exists())
self.assertIn("Git Status before", status_path.read_text(encoding="utf-8")) self.assertIn("Git Status before", status_path.read_text(encoding="utf-8"))
def test_diff_artifact_is_concise_outside_git_repo(self) -> None:
with tempfile.TemporaryDirectory() as directory:
root = Path(directory)
artifacts = ArtifactStore(root, ".nightshift", run_id="test-run")
diff_path = write_diff_artifact(artifacts, "TASK-001")
content = diff_path.read_text(encoding="utf-8")
self.assertIn("project root is not a git work tree", content)
self.assertNotIn("usage: git diff", content)
if __name__ == "__main__": if __name__ == "__main__":
unittest.main() unittest.main()

View File

@ -373,6 +373,52 @@ Acceptance Criteria:
self.assertTrue((task_dir / "normalized.patch").exists()) self.assertTrue((task_dir / "normalized.patch").exists())
self.assertIn("Status: pass", (task_dir / "patch-validation.md").read_text(encoding="utf-8")) self.assertIn("Status: pass", (task_dir / "patch-validation.md").read_text(encoding="utf-8"))
def test_code_writer_lookup_requests_are_rerun_with_context(self) -> None:
with tempfile.TemporaryDirectory() as directory:
root = Path(directory)
_write_common_files(root)
(root / "app.py").write_text("old\n", encoding="utf-8")
(root / "fake_writer.py").write_text(
"\n".join(
[
"import sys",
"prompt = sys.stdin.read()",
"if 'repo_lookup_results' in prompt:",
" print('diff --git a/app.py b/app.py')",
" print('--- a/app.py')",
" print('+++ b/app.py')",
" print('@@ -1 +1 @@')",
" print('-old')",
" print('+new')",
"else:",
" print('lookup_requests:')",
" print('- tool: read_file')",
" print(' path: app.py')",
]
),
encoding="utf-8",
)
stages = (
StageConfig(id="write", type="code_writer", agent="writer"),
StageConfig(id="normalize", type="patch_normalizer"),
StageConfig(id="validate", type="patch_validator"),
)
config = make_config(root, stages)
config.agents["writer"] = AgentConfig(
id="writer",
backend="command",
command="python fake_writer.py",
system_prompt=Path("planner.md"),
)
runner = PipelineRunner(config, ArtifactStore(root, ".nightshift", run_id="test-run"))
result = runner.run_task(parse_tasks(TASK_MD)[0])
task_dir = root / ".nightshift" / "runs" / "test-run" / "tasks" / "TASK-001"
self.assertEqual(result.status, "complete")
self.assertTrue((task_dir / "implementation-files-inspected.md").exists())
self.assertIn("diff --git a/app.py b/app.py", (task_dir / "proposed.patch").read_text(encoding="utf-8"))
def test_patch_validator_rejects_unsafe_patch(self) -> None: def test_patch_validator_rejects_unsafe_patch(self) -> None:
with tempfile.TemporaryDirectory() as directory: with tempfile.TemporaryDirectory() as directory:
root = Path(directory) root = Path(directory)

View File

@ -33,7 +33,7 @@ lookup_requests:
path: nightshift/pipeline.py path: nightshift/pipeline.py
- tool: grep - tool: grep
path: nightshift path: nightshift
pattern: PipelineRunner pattern: `PipelineRunner`
""" """
requests = parse_lookup_requests(output) requests = parse_lookup_requests(output)