mirror of
https://github.com/khodges42/nightShift.git
synced 2026-06-14 10:08:37 +00:00
Project runner tool and writer tooling fixes
i wanted the animated status bar...
This commit is contained in:
parent
d928a52fb3
commit
a0ad9b2ac0
3
docs/bugs.md
Normal file
3
docs/bugs.md
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
# Log descriptions shown in the status bar are slightly off. "Starting ollama HTTP model invocation" implies the run is stuck starting when it is not.
|
||||
|
||||
# We've stopped updating the version.
|
||||
|
|
@ -15,6 +15,7 @@ from .integ_setup import format_setup_result, setup_python_project
|
|||
from .integ_test import format_integration_test_result, run_integration_test
|
||||
from .pipeline import PipelineRunner
|
||||
from .runlog import RunLogger
|
||||
from .sandbox_run import format_sandbox_run_result, run_sandbox_project
|
||||
from .status import build_status, format_status
|
||||
from .task_tests import check_task_test_files, format_task_test_checks, missing_task_test_paths
|
||||
from .terminal import HOTDOG_ANIMATIONS, TerminalAnimation, format_banner, style_text
|
||||
|
|
@ -131,6 +132,38 @@ def build_parser() -> argparse.ArgumentParser:
|
|||
integ_test_parser.add_argument("--setup-skip-validate", action="store_true", help="Skip validation during setup.")
|
||||
integ_test_parser.add_argument("--dry-run", action="store_true", help="Print commands without running setup or tasks.")
|
||||
|
||||
sandbox_parser = subparsers.add_parser(
|
||||
"sandbox-run",
|
||||
help="Copy an existing NightShift project into a sandbox, set it up, and run it.",
|
||||
)
|
||||
sandbox_parser.add_argument("--project", required=True, help="Existing NightShift project directory to copy.")
|
||||
sandbox_output = sandbox_parser.add_mutually_exclusive_group(required=True)
|
||||
sandbox_output.add_argument("--output", help="Sandbox output directory. The project is copied to OUTPUT/project.")
|
||||
sandbox_output.add_argument(
|
||||
"--timestamped",
|
||||
action="store_true",
|
||||
help="Create a timestamped sandbox under ROOT/integ_runs, like integ-test.",
|
||||
)
|
||||
sandbox_parser.add_argument("--root", default=".", help="Root used with --timestamped. Defaults to current directory.")
|
||||
sandbox_parser.add_argument("--task", help="Specific task id to run.")
|
||||
sandbox_parser.add_argument("--all", action="store_true", help="Run all runnable incomplete tasks.")
|
||||
sandbox_parser.add_argument("--force", action="store_true", help="Overwrite an existing OUTPUT/project copy.")
|
||||
sandbox_parser.add_argument(
|
||||
"--setup-extra",
|
||||
action="append",
|
||||
default=["pytest"],
|
||||
help="Extra package to install during setup. May be repeated. Defaults to pytest.",
|
||||
)
|
||||
sandbox_parser.add_argument("--setup-skip-validate", action="store_true", help="Skip validation during setup.")
|
||||
sandbox_parser.add_argument("--dry-run", action="store_true", help="Create the sandbox copy and print commands without running setup or tasks.")
|
||||
sandbox_parser.add_argument(
|
||||
"--animation",
|
||||
default="status_dots",
|
||||
choices=tuple(sorted(HOTDOG_ANIMATIONS)),
|
||||
help="Terminal animation to show while the sandboxed run is active.",
|
||||
)
|
||||
sandbox_parser.add_argument("--no-animation", action="store_true", help="Disable terminal animation.")
|
||||
|
||||
integ_report_parser = subparsers.add_parser("integ-report", help="Summarize the latest integration run.")
|
||||
integ_report_parser.add_argument("--root", default=".", help="Repository root where integ_runs/ is located.")
|
||||
integ_report_parser.add_argument("--latest", action="store_true", help="Report the latest integration run.")
|
||||
|
|
@ -309,6 +342,24 @@ def main(argv: list[str] | None = None) -> int:
|
|||
print(format_integration_test_result(result))
|
||||
return result.exit_code
|
||||
|
||||
if args.command == "sandbox-run":
|
||||
result = run_sandbox_project(
|
||||
args.project,
|
||||
output=args.output,
|
||||
timestamped=args.timestamped,
|
||||
root=args.root,
|
||||
task=args.task,
|
||||
all_tasks=args.all,
|
||||
setup_extras=tuple(args.setup_extra or ()),
|
||||
skip_setup_validate=args.setup_skip_validate,
|
||||
dry_run=args.dry_run,
|
||||
animation=args.animation,
|
||||
no_animation=args.no_animation,
|
||||
force=args.force,
|
||||
)
|
||||
print(format_sandbox_run_result(result))
|
||||
return result.exit_code
|
||||
|
||||
if args.command == "integ-report":
|
||||
report = build_integration_report(args.root, latest=True)
|
||||
print(format_integration_report(report))
|
||||
|
|
|
|||
|
|
@ -229,7 +229,11 @@ class PipelineRunner:
|
|||
index += 1
|
||||
continue
|
||||
|
||||
target_stage = stage.on_fail or result.next_stage
|
||||
target_stage = result.next_stage or (
|
||||
stage.on_fail
|
||||
if not (stage.type in {"agent_review", "review"} and _is_malformed_review_result(result))
|
||||
else None
|
||||
)
|
||||
analysis_note = self._write_failure_diagnostics(stage, task, result, retry_count)
|
||||
if analysis_note:
|
||||
retry_notes.append(analysis_note)
|
||||
|
|
@ -481,7 +485,7 @@ class PipelineRunner:
|
|||
result = self.agent_executor.run_stage(
|
||||
self._stage_for_retry_agent(stage, retry_count),
|
||||
task,
|
||||
previous_outputs,
|
||||
_review_previous_outputs(previous_outputs) if stage.type in {"agent_review", "review"} else previous_outputs,
|
||||
retry_notes,
|
||||
project_context=context.project_context,
|
||||
task_context=context.task_context,
|
||||
|
|
@ -501,6 +505,17 @@ class PipelineRunner:
|
|||
context.task_context,
|
||||
context.retry_context,
|
||||
)
|
||||
if stage.type in {"agent_review", "review"} and _is_malformed_review_result(result):
|
||||
return self._rerun_malformed_review(
|
||||
stage,
|
||||
task,
|
||||
result,
|
||||
previous_outputs,
|
||||
retry_notes,
|
||||
retry_count,
|
||||
context.project_context,
|
||||
context.task_context,
|
||||
)
|
||||
return result
|
||||
if stage.type in COMMAND_STAGE_TYPES:
|
||||
return self.command_executor.run_stage(_stage_with_attempt_output(stage, retry_count), task.id)
|
||||
|
|
@ -1217,6 +1232,59 @@ class PipelineRunner:
|
|||
)
|
||||
return f"Debugger output: {debug_result.output_path or 'none'}."
|
||||
|
||||
def _rerun_malformed_review(
    self,
    stage: StageConfig,
    task: Task,
    malformed_result: StageResult,
    previous_outputs: dict[str, str],
    retry_notes: list[str],
    retry_count: int,
    project_context: str,
    task_context: str,
) -> StageResult:
    """Re-run a review stage once after it produced malformed output.

    The rerun uses the retry-agent variant of ``stage`` with its output
    redirected to the name ``_attempt_filename`` yields for attempt
    ``retry_count + 1``, so the malformed artifact is written to a separate
    file name. The reviewer receives the compacted previous outputs, an
    excerpt of its own malformed output, and an extra note demanding the
    strict four-line format.

    Returns the rerun's result when it is well-formed; otherwise a ``fail``
    result whose reason states that the review remained malformed.
    """
    rerun_output = _attempt_filename(stage.output or f"{stage.id}.md", retry_count + 1)
    rerun_stage = replace(self._stage_for_retry_agent(stage, retry_count), output=rerun_output)

    self.logger.event(
        "agent.rerun",
        "Re-running review after malformed output",
        stage_id=stage.id,
        task_id=task.id,
    )

    # Keep the caller's notes and append the formatting demand last.
    notes = list(retry_notes)
    notes.append(
        "Previous review output was malformed. Return exactly four lines: status, reason, next_stage, context_update. Do not return prose, headings, or analysis."
    )

    # Show the reviewer a bounded excerpt of what it got wrong.
    outputs = _review_previous_outputs(previous_outputs)
    malformed_text = self._read_output(malformed_result.output_path)
    outputs["malformed_review_output"] = _compact_previous_output(malformed_text, max_chars=800)

    bulleted_notes = [f"- {note}" for note in notes]
    rerun_result = self.agent_executor.run_stage(
        rerun_stage,
        task,
        outputs,
        notes,
        project_context=project_context,
        task_context=task_context,
        retry_context="\n".join(bulleted_notes),
    )

    if not _is_malformed_review_result(rerun_result):
        return rerun_result

    # Still malformed: replace the reason so callers can tell that the strict
    # retry was already attempted.
    return StageResult(
        rerun_result.stage_id,
        "fail",
        (
            "Review output remained malformed after a strict formatting retry. "
            "Stopping without redrafting; inspect the applied draft and review artifact."
        ),
        output_path=rerun_result.output_path,
        context_update=rerun_result.context_update,
    )
|
||||
|
||||
def _modified_files(self) -> tuple[str, ...]:
|
||||
completed = subprocess.run(
|
||||
["git", "status", "--short"],
|
||||
|
|
@ -1608,6 +1676,36 @@ def _invalid_file_writer_output_summary(output: str, reason: str, max_chars: int
|
|||
return "\n".join(lines)
|
||||
|
||||
|
||||
def _is_malformed_review_result(result: StageResult) -> bool:
|
||||
return result.status == "fail" and (
|
||||
"Review output did not include a valid status" in result.reason
|
||||
or "Review output remained malformed" in result.reason
|
||||
)
|
||||
|
||||
|
||||
def _review_previous_outputs(previous_outputs: dict[str, str], max_chars: int = 1600) -> dict[str, str]:
    """Return a compacted copy of ``previous_outputs`` for a review prompt.

    Every entry is kept, in its original order, and passed through
    ``_compact_previous_output`` with a per-entry character budget:

    * ``max_chars`` for draft/patch/apply/test/review outputs,
    * 500 for plan and context outputs,
    * 800 for everything else.
    """
    full_budget_names = {
        "applied.patch",
        "normalized-draft.patch",
        "scene-draft.patch",
        "draft_scene",
        "apply_draft",
        "validate_draft",
        "test",
        "review",
    }
    background_names = {"plan", "semantic_context", "context"}

    def budget_for(name: str) -> int:
        # Anything that looks like the draft under review gets the full budget.
        if name in full_budget_names or name.endswith(".patch") or "draft" in name or "apply" in name:
            return max_chars
        if name in background_names:
            return 500
        return 800

    return {
        name: _compact_previous_output(text, max_chars=budget_for(name))
        for name, text in previous_outputs.items()
    }
|
||||
|
||||
|
||||
def _file_writer_error_reason(stage: StageConfig, reason: str) -> str:
|
||||
guidance = _file_writer_stage_guidance(stage)
|
||||
if not guidance or "not allowed for this stage" not in reason:
|
||||
|
|
|
|||
|
|
@ -109,11 +109,101 @@ Acceptance Criteria:
|
|||
- Updates durable state
|
||||
|
||||
---
|
||||
- [ ] SCENE-031: Rollerblade courier run
|
||||
|
||||
Dependencies:
|
||||
- SCENE-003
|
||||
|
||||
Description:
|
||||
Proxy and Cricket rollerblade through late-night Seattle delivering encrypted NightShift inference keys, salvaged hardware, and cached datasets between squatters, artists, and underground operators.
|
||||
|
||||
The scene should establish:
|
||||
- movement through the city
|
||||
- underground mutual aid systems
|
||||
- degraded urban infrastructure
|
||||
- physical geography of Seattle
|
||||
- emotional intimacy through transit
|
||||
|
||||
Environmental details should emphasize:
|
||||
- wet pavement reflecting neon transit signage
|
||||
- abandoned autonomous delivery vehicles
|
||||
- late-night teriyaki shops
|
||||
- extension cords hanging between apartments
|
||||
- cracked sidewalks
|
||||
- rooftop antennas
|
||||
- stale vape clouds in freight elevators
|
||||
|
||||
A subtle anomaly appears when an unrelated ad display briefly shows imagery identical to visuals seen elsewhere in the story.
|
||||
|
||||
Nobody reacts strongly.
|
||||
|
||||
Acceptance Criteria:
|
||||
- Strong Seattle atmosphere
|
||||
- Deepens Proxy and Cricket relationship naturally
|
||||
- Includes rollerblading materially throughout the scene
|
||||
- Introduces subtle recurring anomaly
|
||||
- Avoids exposition-heavy dialogue
|
||||
- Scene length between 1400-2400 words
|
||||
- Writes:
|
||||
- `story/chapters/chapter-001/scene-003a.md`
|
||||
- Updates durable state
|
||||
|
||||
---
|
||||
- [ ] SCENE-032: Kremwerk furry rave
|
||||
|
||||
Dependencies:
|
||||
- SCENE-031
|
||||
|
||||
Description:
|
||||
Proxy and DJ BLOODMONEY attend a crowded underground furry rave at Kremwerk following one of BLOODMONEY's pirate jungle sets.
|
||||
|
||||
The scene should establish:
|
||||
- queer underground culture
|
||||
- synthetic identity experimentation
|
||||
- emotional sincerity beneath irony
|
||||
- anti-corporate creative spaces
|
||||
- generated aesthetics used communally rather than commercially
|
||||
- shape of the romance between Proxy and BLOODMONEY
|
||||
- makeout scene between Proxy and BLOODMONEY
|
||||
|
||||
The rave should feel:
|
||||
- affectionate
|
||||
- overheated
|
||||
- crowded
|
||||
- emotionally necessary
|
||||
|
||||
Environmental details should include:
|
||||
- soaked Capitol Hill sidewalks
|
||||
- damp faux fur
|
||||
- dangling extension cords powering chargers
|
||||
- jungle edits mixed with bassline and hyperpop
|
||||
- generated visuals projected onto concrete pillars
|
||||
- patched jackets with dead startup logos
|
||||
- kandi bracelets
|
||||
- old server racks repurposed into lighting rigs
|
||||
- rollerbladers moving through industrial hallways
|
||||
- people discussing models like music genres
|
||||
|
||||
Proxy gradually realizes many attendees rely emotionally on systems like NightShift.
|
||||
|
||||
Acceptance Criteria:
|
||||
- Avoids mocking underground/furry culture
|
||||
- Strong sensory environmental detail
|
||||
- Reinforces themes of synthetic companionship and community
|
||||
- Includes subtle emotional unease beneath warmth
|
||||
- Maintains grounded tone
|
||||
- Scene length between 1800-3000 words
|
||||
- Writes:
|
||||
- `story/chapters/chapter-001/scene-003b.md`
|
||||
- Updates durable state
|
||||
|
||||
---
|
||||
|
||||
|
||||
- [ ] SCENE-004: Rich district delivery
|
||||
|
||||
Dependencies:
|
||||
- SCENE-003
|
||||
- SCENE-032
|
||||
|
||||
Description:
|
||||
Proxy delivers salvaged compute hardware to a wealthy private social club operating in a quiet offline district.
|
||||
|
|
@ -233,19 +323,59 @@ Proxy becomes uncomfortable with:
|
|||
|
||||
Acceptance Criteria:
|
||||
- Shows expanding underground compute economy
|
||||
- Deepens Proxy’s internal conflict
|
||||
- Deepens Proxy's internal conflict
|
||||
- Introduces operational stress
|
||||
- Maintains grounded tone
|
||||
- Writes:
|
||||
- `story/chapters/chapter-002/scene-002.md`
|
||||
- Updates durable state
|
||||
|
||||
---
|
||||
- [ ] SCENE-081: Free inference night
|
||||
|
||||
Dependencies:
|
||||
- SCENE-008
|
||||
|
||||
Description:
|
||||
Following a successful scavenging run, NightShift temporarily opens free public inference access for one evening.
|
||||
|
||||
Artists, musicians, lonely users, and exhausted workers flood the squat looking for compute access.
|
||||
|
||||
The scene should establish:
|
||||
- NightShift as emotional infrastructure
|
||||
- positive social uses of synthetic systems
|
||||
- underground mutual aid culture
|
||||
- growing operational stress
|
||||
|
||||
Examples should include:
|
||||
- collaborative generated visuals
|
||||
- musicians creating samples
|
||||
- users generating outfit concepts before events
|
||||
- emotionally vulnerable conversations with companion systems
|
||||
- translation of old documents and messages
|
||||
- communal experimentation with weird model outputs
|
||||
|
||||
Proxy slowly realizes NightShift has become psychologically essential for many people.
|
||||
|
||||
This realization unsettles her.
|
||||
|
||||
Acceptance Criteria:
|
||||
- Avoids simplistic "AI bad" framing
|
||||
- Balances warmth with discomfort
|
||||
- Strong environmental detail
|
||||
- Shows growing scale of NightShift operations
|
||||
- Reinforces emotional dependency themes
|
||||
- Scene length between 1800-3000 words
|
||||
- Writes:
|
||||
- `story/chapters/chapter-002/scene-002a.md`
|
||||
- Updates durable state
|
||||
|
||||
---
|
||||
|
||||
- [ ] SCENE-009: Sister Circuit
|
||||
|
||||
Dependencies:
|
||||
- SCENE-008
|
||||
- SCENE-081
|
||||
|
||||
Description:
|
||||
Proxy meets Sister Circuit in a server monastery outside Tacoma.
|
||||
|
|
|
|||
143
nightshift/sandbox_run.py
Normal file
143
nightshift/sandbox_run.py
Normal file
|
|
@ -0,0 +1,143 @@
|
|||
"""General-purpose setup-and-run sandbox command."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
import shutil
|
||||
import subprocess
|
||||
import venv
|
||||
|
||||
from .errors import NightShiftError
|
||||
from .integ import _initialize_project_git_repo
|
||||
from .integ_setup import IntegrationSetupResult, setup_python_project
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class SandboxRunResult:
    """Immutable summary of one ``run_sandbox_project`` invocation."""

    # Resolved path of the project that was copied.
    source_project: Path
    # Sandbox root; holds the project copy and the virtual environment.
    directory: Path
    # Location of the copied project (``directory / "project"``).
    project_dir: Path
    # Location of the sandbox virtual environment (``directory / ".venv"``).
    venv_dir: Path
    # Result returned by ``setup_python_project`` for the copied project.
    setup: IntegrationSetupResult
    # Argument vector of the NightShift run command.
    command: tuple[str, ...]
    # Return code of the run command; 0 when nothing was executed (dry run).
    exit_code: int
    # True when setup and the run command were only planned, not executed.
    dry_run: bool
|
||||
|
||||
|
||||
def run_sandbox_project(
    project: str | Path,
    *,
    output: str | Path | None = None,
    timestamped: bool = False,
    root: str | Path = ".",
    task: str | None = None,
    all_tasks: bool = False,
    setup_extras: tuple[str, ...] = ("pytest",),
    skip_setup_validate: bool = False,
    dry_run: bool = False,
    animation: str = "status_dots",
    no_animation: bool = False,
    force: bool = False,
) -> SandboxRunResult:
    """Copy a NightShift project into a sandbox, set it up, and run it.

    Args:
        project: Existing project directory; must contain ``nightshift.yaml``.
        output: Sandbox directory. The project is copied to ``output/project``.
        timestamped: Use a timestamped sandbox under ``root/integ_runs`` instead
            of ``output``. Exactly one of ``output``/``timestamped`` is required.
        root: Base directory used with ``timestamped``.
        task: Id of the single task to run.
        all_tasks: Run with ``--all`` instead of a single task. Exactly one of
            ``task``/``all_tasks`` is required.
        setup_extras: Extra packages passed to ``setup_python_project``.
        skip_setup_validate: Disable validation in ``setup_python_project``.
        dry_run: Still create the sandbox copy, but create no virtual
            environment, pass ``dry_run`` through to setup, and do not execute
            the run command.
        animation: Animation name forwarded to the run command.
        no_animation: Forward ``--no-animation`` instead of ``--animation``.
        force: Replace an existing, non-empty ``project`` copy in the sandbox.

    Returns:
        A ``SandboxRunResult`` describing the sandbox, the run command, and its
        exit code (0 for a dry run).

    Raises:
        NightShiftError: On conflicting or missing options, a missing or
            invalid source project, an existing sandbox copy without ``force``,
            or when the source and sandbox trees overlap.
    """

    if task and all_tasks:
        raise NightShiftError("Sandbox run error: use either --task or --all, not both.")
    if not task and not all_tasks:
        raise NightShiftError("Sandbox run error: provide --task or --all.")
    if output and timestamped:
        raise NightShiftError("Sandbox run error: use either --output or --timestamped, not both.")
    if not output and not timestamped:
        raise NightShiftError("Sandbox run error: provide --output or --timestamped.")

    source = Path(project).resolve()
    if not source.exists() or not source.is_dir():
        raise NightShiftError(f"Sandbox run error: project directory does not exist: {source}")
    if not (source / "nightshift.yaml").exists():
        raise NightShiftError(f"Sandbox run error: project does not contain nightshift.yaml: {source}")

    sandbox_dir = _sandbox_directory(output, root=root, timestamped=timestamped)
    project_dir = sandbox_dir / "project"
    venv_dir = sandbox_dir / ".venv"
    if project_dir.exists() and any(project_dir.iterdir()) and not force:
        raise NightShiftError(f"Sandbox run error: output project already exists: {project_dir}")

    # Refuse overlapping trees before touching the filesystem. A sandbox inside
    # the source (e.g. --project . --timestamped with the default root) makes
    # copytree descend into its own destination without end.
    if sandbox_dir == source or source in sandbox_dir.parents:
        raise NightShiftError(
            f"Sandbox run error: sandbox directory must not be inside the source project: {sandbox_dir}"
        )
    # A project copy that is, or contains, the source would be deleted by the
    # rmtree below before anything could be copied from it.
    if project_dir == source or project_dir in source.parents:
        raise NightShiftError(
            f"Sandbox run error: source project must not be inside the sandbox project copy: {project_dir}"
        )

    sandbox_dir.mkdir(parents=True, exist_ok=True)
    if project_dir.exists():
        # Reached only with force=True or when the existing copy is empty.
        shutil.rmtree(project_dir)
    shutil.copytree(source, project_dir, ignore=_copy_ignore)
    if not dry_run:
        if not venv_dir.exists():
            venv.EnvBuilder(with_pip=True).create(venv_dir)
        _initialize_project_git_repo(project_dir)

    setup = setup_python_project(
        project_dir,
        extras=setup_extras,
        validate=not skip_setup_validate,
        dry_run=dry_run,
    )
    # The run uses the interpreter reported by setup.
    command = [str(setup.python), "-m", "nightshift.cli", "run"]
    if no_animation:
        command.append("--no-animation")
    elif animation:
        command.extend(["--animation", animation])
    if all_tasks:
        command.append("--all")
    else:
        command.extend(["--task", task or ""])

    exit_code = 0
    if not dry_run:
        completed = subprocess.run(command, cwd=project_dir, text=True, encoding="utf-8", errors="replace")
        exit_code = completed.returncode

    return SandboxRunResult(
        source_project=source,
        directory=sandbox_dir,
        project_dir=project_dir,
        venv_dir=venv_dir,
        setup=setup,
        command=tuple(command),
        exit_code=exit_code,
        dry_run=dry_run,
    )
|
||||
|
||||
|
||||
def format_sandbox_run_result(result: SandboxRunResult) -> str:
    """Render ``result`` as a newline-separated, human-readable summary.

    A ``Dry run: true`` line is placed first when the result is a dry run.
    """
    prefix = ["Dry run: true"] if result.dry_run else []
    command_text = " ".join(result.command)
    artifacts_dir = result.project_dir / ".nightshift"
    summary = [
        f"Source project: {result.source_project}",
        f"Sandbox: {result.directory}",
        f"Project: {result.project_dir}",
        f"Venv: {result.venv_dir}",
        f"Run command: {command_text}",
        f"Exit code: {result.exit_code}",
        f"Artifacts: {artifacts_dir}",
    ]
    return "\n".join(prefix + summary)
|
||||
|
||||
|
||||
def _sandbox_directory(output: str | Path | None, *, root: str | Path, timestamped: bool) -> Path:
|
||||
if output:
|
||||
return Path(output).resolve()
|
||||
base = Path(root).resolve() / "integ_runs"
|
||||
run_id = datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%S.%fZ")
|
||||
return base / run_id
|
||||
|
||||
|
||||
def _copy_ignore(directory: str, names: list[str]) -> set[str]:
|
||||
ignored = {
|
||||
".git",
|
||||
".pytest_cache",
|
||||
".ruff_cache",
|
||||
"__pycache__",
|
||||
".venv",
|
||||
"venv",
|
||||
}
|
||||
if Path(directory).name == ".nightshift":
|
||||
ignored.update({"runs", "run-summary.md", "run.log", "project-context.md", "project-context-chart.md"})
|
||||
return {name for name in names if name in ignored or name.endswith(".egg-info")}
|
||||
|
|
@ -168,6 +168,7 @@ class TerminalAnimation:
|
|||
self._width = 0
|
||||
self._lock = threading.Lock()
|
||||
self._last_rendered = ""
|
||||
self._last_status_line = ""
|
||||
|
||||
def __enter__(self) -> "TerminalAnimation":
|
||||
self.start()
|
||||
|
|
@ -194,6 +195,7 @@ class TerminalAnimation:
|
|||
def update_message(self, message: str) -> None:
|
||||
with self._lock:
|
||||
self.message = message
|
||||
self._emit_status_line(message)
|
||||
|
||||
def emit(self, line: str) -> None:
|
||||
if not self.enabled:
|
||||
|
|
@ -238,6 +240,18 @@ class TerminalAnimation:
|
|||
self.stream.write("\r" + (" " * self._width) + "\r")
|
||||
self.stream.flush()
|
||||
|
||||
def _emit_status_line(self, message: str) -> None:
    """Print ``message`` as a formatted status line unless it repeats the last one.

    When the animation is enabled, the current frame is cleared before the
    line is printed and frame 0 is rendered again afterwards.
    """
    status_line = format_status_bar_message(message, stream=self.stream)
    if status_line != self._last_status_line:
        self._last_status_line = status_line
        animating = self.enabled
        if animating:
            self._clear()
        print(status_line)
        if animating:
            self._render_frame(0)
|
||||
|
||||
|
||||
def animation_frames(name: str) -> tuple[str, ...]:
|
||||
frames = HOTDOG_ANIMATIONS.get(name)
|
||||
|
|
|
|||
|
|
@ -153,6 +153,93 @@ class PipelineRunnerTests(unittest.TestCase):
|
|||
self.assertIn("Retry limit reached", result.reason)
|
||||
self.assertEqual([item.stage_id for item in result.stage_results], ["implement", "review", "implement", "review", "implement", "review"])
|
||||
|
||||
def test_malformed_review_gets_strict_retry_without_redrafting(self) -> None:
    """A malformed review is re-run with strict notes and passes without a redraft."""
    # Fake reviewer: malformed on the first call, well-formed once the strict
    # formatting note appears in its prompt.
    reviewer_lines = (
        "import sys",
        "prompt = sys.stdin.read()",
        "if 'Previous review output was malformed' in prompt:",
        " print('status: pass')",
        " print('reason: strict retry ok')",
        " print('next_stage: none')",
        " print('context_update: none')",
        "else:",
        " print('files')",
    )
    with tempfile.TemporaryDirectory() as tmp:
        root = Path(tmp)
        _write_common_files(root)
        (root / "fake_reviewer.py").write_text("\n".join(reviewer_lines), encoding="utf-8")

        implement = StageConfig(id="implement", type="agent", agent="planner", output="implementation-log.md")
        review = StageConfig(
            id="review",
            type="agent_review",
            agent="reviewer",
            on_fail="implement",
            output="review.md",
        )
        summarize = StageConfig(id="summarize", type="summarize", output="final-notes.md")
        config = make_config(root, (implement, review, summarize), max_retries=2)
        config.agents["reviewer"] = AgentConfig(
            id="reviewer",
            backend="command",
            command="python fake_reviewer.py",
            system_prompt=Path("reviewer.md"),
        )
        store = ArtifactStore(root, ".nightshift", run_id="test-run")
        runner = PipelineRunner(config, store)
        task = parse_tasks(TASK_MD)[0]

        result = runner.run_task(task)

        artifacts = root / ".nightshift" / "runs" / "test-run" / "tasks" / task.id
        first_review = artifacts / "review.md"
        strict_review = artifacts / "review-1.md"
        self.assertEqual(result.status, "complete")
        self.assertEqual(result.retry_count, 0)
        # The strict rerun is folded into the single "review" stage result.
        self.assertEqual([item.stage_id for item in result.stage_results], ["implement", "review", "summarize"])
        self.assertTrue(first_review.exists())
        self.assertTrue(strict_review.exists())
        self.assertIn("files", first_review.read_text(encoding="utf-8"))
        self.assertIn("strict retry ok", strict_review.read_text(encoding="utf-8"))
|
||||
|
||||
def test_malformed_review_stops_without_on_fail_redraft(self) -> None:
    """A review that stays malformed fails the task without following ``on_fail``."""
    with tempfile.TemporaryDirectory() as tmp:
        root = Path(tmp)
        _write_common_files(root)
        # This reviewer never produces a valid status line.
        (root / "fake_reviewer.py").write_text("print('files')\n", encoding="utf-8")

        implement = StageConfig(id="implement", type="agent", agent="planner", output="implementation-log.md")
        review = StageConfig(
            id="review",
            type="agent_review",
            agent="reviewer",
            on_fail="implement",
            output="review.md",
        )
        config = make_config(root, (implement, review), max_retries=2)
        config.agents["reviewer"] = AgentConfig(
            id="reviewer",
            backend="command",
            command="python fake_reviewer.py",
            system_prompt=Path("reviewer.md"),
        )
        store = ArtifactStore(root, ".nightshift", run_id="test-run")
        runner = PipelineRunner(config, store)
        task = parse_tasks(TASK_MD)[0]

        result = runner.run_task(task)

        artifacts = root / ".nightshift" / "runs" / "test-run" / "tasks" / task.id
        self.assertEqual(result.status, "failed")
        self.assertEqual(result.retry_count, 0)
        self.assertIn("remained malformed", result.reason)
        # No second "implement" entry: the on_fail redraft was not taken.
        self.assertEqual([item.stage_id for item in result.stage_results], ["implement", "review"])
        for artifact_name in ("review.md", "review-1.md"):
            self.assertTrue((artifacts / artifact_name).exists())
|
||||
|
||||
def test_passing_review_next_stage_is_ignored(self) -> None:
|
||||
with tempfile.TemporaryDirectory() as directory:
|
||||
root = Path(directory)
|
||||
|
|
|
|||
70
tests/test_sandbox_run.py
Normal file
70
tests/test_sandbox_run.py
Normal file
|
|
@ -0,0 +1,70 @@
|
|||
from pathlib import Path
|
||||
import tempfile
|
||||
import unittest
|
||||
|
||||
from nightshift.errors import NightShiftError
|
||||
from nightshift.sandbox_run import format_sandbox_run_result, run_sandbox_project
|
||||
|
||||
|
||||
class SandboxRunTests(unittest.TestCase):
    """Dry-run tests for ``run_sandbox_project`` and its result formatting."""

    def test_sandbox_run_dry_run_copies_existing_project_and_keeps_animation(self) -> None:
        """A dry run copies the project, drops old run artifacts, and keeps ``--animation``."""
        with tempfile.TemporaryDirectory() as directory:
            # Resolve the temp dir: the result paths are resolved, and the temp
            # location may sit behind a symlink (e.g. /var on macOS).
            root = Path(directory).resolve()
            source = root / "source"
            source.mkdir()
            (source / "nightshift.yaml").write_text("project:\n name: demo\n", encoding="utf-8")
            (source / "pyproject.toml").write_text("[project]\nname = 'demo'\nversion = '0.1.0'\n", encoding="utf-8")
            (source / ".nightshift").mkdir()
            (source / ".nightshift" / "tasks.md").write_text("- [ ] TASK-001: Demo\n\nAcceptance Criteria:\n- done\n", encoding="utf-8")
            (source / ".nightshift" / "runs").mkdir()
            (source / ".nightshift" / "runs" / "old.txt").write_text("old artifact", encoding="utf-8")
            output = root / "sandbox"

            result = run_sandbox_project(
                source,
                output=output,
                task="TASK-001",
                dry_run=True,
            )

            rendered = format_sandbox_run_result(result)
            self.assertIn("Dry run: true", rendered)
            self.assertEqual(result.project_dir, output / "project")
            self.assertTrue((output / "project" / "nightshift.yaml").exists())
            self.assertTrue((output / "project" / ".nightshift" / "tasks.md").exists())
            # Previous run artifacts must not leak into the sandbox copy.
            self.assertFalse((output / "project" / ".nightshift" / "runs").exists())
            self.assertIn("--animation", result.command)
            self.assertNotIn("--no-animation", result.command)
            self.assertIn("TASK-001", result.command)

    def test_sandbox_run_timestamped_uses_integ_runs_directory(self) -> None:
        """``timestamped=True`` places the sandbox under ``<root>/integ_runs``."""
        with tempfile.TemporaryDirectory() as directory:
            # Resolved for the same reason as above: result.directory is resolved.
            root = Path(directory).resolve()
            source = root / "source"
            source.mkdir()
            (source / "nightshift.yaml").write_text("project:\n name: demo\n", encoding="utf-8")
            (source / "pyproject.toml").write_text("[project]\nname = 'demo'\nversion = '0.1.0'\n", encoding="utf-8")

            result = run_sandbox_project(
                source,
                root=root,
                timestamped=True,
                all_tasks=True,
                dry_run=True,
            )

            self.assertEqual(result.directory.parent, root / "integ_runs")
            self.assertIn("--all", result.command)

    def test_sandbox_run_requires_output_or_timestamped(self) -> None:
        """Omitting both ``output`` and ``timestamped`` raises ``NightShiftError``."""
        with tempfile.TemporaryDirectory() as directory:
            source = Path(directory) / "source"
            source.mkdir()
            (source / "nightshift.yaml").write_text("project:\n name: demo\n", encoding="utf-8")

            with self.assertRaisesRegex(NightShiftError, "provide --output or --timestamped"):
                run_sandbox_project(source, task="TASK-001", dry_run=True)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
|
|
@ -57,6 +57,17 @@ class TerminalStylingTests(unittest.TestCase):
|
|||
|
||||
self.assertEqual(output.getvalue().strip(), "plain log")
|
||||
|
||||
def test_terminal_animation_status_update_prints_for_non_tty(self) -> None:
    """``update_message`` prints a status line to stdout for a ``StringIO`` stream."""
    animation = TerminalAnimation(stream=StringIO())
    captured = StringIO()

    with patch("sys.stdout", captured):
        animation.update_message("Task: TASK-001 | >> Stage: plan")

    printed = captured.getvalue()
    self.assertIn("[NightShift]", printed)
    self.assertIn("Stage: plan", printed)
|
||||
|
||||
def test_terminal_animation_renders_immediately_when_started(self) -> None:
|
||||
stream = FakeTTY()
|
||||
animation = TerminalAnimation(
|
||||
|
|
@ -85,10 +96,12 @@ class TerminalStylingTests(unittest.TestCase):
|
|||
with patch("sys.stdout", output):
|
||||
animation.start()
|
||||
animation.emit("log line")
|
||||
animation.update_message("Stage: write")
|
||||
stream_output = stream.getvalue()
|
||||
animation.stop()
|
||||
|
||||
self.assertIn("log line", output.getvalue())
|
||||
self.assertIn("Stage: write", output.getvalue())
|
||||
self.assertGreaterEqual(stream_output.count("Stage: plan"), 2)
|
||||
|
||||
def test_format_status_bar_message_uses_status_color(self) -> None:
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user