mirror of
https://github.com/khodges42/nightShift.git
synced 2026-06-14 10:08:37 +00:00
Project runner tool and writer tooling fixes
i wanted the animated status bar...
This commit is contained in:
parent
d928a52fb3
commit
a0ad9b2ac0
3
docs/bugs.md
Normal file
3
docs/bugs.md
Normal file
|
|
@ -0,0 +1,3 @@
|
||||||
|
# Log descriptions shown in the status bar are slightly off. "Starting ollama HTTP model invocation" implies that the run is stuck starting when it is not.
|
||||||
|
|
||||||
|
# We've stopped updating the version.
|
||||||
|
|
@ -15,6 +15,7 @@ from .integ_setup import format_setup_result, setup_python_project
|
||||||
from .integ_test import format_integration_test_result, run_integration_test
|
from .integ_test import format_integration_test_result, run_integration_test
|
||||||
from .pipeline import PipelineRunner
|
from .pipeline import PipelineRunner
|
||||||
from .runlog import RunLogger
|
from .runlog import RunLogger
|
||||||
|
from .sandbox_run import format_sandbox_run_result, run_sandbox_project
|
||||||
from .status import build_status, format_status
|
from .status import build_status, format_status
|
||||||
from .task_tests import check_task_test_files, format_task_test_checks, missing_task_test_paths
|
from .task_tests import check_task_test_files, format_task_test_checks, missing_task_test_paths
|
||||||
from .terminal import HOTDOG_ANIMATIONS, TerminalAnimation, format_banner, style_text
|
from .terminal import HOTDOG_ANIMATIONS, TerminalAnimation, format_banner, style_text
|
||||||
|
|
@ -131,6 +132,38 @@ def build_parser() -> argparse.ArgumentParser:
|
||||||
integ_test_parser.add_argument("--setup-skip-validate", action="store_true", help="Skip validation during setup.")
|
integ_test_parser.add_argument("--setup-skip-validate", action="store_true", help="Skip validation during setup.")
|
||||||
integ_test_parser.add_argument("--dry-run", action="store_true", help="Print commands without running setup or tasks.")
|
integ_test_parser.add_argument("--dry-run", action="store_true", help="Print commands without running setup or tasks.")
|
||||||
|
|
||||||
|
sandbox_parser = subparsers.add_parser(
|
||||||
|
"sandbox-run",
|
||||||
|
help="Copy an existing NightShift project into a sandbox, set it up, and run it.",
|
||||||
|
)
|
||||||
|
sandbox_parser.add_argument("--project", required=True, help="Existing NightShift project directory to copy.")
|
||||||
|
sandbox_output = sandbox_parser.add_mutually_exclusive_group(required=True)
|
||||||
|
sandbox_output.add_argument("--output", help="Sandbox output directory. The project is copied to OUTPUT/project.")
|
||||||
|
sandbox_output.add_argument(
|
||||||
|
"--timestamped",
|
||||||
|
action="store_true",
|
||||||
|
help="Create a timestamped sandbox under ROOT/integ_runs, like integ-test.",
|
||||||
|
)
|
||||||
|
sandbox_parser.add_argument("--root", default=".", help="Root used with --timestamped. Defaults to current directory.")
|
||||||
|
sandbox_parser.add_argument("--task", help="Specific task id to run.")
|
||||||
|
sandbox_parser.add_argument("--all", action="store_true", help="Run all runnable incomplete tasks.")
|
||||||
|
sandbox_parser.add_argument("--force", action="store_true", help="Overwrite an existing OUTPUT/project copy.")
|
||||||
|
sandbox_parser.add_argument(
|
||||||
|
"--setup-extra",
|
||||||
|
action="append",
|
||||||
|
default=["pytest"],
|
||||||
|
help="Extra package to install during setup. May be repeated. Defaults to pytest.",
|
||||||
|
)
|
||||||
|
sandbox_parser.add_argument("--setup-skip-validate", action="store_true", help="Skip validation during setup.")
|
||||||
|
sandbox_parser.add_argument("--dry-run", action="store_true", help="Create the sandbox copy and print commands without running setup or tasks.")
|
||||||
|
sandbox_parser.add_argument(
|
||||||
|
"--animation",
|
||||||
|
default="status_dots",
|
||||||
|
choices=tuple(sorted(HOTDOG_ANIMATIONS)),
|
||||||
|
help="Terminal animation to show while the sandboxed run is active.",
|
||||||
|
)
|
||||||
|
sandbox_parser.add_argument("--no-animation", action="store_true", help="Disable terminal animation.")
|
||||||
|
|
||||||
integ_report_parser = subparsers.add_parser("integ-report", help="Summarize the latest integration run.")
|
integ_report_parser = subparsers.add_parser("integ-report", help="Summarize the latest integration run.")
|
||||||
integ_report_parser.add_argument("--root", default=".", help="Repository root where integ_runs/ is located.")
|
integ_report_parser.add_argument("--root", default=".", help="Repository root where integ_runs/ is located.")
|
||||||
integ_report_parser.add_argument("--latest", action="store_true", help="Report the latest integration run.")
|
integ_report_parser.add_argument("--latest", action="store_true", help="Report the latest integration run.")
|
||||||
|
|
@ -309,6 +342,24 @@ def main(argv: list[str] | None = None) -> int:
|
||||||
print(format_integration_test_result(result))
|
print(format_integration_test_result(result))
|
||||||
return result.exit_code
|
return result.exit_code
|
||||||
|
|
||||||
|
if args.command == "sandbox-run":
|
||||||
|
result = run_sandbox_project(
|
||||||
|
args.project,
|
||||||
|
output=args.output,
|
||||||
|
timestamped=args.timestamped,
|
||||||
|
root=args.root,
|
||||||
|
task=args.task,
|
||||||
|
all_tasks=args.all,
|
||||||
|
setup_extras=tuple(args.setup_extra or ()),
|
||||||
|
skip_setup_validate=args.setup_skip_validate,
|
||||||
|
dry_run=args.dry_run,
|
||||||
|
animation=args.animation,
|
||||||
|
no_animation=args.no_animation,
|
||||||
|
force=args.force,
|
||||||
|
)
|
||||||
|
print(format_sandbox_run_result(result))
|
||||||
|
return result.exit_code
|
||||||
|
|
||||||
if args.command == "integ-report":
|
if args.command == "integ-report":
|
||||||
report = build_integration_report(args.root, latest=True)
|
report = build_integration_report(args.root, latest=True)
|
||||||
print(format_integration_report(report))
|
print(format_integration_report(report))
|
||||||
|
|
|
||||||
|
|
@ -229,7 +229,11 @@ class PipelineRunner:
|
||||||
index += 1
|
index += 1
|
||||||
continue
|
continue
|
||||||
|
|
||||||
target_stage = stage.on_fail or result.next_stage
|
target_stage = result.next_stage or (
|
||||||
|
stage.on_fail
|
||||||
|
if not (stage.type in {"agent_review", "review"} and _is_malformed_review_result(result))
|
||||||
|
else None
|
||||||
|
)
|
||||||
analysis_note = self._write_failure_diagnostics(stage, task, result, retry_count)
|
analysis_note = self._write_failure_diagnostics(stage, task, result, retry_count)
|
||||||
if analysis_note:
|
if analysis_note:
|
||||||
retry_notes.append(analysis_note)
|
retry_notes.append(analysis_note)
|
||||||
|
|
@ -481,7 +485,7 @@ class PipelineRunner:
|
||||||
result = self.agent_executor.run_stage(
|
result = self.agent_executor.run_stage(
|
||||||
self._stage_for_retry_agent(stage, retry_count),
|
self._stage_for_retry_agent(stage, retry_count),
|
||||||
task,
|
task,
|
||||||
previous_outputs,
|
_review_previous_outputs(previous_outputs) if stage.type in {"agent_review", "review"} else previous_outputs,
|
||||||
retry_notes,
|
retry_notes,
|
||||||
project_context=context.project_context,
|
project_context=context.project_context,
|
||||||
task_context=context.task_context,
|
task_context=context.task_context,
|
||||||
|
|
@ -501,6 +505,17 @@ class PipelineRunner:
|
||||||
context.task_context,
|
context.task_context,
|
||||||
context.retry_context,
|
context.retry_context,
|
||||||
)
|
)
|
||||||
|
if stage.type in {"agent_review", "review"} and _is_malformed_review_result(result):
|
||||||
|
return self._rerun_malformed_review(
|
||||||
|
stage,
|
||||||
|
task,
|
||||||
|
result,
|
||||||
|
previous_outputs,
|
||||||
|
retry_notes,
|
||||||
|
retry_count,
|
||||||
|
context.project_context,
|
||||||
|
context.task_context,
|
||||||
|
)
|
||||||
return result
|
return result
|
||||||
if stage.type in COMMAND_STAGE_TYPES:
|
if stage.type in COMMAND_STAGE_TYPES:
|
||||||
return self.command_executor.run_stage(_stage_with_attempt_output(stage, retry_count), task.id)
|
return self.command_executor.run_stage(_stage_with_attempt_output(stage, retry_count), task.id)
|
||||||
|
|
@ -1217,6 +1232,59 @@ class PipelineRunner:
|
||||||
)
|
)
|
||||||
return f"Debugger output: {debug_result.output_path or 'none'}."
|
return f"Debugger output: {debug_result.output_path or 'none'}."
|
||||||
|
|
||||||
|
def _rerun_malformed_review(
    self,
    stage: StageConfig,
    task: Task,
    malformed_result: StageResult,
    previous_outputs: dict[str, str],
    retry_notes: list[str],
    retry_count: int,
    project_context: str,
    task_context: str,
) -> StageResult:
    """Re-run a review stage once with strict formatting instructions.

    The rerun writes to its own attempt file (derived from the stage output
    name and ``retry_count + 1``), receives the compacted previous outputs
    plus an excerpt of the malformed review, and gets an extra retry note
    spelling out the required four-line format.

    Returns the rerun's result, or a synthesized ``"fail"`` result when the
    rerun output is still malformed.
    """
    base_output = stage.output or f"{stage.id}.md"
    rerun_stage = replace(
        self._stage_for_retry_agent(stage, retry_count),
        output=_attempt_filename(base_output, retry_count + 1),
    )
    self.logger.event(
        "agent.rerun",
        "Re-running review after malformed output",
        stage_id=stage.id,
        task_id=task.id,
    )

    # Keep the caller's notes intact; the formatting reminder goes last.
    notes = list(retry_notes)
    notes.append(
        "Previous review output was malformed. Return exactly four lines: status, reason, next_stage, context_update. Do not return prose, headings, or analysis."
    )

    # Give the reviewer a short excerpt of what it produced last time.
    outputs = _review_previous_outputs(previous_outputs)
    malformed_text = self._read_output(malformed_result.output_path)
    outputs["malformed_review_output"] = _compact_previous_output(malformed_text, max_chars=800)

    rerun_result = self.agent_executor.run_stage(
        rerun_stage,
        task,
        outputs,
        notes,
        project_context=project_context,
        task_context=task_context,
        retry_context="\n".join(f"- {note}" for note in notes),
    )
    if not _is_malformed_review_result(rerun_result):
        return rerun_result

    # Still malformed: report a failure whose reason marks it as such.
    return StageResult(
        rerun_result.stage_id,
        "fail",
        (
            "Review output remained malformed after a strict formatting retry. "
            "Stopping without redrafting; inspect the applied draft and review artifact."
        ),
        output_path=rerun_result.output_path,
        context_update=rerun_result.context_update,
    )
|
||||||
|
|
||||||
def _modified_files(self) -> tuple[str, ...]:
|
def _modified_files(self) -> tuple[str, ...]:
|
||||||
completed = subprocess.run(
|
completed = subprocess.run(
|
||||||
["git", "status", "--short"],
|
["git", "status", "--short"],
|
||||||
|
|
@ -1608,6 +1676,36 @@ def _invalid_file_writer_output_summary(output: str, reason: str, max_chars: int
|
||||||
return "\n".join(lines)
|
return "\n".join(lines)
|
||||||
|
|
||||||
|
|
||||||
|
def _is_malformed_review_result(result: StageResult) -> bool:
|
||||||
|
return result.status == "fail" and (
|
||||||
|
"Review output did not include a valid status" in result.reason
|
||||||
|
or "Review output remained malformed" in result.reason
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _review_previous_outputs(previous_outputs: dict[str, str], max_chars: int = 1600) -> dict[str, str]:
    """Return a compacted copy of *previous_outputs* sized for a review prompt.

    Draft, patch and apply artifacts (and the other priority names) get the
    full *max_chars* budget, planning/context outputs get 500 characters, and
    everything else gets 800. Input order is preserved.
    """
    priority_names = frozenset(
        {
            "applied.patch",
            "normalized-draft.patch",
            "scene-draft.patch",
            "draft_scene",
            "apply_draft",
            "validate_draft",
            "test",
            "review",
        }
    )
    background_names = frozenset({"plan", "semantic_context", "context"})

    def _budget_for(name: str) -> int:
        # Priority wins over the background rule, matching first-match order.
        if name in priority_names or name.endswith(".patch") or "draft" in name or "apply" in name:
            return max_chars
        if name in background_names:
            return 500
        return 800

    return {
        name: _compact_previous_output(output, max_chars=_budget_for(name))
        for name, output in previous_outputs.items()
    }
|
||||||
|
|
||||||
|
|
||||||
def _file_writer_error_reason(stage: StageConfig, reason: str) -> str:
|
def _file_writer_error_reason(stage: StageConfig, reason: str) -> str:
|
||||||
guidance = _file_writer_stage_guidance(stage)
|
guidance = _file_writer_stage_guidance(stage)
|
||||||
if not guidance or "not allowed for this stage" not in reason:
|
if not guidance or "not allowed for this stage" not in reason:
|
||||||
|
|
|
||||||
|
|
@ -109,11 +109,101 @@ Acceptance Criteria:
|
||||||
- Updates durable state
|
- Updates durable state
|
||||||
|
|
||||||
---
|
---
|
||||||
|
- [ ] SCENE-031: Rollerblade courier run
|
||||||
|
|
||||||
|
Dependencies:
|
||||||
|
- SCENE-003
|
||||||
|
|
||||||
|
Description:
|
||||||
|
Proxy and Cricket rollerblade through late-night Seattle delivering encrypted NightShift inference keys, salvaged hardware, and cached datasets between squatters, artists, and underground operators.
|
||||||
|
|
||||||
|
The scene should establish:
|
||||||
|
- movement through the city
|
||||||
|
- underground mutual aid systems
|
||||||
|
- degraded urban infrastructure
|
||||||
|
- physical geography of Seattle
|
||||||
|
- emotional intimacy through transit
|
||||||
|
|
||||||
|
Environmental details should emphasize:
|
||||||
|
- wet pavement reflecting neon transit signage
|
||||||
|
- abandoned autonomous delivery vehicles
|
||||||
|
- late-night teriyaki shops
|
||||||
|
- extension cords hanging between apartments
|
||||||
|
- cracked sidewalks
|
||||||
|
- rooftop antennas
|
||||||
|
- stale vape clouds in freight elevators
|
||||||
|
|
||||||
|
A subtle anomaly appears when an unrelated ad display briefly shows imagery identical to visuals seen elsewhere in the story.
|
||||||
|
|
||||||
|
Nobody reacts strongly.
|
||||||
|
|
||||||
|
Acceptance Criteria:
|
||||||
|
- Strong Seattle atmosphere
|
||||||
|
- Deepens Proxy and Cricket relationship naturally
|
||||||
|
- Includes rollerblading materially throughout the scene
|
||||||
|
- Introduces subtle recurring anomaly
|
||||||
|
- Avoids exposition-heavy dialogue
|
||||||
|
- Scene length between 1400-2400 words
|
||||||
|
- Writes:
|
||||||
|
- `story/chapters/chapter-001/scene-003a.md`
|
||||||
|
- Updates durable state
|
||||||
|
|
||||||
|
---
|
||||||
|
- [ ] SCENE-032: Kremwerk furry rave
|
||||||
|
|
||||||
|
Dependencies:
|
||||||
|
- SCENE-031
|
||||||
|
|
||||||
|
Description:
|
||||||
|
Proxy and DJ BLOODMONEY attend a crowded underground furry rave at Kremwerk following one of BLOODMONEY's pirate jungle sets.
|
||||||
|
|
||||||
|
The scene should establish:
|
||||||
|
- queer underground culture
|
||||||
|
- synthetic identity experimentation
|
||||||
|
- emotional sincerity beneath irony
|
||||||
|
- anti-corporate creative spaces
|
||||||
|
- generated aesthetics used communally rather than commercially
|
||||||
|
- shape of the romance between Proxy and BLOODMONEY
|
||||||
|
- makeout scene between Proxy and BLOODMONEY
|
||||||
|
|
||||||
|
The rave should feel:
|
||||||
|
- affectionate
|
||||||
|
- overheated
|
||||||
|
- crowded
|
||||||
|
- emotionally necessary
|
||||||
|
|
||||||
|
Environmental details should include:
|
||||||
|
- soaked Capitol Hill sidewalks
|
||||||
|
- damp faux fur
|
||||||
|
- dangling extension cords powering chargers
|
||||||
|
- jungle edits mixed with bassline and hyperpop
|
||||||
|
- generated visuals projected onto concrete pillars
|
||||||
|
- patched jackets with dead startup logos
|
||||||
|
- kandi bracelets
|
||||||
|
- old server racks repurposed into lighting rigs
|
||||||
|
- rollerbladers moving through industrial hallways
|
||||||
|
- people discussing models like music genres
|
||||||
|
|
||||||
|
Proxy gradually realizes many attendees rely emotionally on systems like NightShift.
|
||||||
|
|
||||||
|
Acceptance Criteria:
|
||||||
|
- Avoids mocking underground/furry culture
|
||||||
|
- Strong sensory environmental detail
|
||||||
|
- Reinforces themes of synthetic companionship and community
|
||||||
|
- Includes subtle emotional unease beneath warmth
|
||||||
|
- Maintains grounded tone
|
||||||
|
- Scene length between 1800-3000 words
|
||||||
|
- Writes:
|
||||||
|
- `story/chapters/chapter-001/scene-003b.md`
|
||||||
|
- Updates durable state
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
|
||||||
- [ ] SCENE-004: Rich district delivery
|
- [ ] SCENE-004: Rich district delivery
|
||||||
|
|
||||||
Dependencies:
|
Dependencies:
|
||||||
- SCENE-003
|
- SCENE-032
|
||||||
|
|
||||||
Description:
|
Description:
|
||||||
Proxy delivers salvaged compute hardware to a wealthy private social club operating in a quiet offline district.
|
Proxy delivers salvaged compute hardware to a wealthy private social club operating in a quiet offline district.
|
||||||
|
|
@ -233,19 +323,59 @@ Proxy becomes uncomfortable with:
|
||||||
|
|
||||||
Acceptance Criteria:
|
Acceptance Criteria:
|
||||||
- Shows expanding underground compute economy
|
- Shows expanding underground compute economy
|
||||||
- Deepens Proxy’s internal conflict
|
- Deepens Proxy's internal conflict
|
||||||
- Introduces operational stress
|
- Introduces operational stress
|
||||||
- Maintains grounded tone
|
- Maintains grounded tone
|
||||||
- Writes:
|
- Writes:
|
||||||
- `story/chapters/chapter-002/scene-002.md`
|
- `story/chapters/chapter-002/scene-002.md`
|
||||||
- Updates durable state
|
- Updates durable state
|
||||||
|
|
||||||
|
---
|
||||||
|
- [ ] SCENE-081: Free inference night
|
||||||
|
|
||||||
|
Dependencies:
|
||||||
|
- SCENE-008
|
||||||
|
|
||||||
|
Description:
|
||||||
|
Following a successful scavenging run, NightShift temporarily opens free public inference access for one evening.
|
||||||
|
|
||||||
|
Artists, musicians, lonely users, and exhausted workers flood the squat looking for compute access.
|
||||||
|
|
||||||
|
The scene should establish:
|
||||||
|
- NightShift as emotional infrastructure
|
||||||
|
- positive social uses of synthetic systems
|
||||||
|
- underground mutual aid culture
|
||||||
|
- growing operational stress
|
||||||
|
|
||||||
|
Examples should include:
|
||||||
|
- collaborative generated visuals
|
||||||
|
- musicians creating samples
|
||||||
|
- users generating outfit concepts before events
|
||||||
|
- emotionally vulnerable conversations with companion systems
|
||||||
|
- translation of old documents and messages
|
||||||
|
- communal experimentation with weird model outputs
|
||||||
|
|
||||||
|
Proxy slowly realizes NightShift has become psychologically essential for many people.
|
||||||
|
|
||||||
|
This realization unsettles her.
|
||||||
|
|
||||||
|
Acceptance Criteria:
|
||||||
|
- Avoids simplistic "AI bad" framing
|
||||||
|
- Balances warmth with discomfort
|
||||||
|
- Strong environmental detail
|
||||||
|
- Shows growing scale of NightShift operations
|
||||||
|
- Reinforces emotional dependency themes
|
||||||
|
- Scene length between 1800-3000 words
|
||||||
|
- Writes:
|
||||||
|
- `story/chapters/chapter-002/scene-002a.md`
|
||||||
|
- Updates durable state
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
- [ ] SCENE-009: Sister Circuit
|
- [ ] SCENE-009: Sister Circuit
|
||||||
|
|
||||||
Dependencies:
|
Dependencies:
|
||||||
- SCENE-008
|
- SCENE-081
|
||||||
|
|
||||||
Description:
|
Description:
|
||||||
Proxy meets Sister Circuit in a server monastery outside Tacoma.
|
Proxy meets Sister Circuit in a server monastery outside Tacoma.
|
||||||
|
|
|
||||||
143
nightshift/sandbox_run.py
Normal file
143
nightshift/sandbox_run.py
Normal file
|
|
@ -0,0 +1,143 @@
|
||||||
|
"""General-purpose setup-and-run sandbox command."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
from pathlib import Path
|
||||||
|
import shutil
|
||||||
|
import subprocess
|
||||||
|
import venv
|
||||||
|
|
||||||
|
from .errors import NightShiftError
|
||||||
|
from .integ import _initialize_project_git_repo
|
||||||
|
from .integ_setup import IntegrationSetupResult, setup_python_project
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
class SandboxRunResult:
    """Immutable record of one ``sandbox-run`` invocation."""

    # Resolved path of the project that was copied.
    source_project: Path
    # Sandbox root directory; holds the project copy and the venv.
    directory: Path
    # Location of the copied project (``directory / "project"``).
    project_dir: Path
    # Location of the sandbox virtual environment (``directory / ".venv"``).
    venv_dir: Path
    # Result returned by the project setup step.
    setup: IntegrationSetupResult
    # Argument vector of the NightShift run command.
    command: tuple[str, ...]
    # Return code of the run command; 0 when nothing was executed.
    exit_code: int
    # True when setup and the run were only planned, not executed.
    dry_run: bool
|
||||||
|
|
||||||
|
|
||||||
|
def run_sandbox_project(
    project: str | Path,
    *,
    output: str | Path | None = None,
    timestamped: bool = False,
    root: str | Path = ".",
    task: str | None = None,
    all_tasks: bool = False,
    setup_extras: tuple[str, ...] = ("pytest",),
    skip_setup_validate: bool = False,
    dry_run: bool = False,
    animation: str = "status_dots",
    no_animation: bool = False,
    force: bool = False,
) -> SandboxRunResult:
    """Copy a NightShift project into a sandbox, set it up, and run it.

    Args:
        project: Existing project directory; must contain ``nightshift.yaml``.
        output: Sandbox directory. The project is copied to ``output/project``.
        timestamped: Create a timestamped sandbox under ``root/integ_runs``
            instead of using ``output``.
        root: Base directory used with ``timestamped``.
        task: Id of the single task to run.
        all_tasks: Run with ``--all`` instead of a single task.
        setup_extras: Extra packages passed to the project setup step.
        skip_setup_validate: Skip validation during setup.
        dry_run: Still create the sandbox copy, but skip venv creation, git
            initialisation and the run itself; setup is called in dry-run mode.
        animation: Animation name forwarded to the run command.
        no_animation: Forward ``--no-animation`` instead of an animation name.
        force: Replace an existing, non-empty ``project`` copy.

    Returns:
        A ``SandboxRunResult``; ``exit_code`` is the run command's return
        code, or 0 for a dry run.

    Raises:
        NightShiftError: On conflicting or missing options, an invalid source
            project, an existing sandbox copy without ``force``, or a sandbox
            location that would replace the source project.
    """
    if task and all_tasks:
        raise NightShiftError("Sandbox run error: use either --task or --all, not both.")
    if not task and not all_tasks:
        raise NightShiftError("Sandbox run error: provide --task or --all.")
    if output and timestamped:
        raise NightShiftError("Sandbox run error: use either --output or --timestamped, not both.")
    if not output and not timestamped:
        raise NightShiftError("Sandbox run error: provide --output or --timestamped.")

    source = Path(project).resolve()
    if not source.exists() or not source.is_dir():
        raise NightShiftError(f"Sandbox run error: project directory does not exist: {source}")
    if not (source / "nightshift.yaml").exists():
        raise NightShiftError(f"Sandbox run error: project does not contain nightshift.yaml: {source}")

    sandbox_dir = _sandbox_directory(output, root=root, timestamped=timestamped)
    project_dir = sandbox_dir / "project"
    venv_dir = sandbox_dir / ".venv"

    # The copy target is removed before copying, so it must never be the
    # source itself or a directory that contains the source.
    if project_dir == source or project_dir in source.parents:
        raise NightShiftError(
            f"Sandbox run error: sandbox project directory would replace the source project: {project_dir}"
        )
    if project_dir.exists() and any(project_dir.iterdir()) and not force:
        raise NightShiftError(f"Sandbox run error: output project already exists: {project_dir}")

    copy_ignore = _copy_ignore
    if source == sandbox_dir or source in sandbox_dir.parents:
        # The sandbox lives inside the source tree (e.g. `--project .
        # --timestamped`). Without this filter copytree would copy the
        # destination into itself and recurse without bound.
        excluded = {sandbox_dir, project_dir}
        if timestamped:
            # The integ_runs directory only holds sandboxes; skipping it also
            # keeps earlier runs from being nested into each new copy.
            excluded.add(sandbox_dir.parent)

        def copy_ignore(directory: str, names: list[str]) -> set[str]:
            skipped = _copy_ignore(directory, names)
            parent = Path(directory).resolve()
            skipped.update(name for name in names if parent / name in excluded)
            return skipped

    sandbox_dir.mkdir(parents=True, exist_ok=True)
    if project_dir.exists():
        shutil.rmtree(project_dir)
    shutil.copytree(source, project_dir, ignore=copy_ignore)
    if not dry_run:
        if not venv_dir.exists():
            venv.EnvBuilder(with_pip=True).create(venv_dir)
        _initialize_project_git_repo(project_dir)

    setup = setup_python_project(
        project_dir,
        extras=setup_extras,
        validate=not skip_setup_validate,
        dry_run=dry_run,
    )
    command = [str(setup.python), "-m", "nightshift.cli", "run"]
    if no_animation:
        command.append("--no-animation")
    elif animation:
        command.extend(["--animation", animation])
    if all_tasks:
        command.append("--all")
    else:
        command.extend(["--task", task or ""])

    exit_code = 0
    if not dry_run:
        completed = subprocess.run(command, cwd=project_dir, text=True, encoding="utf-8", errors="replace")
        exit_code = completed.returncode

    return SandboxRunResult(
        source_project=source,
        directory=sandbox_dir,
        project_dir=project_dir,
        venv_dir=venv_dir,
        setup=setup,
        command=tuple(command),
        exit_code=exit_code,
        dry_run=dry_run,
    )
|
||||||
|
|
||||||
|
|
||||||
|
def format_sandbox_run_result(result: SandboxRunResult) -> str:
    """Render *result* as a newline-separated, human-readable summary.

    A ``Dry run: true`` line is placed first when the result is a dry run.
    """
    run_command = " ".join(result.command)
    artifacts_dir = result.project_dir / ".nightshift"

    report = ["Dry run: true"] if result.dry_run else []
    report.append(f"Source project: {result.source_project}")
    report.append(f"Sandbox: {result.directory}")
    report.append(f"Project: {result.project_dir}")
    report.append(f"Venv: {result.venv_dir}")
    report.append(f"Run command: {run_command}")
    report.append(f"Exit code: {result.exit_code}")
    report.append(f"Artifacts: {artifacts_dir}")
    return "\n".join(report)
|
||||||
|
|
||||||
|
|
||||||
|
def _sandbox_directory(output: str | Path | None, *, root: str | Path, timestamped: bool) -> Path:
|
||||||
|
if output:
|
||||||
|
return Path(output).resolve()
|
||||||
|
base = Path(root).resolve() / "integ_runs"
|
||||||
|
run_id = datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%S.%fZ")
|
||||||
|
return base / run_id
|
||||||
|
|
||||||
|
|
||||||
|
def _copy_ignore(directory: str, names: list[str]) -> set[str]:
|
||||||
|
ignored = {
|
||||||
|
".git",
|
||||||
|
".pytest_cache",
|
||||||
|
".ruff_cache",
|
||||||
|
"__pycache__",
|
||||||
|
".venv",
|
||||||
|
"venv",
|
||||||
|
}
|
||||||
|
if Path(directory).name == ".nightshift":
|
||||||
|
ignored.update({"runs", "run-summary.md", "run.log", "project-context.md", "project-context-chart.md"})
|
||||||
|
return {name for name in names if name in ignored or name.endswith(".egg-info")}
|
||||||
|
|
@ -168,6 +168,7 @@ class TerminalAnimation:
|
||||||
self._width = 0
|
self._width = 0
|
||||||
self._lock = threading.Lock()
|
self._lock = threading.Lock()
|
||||||
self._last_rendered = ""
|
self._last_rendered = ""
|
||||||
|
self._last_status_line = ""
|
||||||
|
|
||||||
def __enter__(self) -> "TerminalAnimation":
|
def __enter__(self) -> "TerminalAnimation":
|
||||||
self.start()
|
self.start()
|
||||||
|
|
@ -194,6 +195,7 @@ class TerminalAnimation:
|
||||||
def update_message(self, message: str) -> None:
|
def update_message(self, message: str) -> None:
|
||||||
with self._lock:
|
with self._lock:
|
||||||
self.message = message
|
self.message = message
|
||||||
|
self._emit_status_line(message)
|
||||||
|
|
||||||
def emit(self, line: str) -> None:
|
def emit(self, line: str) -> None:
|
||||||
if not self.enabled:
|
if not self.enabled:
|
||||||
|
|
@ -238,6 +240,18 @@ class TerminalAnimation:
|
||||||
self.stream.write("\r" + (" " * self._width) + "\r")
|
self.stream.write("\r" + (" " * self._width) + "\r")
|
||||||
self.stream.flush()
|
self.stream.flush()
|
||||||
|
|
||||||
|
def _emit_status_line(self, message: str) -> None:
    """Write *message* as a persistent status line, skipping repeats.

    The line is formatted for ``self.stream`` and written to that same
    stream, so it stays ordered with the animation frames that are cleared
    and redrawn there. When the animation is enabled, the current frame is
    cleared first and redrawn afterwards.
    """
    # NOTE(review): update_message appears to call this while holding
    # self._lock - confirm _clear/_render_frame do not re-acquire it.
    line = format_status_bar_message(message, stream=self.stream)
    if line == self._last_status_line:
        return
    self._last_status_line = line
    if not self.enabled:
        print(line, file=self.stream)
        return
    self._clear()
    print(line, file=self.stream)
    self._render_frame(0)
|
||||||
|
|
||||||
|
|
||||||
def animation_frames(name: str) -> tuple[str, ...]:
|
def animation_frames(name: str) -> tuple[str, ...]:
|
||||||
frames = HOTDOG_ANIMATIONS.get(name)
|
frames = HOTDOG_ANIMATIONS.get(name)
|
||||||
|
|
|
||||||
|
|
@ -153,6 +153,93 @@ class PipelineRunnerTests(unittest.TestCase):
|
||||||
self.assertIn("Retry limit reached", result.reason)
|
self.assertIn("Retry limit reached", result.reason)
|
||||||
self.assertEqual([item.stage_id for item in result.stage_results], ["implement", "review", "implement", "review", "implement", "review"])
|
self.assertEqual([item.stage_id for item in result.stage_results], ["implement", "review", "implement", "review", "implement", "review"])
|
||||||
|
|
||||||
|
def test_malformed_review_gets_strict_retry_without_redrafting(self) -> None:
    """A malformed review is re-run once in strict mode without redrafting."""
    # The fake reviewer answers correctly only when it sees the strict note.
    reviewer_script = "\n".join(
        [
            "import sys",
            "prompt = sys.stdin.read()",
            "if 'Previous review output was malformed' in prompt:",
            "    print('status: pass')",
            "    print('reason: strict retry ok')",
            "    print('next_stage: none')",
            "    print('context_update: none')",
            "else:",
            "    print('files')",
        ]
    )
    implement_stage = StageConfig(id="implement", type="agent", agent="planner", output="implementation-log.md")
    review_stage = StageConfig(
        id="review",
        type="agent_review",
        agent="reviewer",
        on_fail="implement",
        output="review.md",
    )
    summarize_stage = StageConfig(id="summarize", type="summarize", output="final-notes.md")

    with tempfile.TemporaryDirectory() as directory:
        root = Path(directory)
        _write_common_files(root)
        (root / "fake_reviewer.py").write_text(reviewer_script, encoding="utf-8")
        config = make_config(root, (implement_stage, review_stage, summarize_stage), max_retries=2)
        config.agents["reviewer"] = AgentConfig(
            id="reviewer",
            backend="command",
            command="python fake_reviewer.py",
            system_prompt=Path("reviewer.md"),
        )
        store = ArtifactStore(root, ".nightshift", run_id="test-run")
        runner = PipelineRunner(config, store)
        task = parse_tasks(TASK_MD)[0]

        result = runner.run_task(task)

        task_dir = root / ".nightshift" / "runs" / "test-run" / "tasks" / task.id
        first_review = task_dir / "review.md"
        strict_review = task_dir / "review-1.md"
        executed = [item.stage_id for item in result.stage_results]
        self.assertEqual(result.status, "complete")
        self.assertEqual(result.retry_count, 0)
        self.assertEqual(executed, ["implement", "review", "summarize"])
        self.assertTrue(first_review.exists())
        self.assertTrue(strict_review.exists())
        self.assertIn("files", first_review.read_text(encoding="utf-8"))
        self.assertIn("strict retry ok", strict_review.read_text(encoding="utf-8"))
|
||||||
|
|
||||||
|
def test_malformed_review_stops_without_on_fail_redraft(self) -> None:
    """A review that stays malformed fails the task without following on_fail."""
    implement_stage = StageConfig(id="implement", type="agent", agent="planner", output="implementation-log.md")
    review_stage = StageConfig(
        id="review",
        type="agent_review",
        agent="reviewer",
        on_fail="implement",
        output="review.md",
    )

    with tempfile.TemporaryDirectory() as directory:
        root = Path(directory)
        _write_common_files(root)
        # This reviewer never produces the required status lines.
        (root / "fake_reviewer.py").write_text("print('files')\n", encoding="utf-8")
        config = make_config(root, (implement_stage, review_stage), max_retries=2)
        config.agents["reviewer"] = AgentConfig(
            id="reviewer",
            backend="command",
            command="python fake_reviewer.py",
            system_prompt=Path("reviewer.md"),
        )
        store = ArtifactStore(root, ".nightshift", run_id="test-run")
        runner = PipelineRunner(config, store)
        task = parse_tasks(TASK_MD)[0]

        result = runner.run_task(task)

        task_dir = root / ".nightshift" / "runs" / "test-run" / "tasks" / task.id
        executed = [item.stage_id for item in result.stage_results]
        self.assertEqual(result.status, "failed")
        self.assertEqual(result.retry_count, 0)
        self.assertIn("remained malformed", result.reason)
        self.assertEqual(executed, ["implement", "review"])
        self.assertTrue((task_dir / "review.md").exists())
        self.assertTrue((task_dir / "review-1.md").exists())
|
||||||
|
|
||||||
def test_passing_review_next_stage_is_ignored(self) -> None:
|
def test_passing_review_next_stage_is_ignored(self) -> None:
|
||||||
with tempfile.TemporaryDirectory() as directory:
|
with tempfile.TemporaryDirectory() as directory:
|
||||||
root = Path(directory)
|
root = Path(directory)
|
||||||
|
|
|
||||||
70
tests/test_sandbox_run.py
Normal file
70
tests/test_sandbox_run.py
Normal file
|
|
@ -0,0 +1,70 @@
|
||||||
|
from pathlib import Path
|
||||||
|
import tempfile
|
||||||
|
import unittest
|
||||||
|
|
||||||
|
from nightshift.errors import NightShiftError
|
||||||
|
from nightshift.sandbox_run import format_sandbox_run_result, run_sandbox_project
|
||||||
|
|
||||||
|
|
||||||
|
class SandboxRunTests(unittest.TestCase):
    """Dry-run tests for ``run_sandbox_project`` and its result formatter."""

    @staticmethod
    def _make_source(parent: Path, *, with_pyproject: bool = True) -> Path:
        """Create ``parent / "source"`` holding a minimal project and return it."""
        source_dir = parent / "source"
        source_dir.mkdir()
        (source_dir / "nightshift.yaml").write_text("project:\n name: demo\n", encoding="utf-8")
        if with_pyproject:
            (source_dir / "pyproject.toml").write_text(
                "[project]\nname = 'demo'\nversion = '0.1.0'\n",
                encoding="utf-8",
            )
        return source_dir

    def test_sandbox_run_dry_run_copies_existing_project_and_keeps_animation(self) -> None:
        """A dry run copies the project but not ``.nightshift/runs`` and keeps ``--animation``."""
        with tempfile.TemporaryDirectory() as tmp:
            root = Path(tmp)
            source = self._make_source(root)

            # Project state: a task list plus an old run artifact.
            state_dir = source / ".nightshift"
            state_dir.mkdir()
            (state_dir / "tasks.md").write_text(
                "- [ ] TASK-001: Demo\n\nAcceptance Criteria:\n- done\n",
                encoding="utf-8",
            )
            old_runs = state_dir / "runs"
            old_runs.mkdir()
            (old_runs / "old.txt").write_text("old artifact", encoding="utf-8")
            output = root / "sandbox"

            result = run_sandbox_project(
                source,
                output=output,
                task="TASK-001",
                dry_run=True,
            )

            rendered = format_sandbox_run_result(result)
            copied_project = output / "project"

            self.assertIn("Dry run: true", rendered)
            self.assertEqual(result.project_dir, copied_project)
            self.assertTrue((copied_project / "nightshift.yaml").exists())
            self.assertTrue((copied_project / ".nightshift" / "tasks.md").exists())
            # The old run artifacts must not appear in the copy.
            self.assertFalse((copied_project / ".nightshift" / "runs").exists())
            self.assertIn("--animation", result.command)
            self.assertNotIn("--no-animation", result.command)
            self.assertIn("TASK-001", result.command)

    def test_sandbox_run_timestamped_uses_integ_runs_directory(self) -> None:
        """A timestamped run is placed directly under ``<root>/integ_runs``."""
        with tempfile.TemporaryDirectory() as tmp:
            root = Path(tmp)
            source = self._make_source(root)

            result = run_sandbox_project(
                source,
                root=root,
                timestamped=True,
                all_tasks=True,
                dry_run=True,
            )

            expected_parent = root / "integ_runs"
            self.assertEqual(result.directory.parent, expected_parent)
            self.assertIn("--all", result.command)

    def test_sandbox_run_requires_output_or_timestamped(self) -> None:
        """Calling with neither ``output`` nor ``timestamped`` raises ``NightShiftError``."""
        with tempfile.TemporaryDirectory() as tmp:
            source = self._make_source(Path(tmp), with_pyproject=False)

            expected_message = "provide --output or --timestamped"
            with self.assertRaisesRegex(NightShiftError, expected_message):
                run_sandbox_project(source, task="TASK-001", dry_run=True)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
unittest.main()
|
||||||
|
|
@ -57,6 +57,17 @@ class TerminalStylingTests(unittest.TestCase):
|
||||||
|
|
||||||
self.assertEqual(output.getvalue().strip(), "plain log")
|
self.assertEqual(output.getvalue().strip(), "plain log")
|
||||||
|
|
||||||
|
def test_terminal_animation_status_update_prints_for_non_tty(self) -> None:
    """Check that a status update reaches stdout when the stream is a ``StringIO``.

    The animation is built on a plain ``StringIO`` stream and is never
    started. After ``update_message`` is called with ``sys.stdout``
    patched, the captured text must contain both the ``[NightShift]``
    tag and the stage text.
    """
    # StringIO.isatty() is False; presumably this is what selects the
    # plain-print path in TerminalAnimation (confirm against terminal.py).
    stream = StringIO()
    output = StringIO()
    animation = TerminalAnimation(stream=stream)

    with patch("sys.stdout", output):
        animation.update_message("Task: TASK-001 | >> Stage: plan")

    printed = output.getvalue()
    for expected_fragment in ("[NightShift]", "Stage: plan"):
        self.assertIn(expected_fragment, printed)
|
||||||
|
|
||||||
def test_terminal_animation_renders_immediately_when_started(self) -> None:
|
def test_terminal_animation_renders_immediately_when_started(self) -> None:
|
||||||
stream = FakeTTY()
|
stream = FakeTTY()
|
||||||
animation = TerminalAnimation(
|
animation = TerminalAnimation(
|
||||||
|
|
@ -85,10 +96,12 @@ class TerminalStylingTests(unittest.TestCase):
|
||||||
with patch("sys.stdout", output):
|
with patch("sys.stdout", output):
|
||||||
animation.start()
|
animation.start()
|
||||||
animation.emit("log line")
|
animation.emit("log line")
|
||||||
|
animation.update_message("Stage: write")
|
||||||
stream_output = stream.getvalue()
|
stream_output = stream.getvalue()
|
||||||
animation.stop()
|
animation.stop()
|
||||||
|
|
||||||
self.assertIn("log line", output.getvalue())
|
self.assertIn("log line", output.getvalue())
|
||||||
|
self.assertIn("Stage: write", output.getvalue())
|
||||||
self.assertGreaterEqual(stream_output.count("Stage: plan"), 2)
|
self.assertGreaterEqual(stream_output.count("Stage: plan"), 2)
|
||||||
|
|
||||||
def test_format_status_bar_message_uses_status_color(self) -> None:
|
def test_format_status_bar_message_uses_status_color(self) -> None:
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue
Block a user