Project runner tool and writer tooling fixes

I wanted the animated status bar...
2026-06-14 10:08:37 +00:00 · 2026-05-22 04:17:37 -07:00 · 2026-05-22 04:17:37 -07:00 · a0ad9b2ac0
commit a0ad9b2ac0
parent d928a52fb3
9 changed files with 614 additions and 5 deletions
--- a/docs/bugs.md
+++ b/docs/bugs.md
@ -0,0 +1,3 @@
 # Status-bar log descriptions are slightly off: "Starting ollama HTTP model invocation" implies the run is stuck starting when it is not.
 # We've stopped updating the version.
--- a/nightshift/cli.py
+++ b/nightshift/cli.py
@ -15,6 +15,7 @@ from .integ_setup import format_setup_result, setup_python_project
 from .integ_test import format_integration_test_result, run_integration_test
 from .pipeline import PipelineRunner
 from .runlog import RunLogger
 from .sandbox_run import format_sandbox_run_result, run_sandbox_project
 from .status import build_status, format_status
 from .task_tests import check_task_test_files, format_task_test_checks, missing_task_test_paths
 from .terminal import HOTDOG_ANIMATIONS, TerminalAnimation, format_banner, style_text
@ -131,6 +132,38 @@ def build_parser() -> argparse.ArgumentParser:
    integ_test_parser.add_argument("--setup-skip-validate", action="store_true", help="Skip validation during setup.")
    integ_test_parser.add_argument("--dry-run", action="store_true", help="Print commands without running setup or tasks.")
    sandbox_parser = subparsers.add_parser(
        "sandbox-run",
        help="Copy an existing NightShift project into a sandbox, set it up, and run it.",
    )
    sandbox_parser.add_argument("--project", required=True, help="Existing NightShift project directory to copy.")
    sandbox_output = sandbox_parser.add_mutually_exclusive_group(required=True)
    sandbox_output.add_argument("--output", help="Sandbox output directory. The project is copied to OUTPUT/project.")
    sandbox_output.add_argument(
        "--timestamped",
        action="store_true",
        help="Create a timestamped sandbox under ROOT/integ_runs, like integ-test.",
    )
    sandbox_parser.add_argument("--root", default=".", help="Root used with --timestamped. Defaults to current directory.")
    sandbox_parser.add_argument("--task", help="Specific task id to run.")
    sandbox_parser.add_argument("--all", action="store_true", help="Run all runnable incomplete tasks.")
    sandbox_parser.add_argument("--force", action="store_true", help="Overwrite an existing OUTPUT/project copy.")
    sandbox_parser.add_argument(
        "--setup-extra",
        action="append",
        default=["pytest"],
        help="Extra package to install during setup. May be repeated. Defaults to pytest.",
    )
    sandbox_parser.add_argument("--setup-skip-validate", action="store_true", help="Skip validation during setup.")
    sandbox_parser.add_argument("--dry-run", action="store_true", help="Create the sandbox copy and print commands without running setup or tasks.")
    sandbox_parser.add_argument(
        "--animation",
        default="status_dots",
        choices=tuple(sorted(HOTDOG_ANIMATIONS)),
        help="Terminal animation to show while the sandboxed run is active.",
    )
    sandbox_parser.add_argument("--no-animation", action="store_true", help="Disable terminal animation.")
    integ_report_parser = subparsers.add_parser("integ-report", help="Summarize the latest integration run.")
    integ_report_parser.add_argument("--root", default=".", help="Repository root where integ_runs/ is located.")
    integ_report_parser.add_argument("--latest", action="store_true", help="Report the latest integration run.")
@ -309,6 +342,24 @@ def main(argv: list[str] | None = None) -> int:
            print(format_integration_test_result(result))
            return result.exit_code
        if args.command == "sandbox-run":
            result = run_sandbox_project(
                args.project,
                output=args.output,
                timestamped=args.timestamped,
                root=args.root,
                task=args.task,
                all_tasks=args.all,
                setup_extras=tuple(args.setup_extra or ()),
                skip_setup_validate=args.setup_skip_validate,
                dry_run=args.dry_run,
                animation=args.animation,
                no_animation=args.no_animation,
                force=args.force,
            )
            print(format_sandbox_run_result(result))
            return result.exit_code
        if args.command == "integ-report":
            report = build_integration_report(args.root, latest=True)
            print(format_integration_report(report))
--- a/nightshift/pipeline.py
+++ b/nightshift/pipeline.py
@ -229,7 +229,11 @@ class PipelineRunner:
                index += 1
                continue
-            target_stage = stage.on_fail or result.next_stage
+            target_stage = result.next_stage or (
                stage.on_fail
                if not (stage.type in {"agent_review", "review"} and _is_malformed_review_result(result))
                else None
            )
            analysis_note = self._write_failure_diagnostics(stage, task, result, retry_count)
            if analysis_note:
                retry_notes.append(analysis_note)
@ -481,7 +485,7 @@ class PipelineRunner:
            result = self.agent_executor.run_stage(
                self._stage_for_retry_agent(stage, retry_count),
                task,
-                previous_outputs,
+                _review_previous_outputs(previous_outputs) if stage.type in {"agent_review", "review"} else previous_outputs,
                retry_notes,
                project_context=context.project_context,
                task_context=context.task_context,
@ -501,6 +505,17 @@ class PipelineRunner:
                    context.task_context,
                    context.retry_context,
                )
            if stage.type in {"agent_review", "review"} and _is_malformed_review_result(result):
                return self._rerun_malformed_review(
                    stage,
                    task,
                    result,
                    previous_outputs,
                    retry_notes,
                    retry_count,
                    context.project_context,
                    context.task_context,
                )
            return result
        if stage.type in COMMAND_STAGE_TYPES:
            return self.command_executor.run_stage(_stage_with_attempt_output(stage, retry_count), task.id)
@ -1217,6 +1232,59 @@ class PipelineRunner:
        )
        return f"Debugger output: {debug_result.output_path or 'none'}."
    def _rerun_malformed_review(
        self,
        stage: StageConfig,
        task: Task,
        malformed_result: StageResult,
        previous_outputs: dict[str, str],
        retry_notes: list[str],
        retry_count: int,
        project_context: str,
        task_context: str,
    ) -> StageResult:
        """Run the review stage one more time with strict formatting notes.

        The rerun writes to the output name produced by ``_attempt_filename``
        for attempt ``retry_count + 1`` and receives the compacted previous
        outputs plus a compacted copy of the malformed review text.

        Returns the rerun's result when it is well-formed; otherwise a
        ``"fail"`` result whose reason says the review remained malformed.
        """
        rerun_output = _attempt_filename(stage.output or f"{stage.id}.md", retry_count + 1)
        rerun_stage = replace(self._stage_for_retry_agent(stage, retry_count), output=rerun_output)
        self.logger.event(
            "agent.rerun",
            "Re-running review after malformed output",
            stage_id=stage.id,
            task_id=task.id,
        )

        # Existing retry notes first, then the strict formatting instruction.
        notes = list(retry_notes)
        notes.append(
            "Previous review output was malformed. Return exactly four lines: status, reason, next_stage, context_update. Do not return prose, headings, or analysis."
        )

        # Give the reviewer a short excerpt of what it produced last time.
        outputs = _review_previous_outputs(previous_outputs)
        malformed_text = self._read_output(malformed_result.output_path)
        outputs["malformed_review_output"] = _compact_previous_output(malformed_text, max_chars=800)

        rerun = self.agent_executor.run_stage(
            rerun_stage,
            task,
            outputs,
            notes,
            project_context=project_context,
            task_context=task_context,
            retry_context="\n".join([f"- {note}" for note in notes]),
        )
        if not _is_malformed_review_result(rerun):
            return rerun

        # Still malformed: fail with a reason that mentions "remained malformed".
        return StageResult(
            rerun.stage_id,
            "fail",
            (
                "Review output remained malformed after a strict formatting retry. "
                "Stopping without redrafting; inspect the applied draft and review artifact."
            ),
            output_path=rerun.output_path,
            context_update=rerun.context_update,
        )
    def _modified_files(self) -> tuple[str, ...]:
        completed = subprocess.run(
            ["git", "status", "--short"],
@ -1608,6 +1676,36 @@ def _invalid_file_writer_output_summary(output: str, reason: str, max_chars: int
    return "\n".join(lines)
 def _is_malformed_review_result(result: StageResult) -> bool:
    return result.status == "fail" and (
        "Review output did not include a valid status" in result.reason
        or "Review output remained malformed" in result.reason
    )
 def _review_previous_outputs(previous_outputs: dict[str, str], max_chars: int = 1600) -> dict[str, str]:
    """Return a compacted copy of previous stage outputs for a review prompt.

    Draft/patch/apply-related outputs are compacted to ``max_chars``, the
    plan and context outputs to 500 characters, and everything else to 800.
    """
    draft_related = frozenset(
        {
            "applied.patch",
            "normalized-draft.patch",
            "scene-draft.patch",
            "draft_scene",
            "apply_draft",
            "validate_draft",
            "test",
            "review",
        }
    )
    background = frozenset({"plan", "semantic_context", "context"})

    def budget_for(name: str) -> int:
        # The draft/patch check comes first, so it wins over the background set.
        if name in draft_related or name.endswith(".patch") or "draft" in name or "apply" in name:
            return max_chars
        return 500 if name in background else 800

    return {
        name: _compact_previous_output(text, max_chars=budget_for(name))
        for name, text in previous_outputs.items()
    }
 def _file_writer_error_reason(stage: StageConfig, reason: str) -> str:
    guidance = _file_writer_stage_guidance(stage)
    if not guidance or "not allowed for this stage" not in reason:
--- a/nightshift/project_templates/tutorial-novel/.nightshift/tasks.md
+++ b/nightshift/project_templates/tutorial-novel/.nightshift/tasks.md
@ -109,11 +109,101 @@ Acceptance Criteria:
 - Updates durable state
 ---
 - [ ] SCENE-031: Rollerblade courier run
 Dependencies:
 - SCENE-003
 Description:
 Proxy and Cricket rollerblade through late-night Seattle delivering encrypted NightShift inference keys, salvaged hardware, and cached datasets between squatters, artists, and underground operators.
 The scene should establish:
 - movement through the city
 - underground mutual aid systems
 - degraded urban infrastructure
 - physical geography of Seattle
 - emotional intimacy through transit
 Environmental details should emphasize:
 - wet pavement reflecting neon transit signage
 - abandoned autonomous delivery vehicles
 - late-night teriyaki shops
 - extension cords hanging between apartments
 - cracked sidewalks
 - rooftop antennas
 - stale vape clouds in freight elevators
 A subtle anomaly appears when an unrelated ad display briefly shows imagery identical to visuals seen elsewhere in the story.
 Nobody reacts strongly.
 Acceptance Criteria:
 - Strong Seattle atmosphere
 - Deepens Proxy and Cricket relationship naturally
 - Includes rollerblading materially throughout the scene
 - Introduces subtle recurring anomaly
 - Avoids exposition-heavy dialogue
 - Scene length between 1400-2400 words
 - Writes:
  - `story/chapters/chapter-001/scene-003a.md`
 - Updates durable state
 ---
 - [ ] SCENE-032: Kremwerk furry rave
 Dependencies:
 - SCENE-031
 Description:
 Proxy and DJ BLOODMONEY attend a crowded underground furry rave at Kremwerk following one of BLOODMONEY's pirate jungle sets.
 The scene should establish:
 - queer underground culture
 - synthetic identity experimentation
 - emotional sincerity beneath irony
 - anti-corporate creative spaces
 - generated aesthetics used communally rather than commercially
 - shape of the romance between Proxy and BLOODMONEY
 - makeout scene between Proxy and BLOODMONEY
 The rave should feel:
 - affectionate
 - overheated
 - crowded
 - emotionally necessary
 Environmental details should include:
 - soaked Capitol Hill sidewalks
 - damp faux fur
 - dangling extension cords powering chargers
 - jungle edits mixed with bassline and hyperpop
 - generated visuals projected onto concrete pillars
 - patched jackets with dead startup logos
 - kandi bracelets
 - old server racks repurposed into lighting rigs
 - rollerbladers moving through industrial hallways
 - people discussing models like music genres
 Proxy gradually realizes many attendees rely emotionally on systems like NightShift.
 Acceptance Criteria:
 - Avoids mocking underground/furry culture
 - Strong sensory environmental detail
 - Reinforces themes of synthetic companionship and community
 - Includes subtle emotional unease beneath warmth
 - Maintains grounded tone
 - Scene length between 1800-3000 words
 - Writes:
  - `story/chapters/chapter-001/scene-003b.md`
 - Updates durable state
 ---
 - [ ] SCENE-004: Rich district delivery
 Dependencies:
- SCENE-003
+- SCENE-032
 Description:
 Proxy delivers salvaged compute hardware to a wealthy private social club operating in a quiet offline district.
@ -233,19 +323,59 @@ Proxy becomes uncomfortable with:
 Acceptance Criteria:
 - Shows expanding underground compute economy
- Deepens Proxy’s internal conflict
+- Deepens Proxy's internal conflict
 - Introduces operational stress
 - Maintains grounded tone
 - Writes:
  - `story/chapters/chapter-002/scene-002.md`
 - Updates durable state
 ---
 - [ ] SCENE-081: Free inference night
 Dependencies:
 - SCENE-008
 Description:
 Following a successful scavenging run, NightShift temporarily opens free public inference access for one evening.
 Artists, musicians, lonely users, and exhausted workers flood the squat looking for compute access.
 The scene should establish:
 - NightShift as emotional infrastructure
 - positive social uses of synthetic systems
 - underground mutual aid culture
 - growing operational stress
 Examples should include:
 - collaborative generated visuals
 - musicians creating samples
 - users generating outfit concepts before events
 - emotionally vulnerable conversations with companion systems
 - translation of old documents and messages
 - communal experimentation with weird model outputs
 Proxy slowly realizes NightShift has become psychologically essential for many people.
 This realization unsettles her.
 Acceptance Criteria:
 - Avoids simplistic "AI bad" framing
 - Balances warmth with discomfort
 - Strong environmental detail
 - Shows growing scale of NightShift operations
 - Reinforces emotional dependency themes
 - Scene length between 1800-3000 words
 - Writes:
  - `story/chapters/chapter-002/scene-002a.md`
 - Updates durable state
 ---
 - [ ] SCENE-009: Sister Circuit
 Dependencies:
- SCENE-008
+- SCENE-081
 Description:
 Proxy meets Sister Circuit in a server monastery outside Tacoma.
--- a/nightshift/sandbox_run.py
+++ b/nightshift/sandbox_run.py
@ -0,0 +1,143 @@
 """General-purpose setup-and-run sandbox command."""
 from __future__ import annotations
 from dataclasses import dataclass
 from datetime import datetime, timezone
 from pathlib import Path
 import shutil
 import subprocess
 import venv
 from .errors import NightShiftError
 from .integ import _initialize_project_git_repo
 from .integ_setup import IntegrationSetupResult, setup_python_project
@dataclass(frozen=True)
 class SandboxRunResult:
    """Immutable summary of one sandbox run, as returned by run_sandbox_project."""

    # Resolved path of the project directory that was copied.
    source_project: Path
    # Sandbox directory that holds both the project copy and the venv.
    directory: Path
    # Location of the copied project (the sandbox directory's "project" child).
    project_dir: Path
    # Location of the virtual environment (the sandbox directory's ".venv" child).
    venv_dir: Path
    # Result returned by setup_python_project for the copied project.
    setup: IntegrationSetupResult
    # The "nightshift.cli run" command line built for the copied project.
    command: tuple[str, ...]
    # Return code of the run subprocess; stays 0 when the run was a dry run.
    exit_code: int
    # True when the run was requested as a dry run.
    dry_run: bool
 def run_sandbox_project(
    project: str | Path,
    *,
    output: str | Path | None = None,
    timestamped: bool = False,
    root: str | Path = ".",
    task: str | None = None,
    all_tasks: bool = False,
    setup_extras: tuple[str, ...] = ("pytest",),
    skip_setup_validate: bool = False,
    dry_run: bool = False,
    animation: str = "status_dots",
    no_animation: bool = False,
    force: bool = False,
 ) -> SandboxRunResult:
    """Copy a NightShift project into a sandbox, set it up, and run it.

    The project is copied to ``<sandbox>/project`` (skipping the entries
    selected by ``_copy_ignore``) and a virtual environment is placed at
    ``<sandbox>/.venv``. The sandbox is either ``output`` or a timestamped
    directory under ``root/integ_runs``.

    Args:
        project: Existing project directory; must contain ``nightshift.yaml``.
        output: Explicit sandbox directory (mutually exclusive with
            ``timestamped``).
        timestamped: Create the sandbox under ``root/integ_runs`` instead.
        root: Base directory used with ``timestamped``.
        task: Single task id to run (mutually exclusive with ``all_tasks``).
        all_tasks: Pass ``--all`` to the run command instead of ``--task``.
        setup_extras: Extras forwarded to ``setup_python_project``.
        skip_setup_validate: Disable validation during setup.
        dry_run: Still copy the project, but skip venv creation, git
            initialisation and the run subprocess; ``exit_code`` stays 0.
        animation: Animation name passed via ``--animation``.
        no_animation: Pass ``--no-animation`` instead of an animation name.
        force: Replace an existing, non-empty ``<sandbox>/project``.

    Returns:
        A ``SandboxRunResult`` describing the sandbox, the command and its
        exit code.

    Raises:
        NightShiftError: On conflicting or missing task/output options, a
            missing or invalid source project, a sandbox that overlaps the
            source project, or an existing project copy without ``force``.
    """
    if task and all_tasks:
        raise NightShiftError("Sandbox run error: use either --task or --all, not both.")
    if not task and not all_tasks:
        raise NightShiftError("Sandbox run error: provide --task or --all.")
    if output and timestamped:
        raise NightShiftError("Sandbox run error: use either --output or --timestamped, not both.")
    if not output and not timestamped:
        raise NightShiftError("Sandbox run error: provide --output or --timestamped.")
    source = Path(project).resolve()
    if not source.exists() or not source.is_dir():
        raise NightShiftError(f"Sandbox run error: project directory does not exist: {source}")
    if not (source / "nightshift.yaml").exists():
        raise NightShiftError(f"Sandbox run error: project does not contain nightshift.yaml: {source}")
    sandbox_dir = _sandbox_directory(output, root=root, timestamped=timestamped)
    project_dir = sandbox_dir / "project"
    venv_dir = sandbox_dir / ".venv"
    # Copying a tree into a directory inside itself recurses until the copy
    # fails, and --force would rmtree a project_dir that is (or contains) the
    # source. Refuse any overlap before touching the filesystem.
    if project_dir == source or source in project_dir.parents or project_dir in source.parents:
        raise NightShiftError(
            "Sandbox run error: sandbox project directory overlaps the source project "
            f"({project_dir}); choose an --output or --root outside {source}."
        )
    if project_dir.exists() and any(project_dir.iterdir()) and not force:
        raise NightShiftError(f"Sandbox run error: output project already exists: {project_dir}")
    sandbox_dir.mkdir(parents=True, exist_ok=True)
    if project_dir.exists():
        shutil.rmtree(project_dir)
    shutil.copytree(source, project_dir, ignore=_copy_ignore)
    if not dry_run:
        if not venv_dir.exists():
            venv.EnvBuilder(with_pip=True).create(venv_dir)
        _initialize_project_git_repo(project_dir)
    setup = setup_python_project(
        project_dir,
        extras=setup_extras,
        validate=not skip_setup_validate,
        dry_run=dry_run,
    )
    command = [str(setup.python), "-m", "nightshift.cli", "run"]
    if no_animation:
        command.append("--no-animation")
    elif animation:
        command.extend(["--animation", animation])
    if all_tasks:
        command.append("--all")
    else:
        command.extend(["--task", task or ""])
    exit_code = 0
    if not dry_run:
        # Output is not captured, so the child writes straight to this terminal.
        completed = subprocess.run(command, cwd=project_dir, text=True, encoding="utf-8", errors="replace")
        exit_code = completed.returncode
    return SandboxRunResult(
        source_project=source,
        directory=sandbox_dir,
        project_dir=project_dir,
        venv_dir=venv_dir,
        setup=setup,
        command=tuple(command),
        exit_code=exit_code,
        dry_run=dry_run,
    )
 def format_sandbox_run_result(result: SandboxRunResult) -> str:
    """Render a sandbox run result as a newline-separated summary.

    A "Dry run: true" line is placed first when the result is a dry run.
    """
    heading = ["Dry run: true"] if result.dry_run else []
    artifacts_dir = result.project_dir / ".nightshift"
    run_command = " ".join(result.command)
    summary = [
        f"Source project: {result.source_project}",
        f"Sandbox: {result.directory}",
        f"Project: {result.project_dir}",
        f"Venv: {result.venv_dir}",
        f"Run command: {run_command}",
        f"Exit code: {result.exit_code}",
        f"Artifacts: {artifacts_dir}",
    ]
    return "\n".join(heading + summary)
 def _sandbox_directory(output: str | Path | None, *, root: str | Path, timestamped: bool) -> Path:
    if output:
        return Path(output).resolve()
    base = Path(root).resolve() / "integ_runs"
    run_id = datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%S.%fZ")
    return base / run_id
 def _copy_ignore(directory: str, names: list[str]) -> set[str]:
    ignored = {
        ".git",
        ".pytest_cache",
        ".ruff_cache",
        "__pycache__",
        ".venv",
        "venv",
    }
    if Path(directory).name == ".nightshift":
        ignored.update({"runs", "run-summary.md", "run.log", "project-context.md", "project-context-chart.md"})
    return {name for name in names if name in ignored or name.endswith(".egg-info")}
--- a/nightshift/terminal.py
+++ b/nightshift/terminal.py
@ -168,6 +168,7 @@ class TerminalAnimation:
        self._width = 0
        self._lock = threading.Lock()
        self._last_rendered = ""
        self._last_status_line = ""
    def __enter__(self) -> "TerminalAnimation":
        self.start()
@ -194,6 +195,7 @@ class TerminalAnimation:
    def update_message(self, message: str) -> None:
        """Store ``message`` as the current animation message and emit it as a status line."""
        # Only the attribute write happens under the lock; the status line is
        # emitted after the lock has been released.
        with self._lock:
            self.message = message
        self._emit_status_line(message)
    def emit(self, line: str) -> None:
        if not self.enabled:
@ -238,6 +240,18 @@ class TerminalAnimation:
        self.stream.write("\r" + (" " * self._width) + "\r")
        self.stream.flush()
    def _emit_status_line(self, message: str) -> None:
        """Print the formatted status line for ``message`` unless it repeats the last one.

        When the animation is enabled, the current frame is cleared before
        printing and frame 0 is rendered again afterwards.
        """
        rendered = format_status_bar_message(message, stream=self.stream)
        if rendered == self._last_status_line:
            # Same line as last time: nothing new to show.
            return
        self._last_status_line = rendered
        if not self.enabled:
            print(rendered)
            return
        self._clear()
        print(rendered)
        self._render_frame(0)
 def animation_frames(name: str) -> tuple[str, ...]:
    frames = HOTDOG_ANIMATIONS.get(name)
--- a/tests/test_pipeline.py
+++ b/tests/test_pipeline.py
@ -153,6 +153,93 @@ class PipelineRunnerTests(unittest.TestCase):
            self.assertIn("Retry limit reached", result.reason)
            self.assertEqual([item.stage_id for item in result.stage_results], ["implement", "review", "implement", "review", "implement", "review"])
    def test_malformed_review_gets_strict_retry_without_redrafting(self) -> None:
        # The fake reviewer answers correctly only when the prompt carries the
        # strict-retry note; its first answer ("files") is malformed.
        reviewer_script = (
            "import sys\n"
            "prompt = sys.stdin.read()\n"
            "if 'Previous review output was malformed' in prompt:\n"
            "    print('status: pass')\n"
            "    print('reason: strict retry ok')\n"
            "    print('next_stage: none')\n"
            "    print('context_update: none')\n"
            "else:\n"
            "    print('files')"
        )
        with tempfile.TemporaryDirectory() as tmp:
            root = Path(tmp)
            _write_common_files(root)
            (root / "fake_reviewer.py").write_text(reviewer_script, encoding="utf-8")
            implement = StageConfig(id="implement", type="agent", agent="planner", output="implementation-log.md")
            review = StageConfig(
                id="review",
                type="agent_review",
                agent="reviewer",
                on_fail="implement",
                output="review.md",
            )
            summarize = StageConfig(id="summarize", type="summarize", output="final-notes.md")
            config = make_config(root, (implement, review, summarize), max_retries=2)
            config.agents["reviewer"] = AgentConfig(
                id="reviewer",
                backend="command",
                command="python fake_reviewer.py",
                system_prompt=Path("reviewer.md"),
            )
            runner = PipelineRunner(config, ArtifactStore(root, ".nightshift", run_id="test-run"))
            task = parse_tasks(TASK_MD)[0]

            outcome = runner.run_task(task)

            artifacts = root / ".nightshift" / "runs" / "test-run" / "tasks" / task.id
            first_review = artifacts / "review.md"
            strict_review = artifacts / "review-1.md"
            self.assertEqual(outcome.status, "complete")
            self.assertEqual(outcome.retry_count, 0)
            self.assertEqual([item.stage_id for item in outcome.stage_results], ["implement", "review", "summarize"])
            self.assertTrue(first_review.exists())
            self.assertTrue(strict_review.exists())
            self.assertIn("files", first_review.read_text(encoding="utf-8"))
            self.assertIn("strict retry ok", strict_review.read_text(encoding="utf-8"))
    def test_malformed_review_stops_without_on_fail_redraft(self) -> None:
        # The fake reviewer always prints "files", so the strict retry is
        # malformed as well.
        with tempfile.TemporaryDirectory() as tmp:
            root = Path(tmp)
            _write_common_files(root)
            (root / "fake_reviewer.py").write_text("print('files')\n", encoding="utf-8")
            implement = StageConfig(id="implement", type="agent", agent="planner", output="implementation-log.md")
            review = StageConfig(
                id="review",
                type="agent_review",
                agent="reviewer",
                on_fail="implement",
                output="review.md",
            )
            config = make_config(root, (implement, review), max_retries=2)
            config.agents["reviewer"] = AgentConfig(
                id="reviewer",
                backend="command",
                command="python fake_reviewer.py",
                system_prompt=Path("reviewer.md"),
            )
            runner = PipelineRunner(config, ArtifactStore(root, ".nightshift", run_id="test-run"))
            task = parse_tasks(TASK_MD)[0]

            outcome = runner.run_task(task)

            artifacts = root / ".nightshift" / "runs" / "test-run" / "tasks" / task.id
            self.assertEqual(outcome.status, "failed")
            self.assertEqual(outcome.retry_count, 0)
            self.assertIn("remained malformed", outcome.reason)
            self.assertEqual([item.stage_id for item in outcome.stage_results], ["implement", "review"])
            for artifact_name in ("review.md", "review-1.md"):
                self.assertTrue((artifacts / artifact_name).exists())
    def test_passing_review_next_stage_is_ignored(self) -> None:
        with tempfile.TemporaryDirectory() as directory:
            root = Path(directory)
--- a/tests/test_sandbox_run.py
+++ b/tests/test_sandbox_run.py
@ -0,0 +1,70 @@
 from pathlib import Path
 import tempfile
 import unittest
 from nightshift.errors import NightShiftError
 from nightshift.sandbox_run import format_sandbox_run_result, run_sandbox_project
 class SandboxRunTests(unittest.TestCase):
    @staticmethod
    def _make_source(root: Path, *, with_pyproject: bool = True) -> Path:
        # Create root/source with nightshift.yaml and, optionally, pyproject.toml.
        source = root / "source"
        source.mkdir()
        (source / "nightshift.yaml").write_text("project:\n  name: demo\n", encoding="utf-8")
        if with_pyproject:
            (source / "pyproject.toml").write_text(
                "[project]\nname = 'demo'\nversion = '0.1.0'\n", encoding="utf-8"
            )
        return source

    def test_sandbox_run_dry_run_copies_existing_project_and_keeps_animation(self) -> None:
        with tempfile.TemporaryDirectory() as tmp:
            root = Path(tmp)
            source = self._make_source(root)
            state_dir = source / ".nightshift"
            state_dir.mkdir()
            (state_dir / "tasks.md").write_text(
                "- [ ] TASK-001: Demo\n\nAcceptance Criteria:\n- done\n", encoding="utf-8"
            )
            old_runs = state_dir / "runs"
            old_runs.mkdir()
            (old_runs / "old.txt").write_text("old artifact", encoding="utf-8")
            output = root / "sandbox"

            result = run_sandbox_project(
                source,
                output=output,
                task="TASK-001",
                dry_run=True,
            )
            rendered = format_sandbox_run_result(result)

            copied = output / "project"
            self.assertIn("Dry run: true", rendered)
            self.assertEqual(result.project_dir, copied)
            self.assertTrue((copied / "nightshift.yaml").exists())
            self.assertTrue((copied / ".nightshift" / "tasks.md").exists())
            # Previous run artifacts must not travel into the sandbox copy.
            self.assertFalse((copied / ".nightshift" / "runs").exists())
            self.assertIn("--animation", result.command)
            self.assertNotIn("--no-animation", result.command)
            self.assertIn("TASK-001", result.command)

    def test_sandbox_run_timestamped_uses_integ_runs_directory(self) -> None:
        with tempfile.TemporaryDirectory() as tmp:
            root = Path(tmp)
            source = self._make_source(root)

            result = run_sandbox_project(
                source,
                root=root,
                timestamped=True,
                all_tasks=True,
                dry_run=True,
            )

            self.assertEqual(result.directory.parent, root / "integ_runs")
            self.assertIn("--all", result.command)

    def test_sandbox_run_requires_output_or_timestamped(self) -> None:
        with tempfile.TemporaryDirectory() as tmp:
            source = self._make_source(Path(tmp), with_pyproject=False)
            with self.assertRaisesRegex(NightShiftError, "provide --output or --timestamped"):
                run_sandbox_project(source, task="TASK-001", dry_run=True)
 if __name__ == "__main__":
    unittest.main()
--- a/tests/test_terminal.py
+++ b/tests/test_terminal.py
@ -57,6 +57,17 @@ class TerminalStylingTests(unittest.TestCase):
        self.assertEqual(output.getvalue().strip(), "plain log")
    def test_terminal_animation_status_update_prints_for_non_tty(self) -> None:
        # The animation gets a plain StringIO stream; the status line is
        # expected on the patched sys.stdout.
        printed = StringIO()
        animation = TerminalAnimation(stream=StringIO())

        with patch("sys.stdout", printed):
            animation.update_message("Task: TASK-001 | >> Stage: plan")

        text = printed.getvalue()
        for expected in ("[NightShift]", "Stage: plan"):
            self.assertIn(expected, text)
    def test_terminal_animation_renders_immediately_when_started(self) -> None:
        stream = FakeTTY()
        animation = TerminalAnimation(
@ -85,10 +96,12 @@ class TerminalStylingTests(unittest.TestCase):
        with patch("sys.stdout", output):
            animation.start()
            animation.emit("log line")
            animation.update_message("Stage: write")
            stream_output = stream.getvalue()
            animation.stop()
        self.assertIn("log line", output.getvalue())
        self.assertIn("Stage: write", output.getvalue())
        self.assertGreaterEqual(stream_output.count("Stage: plan"), 2)
    def test_format_status_bar_message_uses_status_color(self) -> None:
		`@ -0,0 +1,3 @@`
							`# descriptions for logs are slightly off for the status thing. "Starting ollama HTTP model invocation" implies that it's stuck starting when it's not.`

							`# We've stopped updating the version.`