Improve integ setup scripts and add the what-happened command

- Added nightshift what-happened to summarize the latest run/task failure from artifacts.
  - Added integ-run --setup with setup options, so sandbox creation can immediately run integ-setup.
  - integ-setup output now explicitly shows the venv activation command.
  - Command stages now prefer a detected project/adjacent .venv, so python -m pytest -q should use the integ venv without relying on shell activation.
  - Retry command outputs now get attempt-specific artifact names, like test-output-1.txt, instead of overwriting.
  - Docs updated in README.md, the pastebin template README, and examples/tutorial/03-pastebin.
  - Added pytest config so root python -m pytest -q ignores generated integ/template target tests.
  - Version bumped to 0.2.4-alpha-bratwurst-relish.
This commit is contained in:
K. Hodges 2026-05-20 04:20:15 -07:00
parent 93a50ddb42
commit 2f2146f47d
14 changed files with 479 additions and 12 deletions

View File

@ -133,13 +133,25 @@ Create an isolated integration sandbox for a template:
python -m nightshift.cli integ-run --template tutorial-pastebin
```
To create the sandbox and run the Python setup immediately:
```bash
python -m nightshift.cli integ-run --template tutorial-pastebin --setup
```
Then run the Python project setup helper. It finds the generated venv, installs this NightShift checkout into it, installs the target project, installs pytest by default, and runs `nightshift validate`:
```bash
python -m nightshift.cli integ-setup --project integ_runs/<timestamp>/project
```
After setup, run from the generated project with the venv Python:
`integ-setup` cannot activate the venv for your current shell. If you want plain `python` and `nightshift` to resolve to the integration venv in PowerShell, run:
```powershell
integ_runs\<timestamp>\.venv\Scripts\Activate.ps1
```
After setup, you can also run from the generated project with the explicit venv Python:
```powershell
integ_runs\<timestamp>\.venv\Scripts\python.exe -m nightshift.cli run --task TASK-001
@ -151,6 +163,12 @@ Bash:
integ_runs/<timestamp>/.venv/bin/python -m nightshift.cli run --task TASK-001
```
After a run, explain the latest pass or failure from artifacts:
```bash
nightshift what-happened
```
Open the read-only artifact dashboard:
```bash

View File

@ -19,12 +19,24 @@ For an isolated local integration run, use the integration sandbox command from
python -m nightshift.cli integ-run --template tutorial-pastebin
```
To create the sandbox and set up the Python project immediately:
```bash
python -m nightshift.cli integ-run --template tutorial-pastebin --setup
```
Then set up the generated Python project:
```bash
python -m nightshift.cli integ-setup --project integ_runs/<timestamp>/project
```
`integ-setup` cannot activate the venv for your current shell. In PowerShell, activate it manually if you want plain `python` and `nightshift` to use the integration venv:
```powershell
integ_runs\<timestamp>\.venv\Scripts\Activate.ps1
```
The template creates:
```text
@ -95,6 +107,7 @@ Run one task first:
```bash
python -m nightshift.cli validate
python -m nightshift.cli run --task TASK-001
python -m nightshift.cli what-happened
```
Then inspect:

View File

@ -24,6 +24,7 @@ from .tasks import (
)
from .version import display_version
from .web import create_app
from .what_happened import build_what_happened
def build_parser() -> argparse.ArgumentParser:
@ -60,6 +61,14 @@ def build_parser() -> argparse.ArgumentParser:
status_parser = subparsers.add_parser("status", help="Inspect NightShift project status.")
status_parser.add_argument("--config", default="nightshift.yaml", help="Config file to inspect.")
happened_parser = subparsers.add_parser(
"what-happened",
help="Explain the latest NightShift run from local artifacts.",
)
happened_parser.add_argument("--config", default="nightshift.yaml", help="Config file to inspect.")
happened_parser.add_argument("--run", default="latest", help="Run id to inspect. Defaults to latest.")
happened_parser.add_argument("--task", help="Task id to inspect. Defaults to the latest task artifact.")
web_parser = subparsers.add_parser("web", help="Start a read-only artifact dashboard.")
web_parser.add_argument("--config", default="nightshift.yaml", help="Config file to inspect.")
web_parser.add_argument("--host", default="127.0.0.1", help="Host to bind.")
@ -74,6 +83,27 @@ def build_parser() -> argparse.ArgumentParser:
help="Template to initialize inside the sandbox.",
)
integ_parser.add_argument("--keep", type=int, help="Keep only the newest N old integration runs before creating a new one.")
integ_parser.add_argument(
"--setup",
action="store_true",
help="Run integ-setup for the generated Python project after creating the sandbox.",
)
integ_parser.add_argument(
"--setup-extra",
action="append",
default=["pytest"],
help="Extra package for --setup. May be repeated. Defaults to pytest.",
)
integ_parser.add_argument(
"--setup-skip-validate",
action="store_true",
help="Skip validation during --setup.",
)
integ_parser.add_argument(
"--setup-dry-run",
action="store_true",
help="Print --setup commands without running them.",
)
setup_parser = subparsers.add_parser(
"integ-setup",
@ -181,6 +211,17 @@ def main(argv: list[str] | None = None) -> int:
print(format_status(build_status(config, tasks)))
return 0
if args.command == "what-happened":
config = validate_config(args.config)
report = build_what_happened(
config.project.root,
config.project.artifact_dir,
run_id=args.run,
task_id=args.task,
)
print(report.content)
return 0
if args.command == "web":
config = validate_config(args.config)
app = create_app(config.project.root, config.project.artifact_dir)
@ -193,6 +234,15 @@ def main(argv: list[str] | None = None) -> int:
print(f"Venv: {run.venv_dir}")
print(f"Log: {run.log_path}")
print(f"Setup: python -m nightshift.cli integ-setup --project {run.directory / 'project'}")
if args.setup:
result = setup_python_project(
run.directory / "project",
extras=tuple(args.setup_extra or ()),
validate=not args.setup_skip_validate,
dry_run=args.setup_dry_run,
)
print("")
print(format_setup_result(result))
return 0
if args.command == "integ-setup":

View File

@ -138,7 +138,7 @@ class CommandExecutor:
raise CommandError(str(exc)) from exc
timeout = timeout_seconds or self.timeout_seconds
args: str | list[str] = normalized if shell else shlex.split(normalized)
env = _command_env(self.safety.allowed_env)
env = _command_env(self.safety.allowed_env, project_root=self.project_root)
started = time.monotonic()
process = subprocess.Popen(
@ -218,18 +218,41 @@ def _coerce_output(value: str | bytes | None) -> str:
return value
def _command_env(allowed_env: tuple[str, ...]) -> dict[str, str]:
def _command_env(allowed_env: tuple[str, ...], project_root: Path | None = None) -> dict[str, str]:
env = dict(os.environ) if not allowed_env else {
name: os.environ[name] for name in allowed_env if name in os.environ
}
python_dir = str(Path(sys.executable).resolve().parent)
venv_dir = _project_venv_dir(project_root) if project_root is not None else None
python_dir = str(_venv_scripts_dir(venv_dir) if venv_dir is not None else Path(sys.executable).resolve().parent)
current_path = env.get("PATH") or os.environ.get("PATH", "")
path_parts = [part for part in current_path.split(os.pathsep) if part]
env["PATH"] = os.pathsep.join([python_dir, *[part for part in path_parts if part != python_dir]])
env.setdefault("VIRTUAL_ENV", os.environ.get("VIRTUAL_ENV", ""))
if venv_dir is not None:
env["VIRTUAL_ENV"] = str(venv_dir)
else:
env.setdefault("VIRTUAL_ENV", os.environ.get("VIRTUAL_ENV", ""))
return env
def _project_venv_dir(project_root: Path | None) -> Path | None:
    """Locate a virtual environment that belongs to ``project_root``.

    ``<project_root>/.venv`` is checked first, then ``<project_root>/../.venv``.
    The first one whose interpreter file exists is returned, resolved.
    Returns ``None`` when ``project_root`` is ``None`` or neither location
    contains an interpreter.
    """
    if project_root is None:
        return None
    for base in (project_root, project_root.parent):
        venv = base / ".venv"
        if _venv_python(venv).exists():
            return venv.resolve()
    return None
def _venv_scripts_dir(venv_dir: Path) -> Path:
    """Return the directory inside ``venv_dir`` that holds its executables.

    This is ``Scripts`` when ``os.name`` is ``"nt"`` and ``bin`` otherwise.
    """
    subdir = "bin"
    if os.name == "nt":
        subdir = "Scripts"
    return venv_dir / subdir
def _venv_python(venv_dir: Path) -> Path:
    """Return the expected interpreter path inside ``venv_dir``.

    The file name is ``python.exe`` when ``os.name`` is ``"nt"`` and
    ``python`` otherwise; it lives in the venv's scripts directory.
    """
    interpreter = "python"
    if os.name == "nt":
        interpreter = "python.exe"
    return _venv_scripts_dir(venv_dir) / interpreter
def _kill_process_tree(process: subprocess.Popen[str]) -> None:
if os.name == "nt":
subprocess.run(

View File

@ -97,6 +97,8 @@ def format_setup_result(result: IntegrationSetupResult) -> str:
lines.append(f"- ({command.cwd}) {' '.join(command.args)}")
else:
lines.append("Setup complete.")
lines.append("Activate this venv in your current shell if you want plain `nightshift` and `python` to use it:")
lines.append(f" {_activation_command(result.venv_dir)}")
lines.append("Run from the project directory:")
lines.append(f" {result.python} -m nightshift.cli run --task TASK-001")
return "\n".join(lines)
@ -130,5 +132,11 @@ def _venv_python(venv_dir: Path) -> Path:
return venv_dir / "bin" / "python"
def _activation_command(venv_dir: Path) -> str:
    """Return the command a user can run to activate ``venv_dir``.

    When ``os.name`` is ``"nt"`` this is the path to ``Scripts/Activate.ps1``;
    otherwise it is ``source`` followed by the path to ``bin/activate``.
    """
    if os.name != "nt":
        script = venv_dir / "bin" / "activate"
        return "source " + str(script)
    return str(venv_dir / "Scripts" / "Activate.ps1")
def _default_nightshift_root() -> Path:
    """Return the parent of the directory that contains this module file."""
    module_file = Path(__file__).resolve()
    return module_file.parents[1]

View File

@ -429,7 +429,7 @@ class PipelineRunner:
)
return result
if stage.type in COMMAND_STAGE_TYPES:
return self.command_executor.run_stage(stage, task.id)
return self.command_executor.run_stage(_stage_with_attempt_output(stage, retry_count), task.id)
if stage.type == "code_writer":
return self._run_code_writer_stage(stage, task, previous_outputs, retry_notes, retry_count)
if stage.type == "file_writer":
@ -1383,6 +1383,13 @@ def _attempt_filename(filename: str, retry_count: int) -> str:
return path.with_name(name).as_posix()
def _stage_with_attempt_output(stage: StageConfig, retry_count: int) -> StageConfig:
    """Give a retried stage an attempt-specific output artifact name.

    With ``retry_count <= 0`` the stage is returned unchanged. Otherwise the
    result of ``replace`` is returned, with ``output`` set to the stage's
    configured output (or ``"<stage id>-output.txt"`` when none is set) passed
    through ``_attempt_filename`` together with ``retry_count``.
    """
    if retry_count > 0:
        base_name = stage.output or f"{stage.id}-output.txt"
        return replace(stage, output=_attempt_filename(base_name, retry_count))
    return stage
def _extract_exit_code(text: str) -> int | None:
match = re.search(r"Exit code:\s*(-?\d+)|code\s+(-?\d+)", text)
if not match:

View File

@ -14,12 +14,24 @@ Or create an isolated integration sandbox from the NightShift repository root:
python -m nightshift.cli integ-run --template tutorial-pastebin
```
To create the sandbox and set it up in one step:
```bash
python -m nightshift.cli integ-run --template tutorial-pastebin --setup
```
Then set up the generated Python project:
```bash
python -m nightshift.cli integ-setup --project integ_runs/<timestamp>/project
```
`integ-setup` cannot activate the venv for your current shell. In PowerShell, activate it manually if you want plain `python` and `nightshift` to use the integration venv:
```powershell
integ_runs\<timestamp>\.venv\Scripts\Activate.ps1
```
For a normal non-integration checkout, install target dependencies:
```bash
@ -31,6 +43,7 @@ Validate and run:
```bash
nightshift validate
nightshift run --task TASK-001
nightshift what-happened
```
When running from an integration sandbox, the same commands are run inside `integ_runs/<timestamp>/project`.

View File

@ -3,10 +3,10 @@
from __future__ import annotations
PACKAGE_VERSION = "0.2.3"
PACKAGE_VERSION = "0.2.4"
RELEASE_CHANNEL = "alpha"
hotdog_version = "new-york"
topping_version = "sport-peppers"
hotdog_version = "bratwurst"
topping_version = "relish"
HOTDOG_VERSIONS = (
"bratwurst",

238
nightshift/what_happened.py Normal file
View File

@ -0,0 +1,238 @@
"""Post-run explanation reports."""
from __future__ import annotations
from dataclasses import dataclass
from pathlib import Path
import re
from .errors import NightShiftError
from .status import latest_run_dir
@dataclass(frozen=True)
class WhatHappenedReport:
    """Immutable result of building a what-happened report."""

    # Run directory the report was built from.
    run_dir: Path
    # Task directory that was inspected, or None when no task was selected.
    task_dir: Path | None
    # Rendered report text.
    content: str
def build_what_happened(
    project_root: str | Path,
    artifact_dir: str | Path,
    *,
    run_id: str = "latest",
    task_id: str | None = None,
) -> WhatHappenedReport:
    """Build the what-happened report for one run of a project.

    Args:
        project_root: Project root directory; resolved before use.
        artifact_dir: Artifact directory, joined onto the resolved root.
        run_id: Name of a run directory under ``<artifacts>/runs``, or
            ``"latest"`` to let ``_select_run_dir`` pick one.
        task_id: Name of a task directory to inspect; when omitted,
            ``_select_task_dir`` picks one.

    Returns:
        A ``WhatHappenedReport`` with the selected directories and the
        rendered text.

    Raises:
        NightShiftError: Propagated from the selection helpers when the run
            or the requested task cannot be found.
    """
    artifacts_root = (Path(project_root).resolve() / artifact_dir).resolve()
    selected_run = _select_run_dir(artifacts_root / "runs", run_id)
    selected_task = _select_task_dir(selected_run, task_id)
    return WhatHappenedReport(
        run_dir=selected_run,
        task_dir=selected_task,
        content=format_what_happened(selected_run, selected_task),
    )
def format_what_happened(run_dir: Path, task_dir: Path | None) -> str:
    """Render the Markdown report for ``run_dir`` and, if given, ``task_dir``.

    Sections appear in a fixed order: Outcome, Task, Final Notes, Stage
    Timeline, Command And Test Output, Diagnostics, Debugger, Code Attempts,
    Model Attempts, Likely Cause. The Task section is always written; every
    other section is skipped when its source artifact is missing or empty.

    Args:
        run_dir: Run directory; ``run-summary.md`` is read from it.
        task_dir: Task directory located under ``run_dir`` (its path is shown
            relative to ``run_dir``), or ``None`` when there are no task
            artifacts, in which case the report ends after the Task section.

    Returns:
        The report lines joined with newlines.
    """
    lines = ["# What Happened", "", f"Run: `{run_dir.name}`", ""]
    run_summary = _read(run_dir / "run-summary.md")
    if run_summary:
        lines.extend(["## Outcome", "", *_summary_lines(run_summary), ""])
    if task_dir is None:
        # Nothing task-specific to report; stop after noting that.
        lines.extend(["## Task", "", "- No task artifacts found.", ""])
        return "\n".join(lines)
    lines.extend(["## Task", "", f"- Directory: `{task_dir.relative_to(run_dir).as_posix()}`", ""])
    final_notes = _read(task_dir / "final-notes.md")
    if final_notes:
        lines.extend(["## Final Notes", "", *_summary_lines(final_notes), ""])
    stage_results = _read(task_dir / "stage-results.md")
    if stage_results:
        lines.extend(["## Stage Timeline", "", *_stage_lines(stage_results), ""])
    command_outputs = _command_outputs(task_dir)
    if command_outputs:
        lines.extend(["## Command And Test Output", ""])
        for path in command_outputs:
            lines.extend(_artifact_excerpt(path, task_dir, max_lines=34))
        # NOTE(review): nesting reconstructed from flattened source — confirm
        # this blank line is appended once per section, not once per excerpt.
        lines.append("")
    diagnostics = sorted((task_dir / "diagnostics").glob("*.md")) if (task_dir / "diagnostics").exists() else []
    if diagnostics:
        lines.extend(["## Diagnostics", ""])
        # Only the last five (by sorted path) are excerpted.
        for path in diagnostics[-5:]:
            lines.extend(_artifact_excerpt(path, task_dir, max_lines=18))
        # NOTE(review): same nesting caveat as the section above.
        lines.append("")
    debugger = task_dir / "debugger.md"
    if debugger.exists():
        lines.extend(["## Debugger", "", *_artifact_excerpt(debugger, task_dir, max_lines=24), ""])
    patches = _patch_attempts(task_dir)
    if patches:
        lines.extend(["## Code Attempts", ""])
        for path in patches:
            changed = _patch_changed_files(_read(path))
            # List at most six changed files per patch and count the rest.
            summary = ", ".join(changed[:6]) if changed else "no changed files detected"
            if len(changed) > 6:
                summary += f", +{len(changed) - 6} more"
            lines.append(f"- `{path.name}`: {summary}")
        lines.append("")
    telemetry = task_dir / "telemetry-summary.md"
    if telemetry.exists():
        lines.extend(["## Model Attempts", "", *_artifact_excerpt(telemetry, task_dir, max_lines=28), ""])
    # The heuristic scans every diagnostics file, not just the five excerpted above.
    likely = _likely_cause(command_outputs, diagnostics, debugger)
    if likely:
        lines.extend(["## Likely Cause", "", likely, ""])
    return "\n".join(lines)
def _select_run_dir(runs_dir: Path, run_id: str) -> Path:
    """Resolve ``run_id`` to a run directory under ``runs_dir``.

    ``"latest"`` defers to ``latest_run_dir``; any other value names a
    directory directly beneath ``runs_dir``.

    Raises:
        NightShiftError: When ``latest_run_dir`` returns ``None`` or the
            named run is not an existing directory.
    """
    if run_id != "latest":
        requested = runs_dir / run_id
        if requested.exists() and requested.is_dir():
            return requested
        raise NightShiftError(f"What happened error: run not found: {requested}")
    newest = latest_run_dir(runs_dir)
    if newest is None:
        raise NightShiftError(f"What happened error: no runs found under {runs_dir}")
    return newest
def _select_task_dir(run_dir: Path, task_id: str | None) -> Path | None:
    """Pick the task directory to report on inside ``run_dir``.

    Returns ``None`` when ``run_dir/tasks`` does not exist or holds no
    directories. With a ``task_id`` the matching directory is returned;
    without one, the task directory with the newest modification time wins.

    Raises:
        NightShiftError: When ``task_id`` is given but is not an existing
            directory under ``run_dir/tasks``.
    """
    tasks_root = run_dir / "tasks"
    if not tasks_root.exists():
        return None

    if task_id:
        requested = tasks_root / task_id
        if requested.exists() and requested.is_dir():
            return requested
        raise NightShiftError(f"What happened error: task not found: {requested}")

    def modified_at(entry: Path) -> float:
        return entry.stat().st_mtime

    task_dirs = [entry for entry in tasks_root.iterdir() if entry.is_dir()]
    return max(task_dirs, key=modified_at, default=None)
def _read(path: Path) -> str:
    """Return the text of ``path``, or ``""`` when it is not a regular file.

    Artifacts are optional, so a missing path is not an error. A directory
    that happens to carry an artifact name is treated the same way instead of
    letting ``read_text`` raise. Text is decoded as UTF-8 with undecodable
    bytes replaced, so a partly binary command output never aborts the report.
    """
    # is_file() is False for both missing paths and directories.
    if not path.is_file():
        return ""
    return path.read_text(encoding="utf-8", errors="replace")
def _summary_lines(text: str) -> list[str]:
    """Pick the Task/Status/Retry count/Reason lines out of a summary file.

    Each line is stripped before matching, and both bulleted (``- Status:``)
    and plain (``Status:``) forms are accepted. At most twelve lines are
    returned; a placeholder bullet is returned when nothing matches.
    """
    prefixes = ("- Task:", "- Status:", "- Retry count:", "- Reason:", "Task:", "Status:", "Retry count:", "Reason:")
    matches = [
        candidate
        for candidate in (raw.strip() for raw in text.splitlines())
        if candidate.startswith(prefixes)
    ]
    if not matches:
        return ["- No summary lines found."]
    return matches[:12]
def _stage_lines(text: str) -> list[str]:
    """Condense a stage-results document into one bullet per stage.

    A line starting with ``"## "`` opens a new stage; ``Status:``, ``Reason:``
    and ``Output:`` lines that follow fill in its details. A stage without a
    status is reported as ``unknown``. At most forty bullets are returned, and
    a placeholder bullet is returned when no stage was found.
    """

    def render(name: str, status: str, reason: str, output: str) -> str:
        parts = [status if status else "unknown"]
        if reason:
            parts.append(reason)
        if output:
            parts.append(f"artifact `{output}`")
        return f"- `{name}`: " + "; ".join(parts)

    bullets: list[str] = []
    name = status = reason = output = ""
    for row in text.splitlines():
        if row.startswith("## "):
            # A new heading closes the stage collected so far.
            if name:
                bullets.append(render(name, status, reason, output))
            name = row.removeprefix("## ").strip()
            status = reason = output = ""
            continue
        if row.startswith("Status:"):
            status = row.removeprefix("Status:").strip()
        elif row.startswith("Reason:"):
            reason = row.removeprefix("Reason:").strip()
        elif row.startswith("Output:"):
            output = row.removeprefix("Output:").strip()
    # The last stage has no following heading to close it.
    if name:
        bullets.append(render(name, status, reason, output))
    return bullets[:40] if bullets else ["- No stage results found."]
def _command_outputs(task_dir: Path) -> list[Path]:
    """Return the six most recently modified command output files of a task.

    Candidates are regular files directly in ``task_dir`` matching
    ``*output*.txt``, excluding names that start with ``patch-apply-output``.
    The result is ordered oldest to newest by modification time.
    """
    found: list[Path] = []
    for entry in task_dir.glob("*output*.txt"):
        if not entry.is_file():
            continue
        if entry.name.startswith("patch-apply-output"):
            continue
        found.append(entry)
    found.sort(key=lambda entry: entry.stat().st_mtime)
    return found[-6:]
def _artifact_excerpt(path: Path, base: Path, *, max_lines: int) -> list[str]:
    """Format one artifact as a Markdown heading plus a fenced text excerpt.

    The heading shows ``path`` relative to ``base`` in POSIX form, and the
    fence holds up to ``max_lines`` lines chosen by ``_tail_relevant_lines``.
    The returned list ends with an empty string.
    """
    body = _tail_relevant_lines(_read(path), max_lines=max_lines)
    heading = f"### `{path.relative_to(base).as_posix()}`"
    return [heading, "", "```text", *body, "```", ""]
def _tail_relevant_lines(text: str, *, max_lines: int) -> list[str]:
    """Shrink ``text`` to the lines most useful for diagnosing a failure.

    Text with no more than ``max_lines`` lines is returned whole. For longer
    text, only lines containing one of the failure markers are kept, and the
    last ``max_lines`` of those are returned. If no line contains a marker,
    the plain tail of the text is returned.
    """
    markers = (
        "ERROR",
        "FAILED",
        "Traceback",
        "Exception",
        "Exit code:",
        "Command:",
        "ModuleNotFoundError",
        "ImportError",
        "NameError",
        "AssertionError",
        "Failure category:",
        "Probable root cause:",
        "Recommended next action:",
    )
    all_lines = text.splitlines()
    if len(all_lines) <= max_lines:
        return all_lines
    flagged = [row for row in all_lines if any(marker in row for marker in markers)]
    chosen = flagged if flagged else all_lines
    return chosen[-max_lines:]
def _patch_attempts(task_dir: Path) -> list[Path]:
    """Return the patch files of a task in attempt order.

    ``proposed.patch`` comes first when present, followed by every
    ``repair-<N>.patch`` ordered by the numeric value of ``N``. Repair patches
    are discovered on disk rather than probed from a fixed range, so tasks
    with any number of repair attempts are listed in full.

    Args:
        task_dir: Task artifact directory to scan (it may not exist).

    Returns:
        Existing patch paths, each of the form ``task_dir / <name>``.
    """
    attempts: list[Path] = []
    proposed = task_dir / "proposed.patch"
    if proposed.exists():
        attempts.append(proposed)

    numbered: list[tuple[int, Path]] = []
    for candidate in task_dir.glob("repair-*.patch"):
        # Accept only plain positive indices (no sign, no leading zeros) so
        # unrelated files such as repair-notes.patch are ignored.
        match = re.fullmatch(r"repair-([1-9][0-9]*)\.patch", candidate.name)
        if match:
            numbered.append((int(match.group(1)), candidate))
    # Sort numerically so repair-10 follows repair-9 rather than repair-1.
    numbered.sort(key=lambda item: item[0])
    attempts.extend(path for _, path in numbered)
    return attempts
def _patch_changed_files(text: str) -> list[str]:
    """List the files named in a patch's ``diff --git`` headers.

    Paths are taken from the ``a/`` side of each header and returned in first
    appearance order without duplicates.
    """
    header = re.compile(r"^diff --git a/(.*?) b/", flags=re.MULTILINE)
    # dict.fromkeys keeps first-seen order while dropping repeats.
    return list(dict.fromkeys(hit.group(1) for hit in header.finditer(text)))
def _likely_cause(command_outputs: list[Path], diagnostics: list[Path], debugger: Path) -> str:
    """Guess a one-sentence failure cause from the collected artifacts.

    The text of all given files is searched for known failure signatures,
    checked in priority order: missing module, ``NameError``, then assertion
    or test failure. Returns ``""`` when none of them is present.
    """
    sources = [*command_outputs, *diagnostics, debugger]
    haystack = "\n".join(_read(source) for source in sources)

    if "ModuleNotFoundError: No module named" in haystack:
        return (
            "The latest command could not import a Python package. For src-layout projects, "
            "check that the command stage is using the project venv or that the project is installed editable."
        )
    if "NameError:" in haystack:
        return "The latest implementation patch introduced a missing symbol or import."
    if any(token in haystack for token in ("AssertionError", "FAILED")):
        return "The tests ran but assertions failed; inspect the test output and latest repair patch."
    return ""

View File

@ -21,3 +21,6 @@ include = ["nightshift*"]
[tool.setuptools.package-data]
nightshift = ["project_templates/**/*"]
[tool.pytest.ini_options]
testpaths = ["tests"]

View File

@ -176,6 +176,20 @@ class CommandExecutorTests(unittest.TestCase):
first_path = env["PATH"].split(";")[0] if ";" in env["PATH"] else env["PATH"].split(":")[0]
self.assertEqual(Path(first_path), Path(sys.executable).resolve().parent)
def test_command_env_prefers_project_venv_when_present(self) -> None:
    """A ``.venv`` in the project root leads PATH and is exported as VIRTUAL_ENV."""
    import os  # local import: only this test needs the platform path separator

    with tempfile.TemporaryDirectory() as directory:
        root = Path(directory)
        # Fake a venv: only the interpreter file has to exist to be detected.
        scripts = root / ".venv" / ("Scripts" if sys.platform == "win32" else "bin")
        scripts.mkdir(parents=True)
        executable = scripts / ("python.exe" if sys.platform == "win32" else "python")
        executable.write_text("", encoding="utf-8")

        env = _command_env((), project_root=root)

        # PATH is joined with os.pathsep, so split it the same way. Guessing
        # between ";" and ":" breaks on a single-entry Windows PATH, where the
        # drive colon would be taken for a separator.
        first_path = env["PATH"].split(os.pathsep)[0]
        self.assertEqual(Path(first_path), scripts.resolve())
        self.assertEqual(Path(env["VIRTUAL_ENV"]), (root / ".venv").resolve())
if __name__ == "__main__":
unittest.main()

View File

@ -1,9 +1,10 @@
from pathlib import Path
import os
import tempfile
import unittest
from nightshift.integ import create_integration_run
from nightshift.integ_setup import format_setup_result, setup_python_project
from nightshift.integ_setup import IntegrationSetupResult, format_setup_result, setup_python_project
class IntegrationSetupTests(unittest.TestCase):
@ -44,6 +45,29 @@ class IntegrationSetupTests(unittest.TestCase):
self.assertEqual(result.venv_dir, project.parent / ".venv")
self.assertTrue(result.created_venv)
def test_format_setup_result_includes_activation_hint(self) -> None:
    """A completed (non-dry-run) setup result prints the venv activation command."""
    with tempfile.TemporaryDirectory() as directory:
        root = Path(directory)
        run = create_integration_run(root, template="tutorial-pastebin")
        # Plan the setup as a dry run so nothing is installed.
        result = setup_python_project(
            run.directory / "project",
            nightshift_root=Path(__file__).resolve().parents[1],
            extras=(),
            dry_run=True,
        )
        # Re-label the plan as a real run to render the "Setup complete" branch.
        rendered = format_setup_result(IntegrationSetupResult(
            project_dir=result.project_dir,
            venv_dir=result.venv_dir,
            python=result.python,
            created_venv=result.created_venv,
            commands=result.commands,
            dry_run=False,
        ))
        self.assertIn("Activate", rendered)
        # Check the full activation command. A bare "bin" would also match the
        # interpreter path printed further down and prove nothing about the hint.
        if os.name == "nt":
            expected = str(result.venv_dir / "Scripts" / "Activate.ps1")
        else:
            expected = f"source {result.venv_dir / 'bin' / 'activate'}"
        self.assertIn(expected, rendered)
if __name__ == "__main__":
unittest.main()

View File

@ -15,8 +15,8 @@ from nightshift.version import (
class VersionTests(unittest.TestCase):
def test_display_version_includes_channel_hotdog_and_topping(self) -> None:
self.assertEqual(display_version(), "0.2.3-alpha-new-york-sport-peppers")
self.assertEqual(PACKAGE_VERSION, "0.2.3")
self.assertEqual(display_version(), "0.2.4-alpha-bratwurst-relish")
self.assertEqual(PACKAGE_VERSION, "0.2.4")
self.assertIn(hotdog_version, HOTDOG_VERSIONS)
self.assertIn(topping_version, TOPPING_VERSIONS)

View File

@ -0,0 +1,56 @@
from pathlib import Path
import tempfile
import unittest
from nightshift.what_happened import build_what_happened
class WhatHappenedTests(unittest.TestCase):
    """Checks for the what-happened report builder."""

    def test_build_what_happened_summarizes_latest_failed_task(self) -> None:
        """Summary, command output, diagnostics and patch of a failed task all reach the report."""
        stage_results = "\n".join(
            [
                "# Stage Results",
                "",
                "## test",
                "",
                "Status: fail",
                "Reason: Command exited with code 2: python -m pytest -q",
                "Output: test-output-1.txt",
            ]
        )
        with tempfile.TemporaryDirectory() as directory:
            root = Path(directory)
            run_dir = root / ".nightshift" / "runs" / "20260520T000000.000000Z"
            task_dir = run_dir / "tasks" / "TASK-001"
            diagnostics = task_dir / "diagnostics"
            diagnostics.mkdir(parents=True)

            # Artifact path -> file content for one failed run with one retry.
            artifacts = {
                run_dir / "run-summary.md": (
                    "# Run Summary\n\n- Task: TASK-001\n- Status: failed\n- Retry count: 1\n- Reason: test failed\n"
                ),
                task_dir / "stage-results.md": stage_results,
                task_dir / "test-output-1.txt": (
                    "Command: `python -m pytest -q`\nExit code: 2\nModuleNotFoundError: No module named 'pastebin_app'\n"
                ),
                diagnostics / "test-failure-retry-1.md": (
                    "Failure category: missing dependency\nProbable root cause: Runtime cannot import required package.\n"
                ),
                task_dir / "repair-1.patch": (
                    "diff --git a/src/app.py b/src/app.py\n--- a/src/app.py\n+++ b/src/app.py\n"
                ),
            }
            for artifact, text in artifacts.items():
                artifact.write_text(text, encoding="utf-8")

            report = build_what_happened(root, ".nightshift")

            for expected in (
                "Status: failed",
                "ModuleNotFoundError",
                "missing dependency",
                "repair-1.patch",
            ):
                self.assertIn(expected, report.content)
if __name__ == "__main__":
unittest.main()