mirror of
https://github.com/khodges42/nightShift.git
synced 2026-06-14 10:08:37 +00:00
Add an easier first tutorial, add installers
This commit is contained in:
parent
76b7942c4a
commit
3616c1155a
19
README.md
19
README.md
|
|
@ -55,6 +55,20 @@ NightShift does not push branches, deploy software, run unbounded task swarms, o
|
|||
|
||||
## Install
|
||||
|
||||
Repo setup scripts can install NightShift in editable mode, check for Ollama, and offer to add the Python scripts directory to PATH.
|
||||
|
||||
Windows PowerShell:
|
||||
|
||||
```powershell
|
||||
.\setup.ps1
|
||||
```
|
||||
|
||||
macOS/Linux:
|
||||
|
||||
```bash
|
||||
sh ./setup.sh
|
||||
```
|
||||
|
||||
Development install:
|
||||
|
||||
```bash
|
||||
|
|
@ -73,7 +87,7 @@ NightShift uses the Python standard library for runtime behavior where practical
|
|||
|
||||
Start with the [Quickstart](QUICKSTART.md). It uses deterministic fake agents so you can verify lookup, context generation, patch validation, patch apply, tests, and artifacts without installing a model.
|
||||
|
||||
After that works, continue with [Tutorial 01: Running NightShift With Real Local Models](examples/tutorial/01-intro.md). It swaps the fake agents for Ollama-backed agents such as `qwen2.5-coder:14b` and walks through dry-run and apply-mode patch generation.
|
||||
After that works, continue with [Tutorial 01: Building A Small Imageboard With Real Local Models](examples/tutorial/01-imageboard/README.md). It swaps the fake agents for Ollama-backed agents such as `qwen2.5-coder:14b` and walks through a small Flask/SQLite project with ordinary web-app tasks.
|
||||
|
||||
### Quickstart Commands
|
||||
|
||||
|
|
@ -315,7 +329,8 @@ python -m compileall nightshift tests
|
|||
Additional docs:
|
||||
|
||||
- [Quickstart](QUICKSTART.md)
|
||||
- [Tutorial: running real local models](examples/tutorial/01-intro.md)
|
||||
- [Tutorial 01: imageboard with real local models](examples/tutorial/01-imageboard/README.md)
|
||||
- [Tutorial 02: Lisp with real local models](examples/tutorial/02-lisp/README.md)
|
||||
- [Config reference](docs/config-reference.md)
|
||||
- [Artifact review workflow](docs/artifact-review.md)
|
||||
- [Troubleshooting](docs/troubleshooting.md)
|
||||
|
|
|
|||
411
examples/tutorial/01-imageboard/README.md
Normal file
411
examples/tutorial/01-imageboard/README.md
Normal file
|
|
@ -0,0 +1,411 @@
|
|||
# Tutorial 01: Building A Small Imageboard With Real Local Models
|
||||
|
||||
This tutorial starts after the quickstart. The quickstart uses fake command agents so you can verify the pipeline deterministically. Here, you will point NightShift at a small web application and let a local model implement one feature slice at a time.
|
||||
|
||||
The target is a compact 4chan-style imageboard: boards, threads, replies, images, tripcodes, sessions, reports, and moderation. That is larger than a toy parser, but it is a better first real-model target because each task maps to ordinary web-app files and tests.
|
||||
|
||||
Keep the first run scoped to `TASK-001`. Let later tasks build on the previous completed task.
|
||||
|
||||
## What You Will Build
|
||||
|
||||
You will create a disposable Flask project with SQLite and use NightShift to implement:
|
||||
|
||||
1. Board and thread data model, routes, SQLite schema, and tests.
|
||||
2. Image upload and thumbnail generation.
|
||||
3. Bump ordering and reply counters.
|
||||
4. Tripcodes and session cookies.
|
||||
5. Moderation and report queue.
|
||||
|
||||
NightShift still controls the workflow. The model proposes code; NightShift validates, applies, tests, records artifacts, and shows the result in the dashboard.
|
||||
|
||||
## Prerequisites
|
||||
|
||||
Install NightShift from this repository:
|
||||
|
||||
```bash
|
||||
python -m pip install -e .
|
||||
```
|
||||
|
||||
Install runtime dependencies for the target project:
|
||||
|
||||
```bash
|
||||
python -m pip install flask pillow pytest
|
||||
```
|
||||
|
||||
Install and start Ollama, then make sure the model is available:
|
||||
|
||||
```bash
|
||||
ollama pull qwen2.5-coder:14b
|
||||
ollama list
|
||||
```
|
||||
|
||||
NightShift uses Ollama's local HTTP API, normally at `http://localhost:11434`.
|
||||
|
||||
## 1. Create A Scratch Target Project
|
||||
|
||||
Do not run apply-mode experiments directly inside the NightShift repo. Create a disposable project.
|
||||
|
||||
PowerShell:
|
||||
|
||||
```powershell
|
||||
$TargetProject = "$HOME\Documents\nightshift-imageboard"
|
||||
New-Item -ItemType Directory -Force $TargetProject
|
||||
Set-Location $TargetProject
|
||||
New-Item -ItemType Directory -Force agents, tests, static\uploads, static\thumbs, templates
|
||||
```
|
||||
|
||||
Bash:
|
||||
|
||||
```bash
|
||||
mkdir -p ~/nightshift-imageboard/{agents,tests,static/uploads,static/thumbs,templates}
|
||||
cd ~/nightshift-imageboard
|
||||
```
|
||||
|
||||
## 2. Add The Starter App
|
||||
|
||||
Create `app.py`:
|
||||
|
||||
```python
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
import sqlite3
|
||||
|
||||
from flask import Flask, abort, g, redirect, render_template_string, request, url_for
|
||||
|
||||
|
||||
DATABASE = "imageboard.db"
|
||||
|
||||
|
||||
def create_app(database: str | None = None) -> Flask:
|
||||
app = Flask(__name__)
|
||||
app.config["DATABASE"] = database or DATABASE
|
||||
app.config["UPLOAD_DIR"] = Path("static/uploads")
|
||||
app.config["THUMB_DIR"] = Path("static/thumbs")
|
||||
app.secret_key = "dev-secret"
|
||||
|
||||
@app.before_request
|
||||
def open_db() -> None:
|
||||
g.db = sqlite3.connect(app.config["DATABASE"])
|
||||
g.db.row_factory = sqlite3.Row
|
||||
|
||||
@app.teardown_request
|
||||
def close_db(_exc: BaseException | None) -> None:
|
||||
db = g.pop("db", None)
|
||||
if db is not None:
|
||||
db.close()
|
||||
|
||||
@app.get("/")
|
||||
def index():
|
||||
return redirect(url_for("board", name="test"))
|
||||
|
||||
@app.get("/board/<name>")
|
||||
def board(name: str):
|
||||
abort(501)
|
||||
|
||||
@app.get("/thread/<int:thread_id>")
|
||||
def thread(thread_id: int):
|
||||
abort(501)
|
||||
|
||||
return app
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
create_app().run(debug=True)
|
||||
```
|
||||
|
||||
Create `schema.sql`:
|
||||
|
||||
```sql
|
||||
-- NightShift will fill this in during TASK-001.
|
||||
```
|
||||
|
||||
Create `models.py`:
|
||||
|
||||
```python
|
||||
"""Database helpers for the imageboard tutorial."""
|
||||
```
|
||||
|
||||
Create `tests/test_app.py`:
|
||||
|
||||
```python
|
||||
from app import create_app
|
||||
|
||||
|
||||
def test_index_redirects_to_test_board(tmp_path):
|
||||
app = create_app(str(tmp_path / "test.db"))
|
||||
client = app.test_client()
|
||||
|
||||
response = client.get("/")
|
||||
|
||||
assert response.status_code == 302
|
||||
assert response.headers["Location"].endswith("/board/test")
|
||||
```
|
||||
|
||||
## 3. Add NightShift Config
|
||||
|
||||
Create `nightshift.yaml`:
|
||||
|
||||
```yaml
|
||||
project:
|
||||
name: imageboard
|
||||
root: .
|
||||
task_file: tasks.md
|
||||
artifact_dir: .nightshift
|
||||
|
||||
safety:
|
||||
require_clean_worktree: false
|
||||
scoped_paths:
|
||||
- .
|
||||
allowed_commands:
|
||||
- python -m pytest -q
|
||||
forbidden_commands:
|
||||
- rm -rf
|
||||
- git push
|
||||
- curl | bash
|
||||
|
||||
experiment:
|
||||
label: imageboard-real-model
|
||||
prompt_variant: ollama-qwen25-coder-14b-v1
|
||||
|
||||
agents:
|
||||
planner:
|
||||
backend: ollama
|
||||
model: qwen2.5-coder:14b
|
||||
temperature: 0.2
|
||||
system_prompt: agents/planner.md
|
||||
|
||||
implementer:
|
||||
backend: ollama
|
||||
model: qwen2.5-coder:14b
|
||||
temperature: 0.1
|
||||
system_prompt: agents/implementer.md
|
||||
|
||||
reviewer:
|
||||
backend: ollama
|
||||
model: qwen2.5-coder:14b
|
||||
temperature: 0.1
|
||||
system_prompt: agents/reviewer.md
|
||||
|
||||
pipeline:
|
||||
max_task_retries: 3
|
||||
continue_on_task_failure: false
|
||||
stages:
|
||||
- id: plan
|
||||
type: agent
|
||||
agent: planner
|
||||
output: plan.md
|
||||
|
||||
- id: context
|
||||
type: repo_context
|
||||
output: context-pack.md
|
||||
|
||||
- id: implement
|
||||
type: file_writer
|
||||
agent: implementer
|
||||
output: proposed.patch
|
||||
|
||||
- id: normalize
|
||||
type: patch_normalizer
|
||||
output: normalized.patch
|
||||
|
||||
- id: validate_patch
|
||||
type: patch_validator
|
||||
output: patch-validation.md
|
||||
max_files: 8
|
||||
max_lines: 700
|
||||
on_fail: implement
|
||||
|
||||
- id: apply_patch
|
||||
type: patch_apply
|
||||
mode: apply
|
||||
output: patch-apply-output.txt
|
||||
on_fail: implement
|
||||
|
||||
- id: test
|
||||
type: command
|
||||
commands:
|
||||
- python -m pytest -q
|
||||
output: test-output.txt
|
||||
shell: true
|
||||
timeout_seconds: 20
|
||||
on_fail: implement
|
||||
|
||||
- id: review
|
||||
type: agent_review
|
||||
agent: reviewer
|
||||
on_fail: implement
|
||||
output: review.md
|
||||
|
||||
- id: summarize
|
||||
type: summarize
|
||||
output: final-notes.md
|
||||
```
|
||||
|
||||
## 4. Add Agent Prompts
|
||||
|
||||
Create `agents/planner.md`:
|
||||
|
||||
```markdown
|
||||
You are the planning agent for NightShift.
|
||||
|
||||
Create a concise implementation plan for the current task.
|
||||
|
||||
If you need repository context before planning, output lookup requests exactly like this:
|
||||
|
||||
lookup_requests:
|
||||
- tool: read_file
|
||||
path: relative/path.py
|
||||
- tool: grep
|
||||
path: .
|
||||
pattern: search_regex
|
||||
|
||||
After context is provided, write a short plan with:
|
||||
- files to edit
|
||||
- tests to add or update
|
||||
- risks
|
||||
|
||||
Do not write code.
|
||||
```
|
||||
|
||||
Create `agents/implementer.md`:
|
||||
|
||||
````markdown
|
||||
You are the implementation agent for NightShift.
|
||||
|
||||
Output only complete file content blocks.
|
||||
Use one fenced block per file with this exact opening form:
|
||||
```file:relative/path.py
|
||||
<complete file content>
|
||||
```
|
||||
Do not include explanations before or after the file blocks.
|
||||
Include tests when needed.
|
||||
Keep the change as small as possible.
|
||||
Only edit files needed for the task.
|
||||
````
|
||||
|
||||
Create `agents/reviewer.md`:
|
||||
|
||||
```markdown
|
||||
You are the review agent for NightShift.
|
||||
|
||||
Review the task, plan, patch artifacts, test output, and final state.
|
||||
|
||||
Output exactly:
|
||||
|
||||
status: pass | fail | retry | escalate
|
||||
reason: <short explanation>
|
||||
next_stage: <optional stage id>
|
||||
context_update: <compact useful note>
|
||||
|
||||
Use retry when the implementation is close but needs another patch.
|
||||
Use fail when the patch is unsafe, unrelated, or clearly broken.
|
||||
Use pass only when the acceptance criteria are satisfied.
|
||||
```
|
||||
|
||||
## 5. Add The Task List
|
||||
|
||||
Create `tasks.md`:
|
||||
|
||||
```markdown
|
||||
# Tasks
|
||||
|
||||
- [ ] TASK-001: Board and thread foundation
|
||||
|
||||
Description:
|
||||
Implement the initial imageboard data model and read routes. Add a SQLite schema and model helpers for boards, threads, and replies. Implement `/board/<name>` and `/thread/<id>` routes with simple HTML responses. Include tests that initialize a temporary database, create board/thread/reply records, and verify both routes.
|
||||
|
||||
Acceptance Criteria:
|
||||
- Defines SQLite tables for boards, threads, and replies
|
||||
- Provides database initialization and model helper functions
|
||||
- Implements `/board/<name>` route showing threads for that board
|
||||
- Implements `/thread/<id>` route showing the thread and replies
|
||||
- Includes route and model tests using a temporary database
|
||||
|
||||
- [ ] TASK-002: Image upload and thumbnails
|
||||
|
||||
Dependencies:
|
||||
- TASK-001
|
||||
|
||||
Description:
|
||||
Add image attachment support for new threads and replies. Store uploaded image metadata in SQLite, save uploaded files under `static/uploads`, and generate thumbnails under `static/thumbs`.
|
||||
|
||||
Acceptance Criteria:
|
||||
- Accepts image uploads for threads and replies
|
||||
- Stores image filename, thumbnail filename, MIME type, and size
|
||||
- Generates thumbnails with Pillow
|
||||
- Rejects unsupported or oversized files
|
||||
- Includes upload and thumbnail tests
|
||||
|
||||
- [ ] TASK-003: Bump ordering and reply counts
|
||||
|
||||
Dependencies:
|
||||
- TASK-002
|
||||
|
||||
Description:
|
||||
Sort board threads by most recent bump. Creating a reply updates the thread bump timestamp and increments reply counters.
|
||||
|
||||
Acceptance Criteria:
|
||||
- Board pages sort threads by latest bump time
|
||||
- Replies increment thread reply count
|
||||
- Reply creation updates bump timestamp
|
||||
- Tests cover ordering and counters
|
||||
|
||||
- [ ] TASK-004: Tripcodes and session cookies
|
||||
|
||||
Dependencies:
|
||||
- TASK-003
|
||||
|
||||
Description:
|
||||
Add anonymous names, optional tripcodes, and a session cookie for lightweight poster identity.
|
||||
|
||||
Acceptance Criteria:
|
||||
- Supports optional name and tripcode input
|
||||
- Stores tripcode hashes without storing raw tripcode secrets
|
||||
- Sets and reuses a poster session cookie
|
||||
- Displays stable poster identity on posts
|
||||
- Includes tripcode and session tests
|
||||
|
||||
- [ ] TASK-005: Moderation and report queue
|
||||
|
||||
Dependencies:
|
||||
- TASK-004
|
||||
|
||||
Description:
|
||||
Add post reporting and a simple moderation queue. Moderators can view reports, dismiss reports, and hide reported posts.
|
||||
|
||||
Acceptance Criteria:
|
||||
- Users can report threads and replies
|
||||
- Reports are stored with reason and timestamp
|
||||
- Moderation queue lists open reports
|
||||
- Moderation actions can dismiss reports or hide posts
|
||||
- Includes moderation and report queue tests
|
||||
```
|
||||
|
||||
## 6. Validate And Run
|
||||
|
||||
Validate the project:
|
||||
|
||||
```bash
|
||||
python -m nightshift.cli validate --config nightshift.yaml
|
||||
```
|
||||
|
||||
Run only the first task:
|
||||
|
||||
```bash
|
||||
python -m nightshift.cli run --config nightshift.yaml --task TASK-001
|
||||
```
|
||||
|
||||
Start the dashboard:
|
||||
|
||||
```bash
|
||||
python -m nightshift.cli web --config nightshift.yaml --host 127.0.0.1 --port 8765
|
||||
```
|
||||
|
||||
Open `http://127.0.0.1:8765/`.
|
||||
|
||||
## Notes On Scope
|
||||
|
||||
This is still a non-trivial first project. The advantage over a tiny interpreter is that failures are ordinary web-app failures: missing routes, schema mistakes, file handling, or tests. Those are easier to inspect in NightShift artifacts than parser recursion or tokenizer loops.
|
||||
|
||||
Keep the tasks sequential. Do not ask the model to implement uploads, tripcodes, or moderation before `TASK-001` is passing.
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
# Tutorial 01: Running NightShift With Real Local Models
|
||||
# Tutorial 02: Running NightShift With Real Local Models On Tiny Lisp
|
||||
|
||||
This tutorial starts after the quickstart. The quickstart uses fake command agents so you can verify the pipeline deterministically. Here, you will replace those fake agents with real Ollama-backed agents and let a model generate a real patch.
|
||||
|
||||
|
|
@ -144,33 +144,39 @@ class CommandExecutor:
|
|||
env.setdefault("PATH", os.environ["PATH"])
|
||||
|
||||
started = time.monotonic()
|
||||
process = subprocess.Popen(
|
||||
args,
|
||||
cwd=cwd,
|
||||
shell=shell,
|
||||
stdout=subprocess.PIPE,
|
||||
stderr=subprocess.PIPE,
|
||||
text=True,
|
||||
encoding="utf-8",
|
||||
errors="replace",
|
||||
env=env,
|
||||
)
|
||||
try:
|
||||
completed = subprocess.run(
|
||||
args,
|
||||
cwd=cwd,
|
||||
shell=shell,
|
||||
capture_output=True,
|
||||
text=True,
|
||||
encoding="utf-8",
|
||||
errors="replace",
|
||||
timeout=timeout,
|
||||
env=env,
|
||||
)
|
||||
stdout, stderr = process.communicate(timeout=timeout)
|
||||
duration = time.monotonic() - started
|
||||
return CommandRun(
|
||||
command=normalized,
|
||||
exit_code=completed.returncode,
|
||||
stdout=_coerce_output(completed.stdout),
|
||||
stderr=_coerce_output(completed.stderr),
|
||||
exit_code=process.returncode if process.returncode is not None else -1,
|
||||
stdout=_coerce_output(stdout),
|
||||
stderr=_coerce_output(stderr),
|
||||
duration_seconds=duration,
|
||||
)
|
||||
except subprocess.TimeoutExpired as exc:
|
||||
except subprocess.TimeoutExpired:
|
||||
_kill_process_tree(process)
|
||||
try:
|
||||
stdout, stderr = process.communicate(timeout=2)
|
||||
except subprocess.TimeoutExpired:
|
||||
stdout, stderr = "", "Timed out while collecting process output after termination."
|
||||
duration = time.monotonic() - started
|
||||
return CommandRun(
|
||||
command=normalized,
|
||||
exit_code=-1,
|
||||
stdout=_coerce_output(exc.stdout),
|
||||
stderr=_coerce_output(exc.stderr),
|
||||
stdout=_coerce_output(stdout),
|
||||
stderr=_coerce_output(stderr),
|
||||
duration_seconds=duration,
|
||||
timed_out=True,
|
||||
)
|
||||
|
|
@ -213,3 +219,16 @@ def _coerce_output(value: str | bytes | None) -> str:
|
|||
if isinstance(value, bytes):
|
||||
return value.decode("utf-8", errors="replace")
|
||||
return value
|
||||
|
||||
|
||||
def _kill_process_tree(process: subprocess.Popen[str]) -> None:
    """Forcefully terminate ``process`` and, where possible, its descendants.

    On Windows this runs ``taskkill /F /T``, which targets the whole process
    tree.  If ``taskkill`` cannot be started or reports failure while the
    child is still running, fall back to ``process.kill()`` so that at least
    the direct child is terminated.

    On POSIX, ``Popen.kill()`` only signals the direct child, so anything the
    child spawned (for example the real command behind a ``shell=True``
    wrapper) would survive.  When the child leads its own process group --
    e.g. because it was started with ``start_new_session=True`` -- the whole
    group receives ``SIGKILL``.  Otherwise only the direct child is killed,
    which is the previous behaviour.

    Args:
        process: The child process to terminate.
    """
    if os.name == "nt":
        try:
            completed = subprocess.run(
                ["taskkill", "/F", "/T", "/PID", str(process.pid)],
                capture_output=True,
                text=True,
                encoding="utf-8",
                errors="replace",
            )
        except OSError:
            # taskkill itself could not be launched.
            tree_killed = False
        else:
            tree_killed = completed.returncode == 0
        if not tree_killed and process.poll() is None:
            process.kill()
        return

    import signal  # Local import: only the POSIX path needs it.

    if process.returncode is None:
        # The child has not been reaped by this Popen object, so its pid
        # cannot have been recycled and is safe to query.
        try:
            group_id = os.getpgid(process.pid)
            # Never signal our own group; only a group the child leads.
            if group_id == process.pid and group_id != os.getpgrp():
                os.killpg(group_id, signal.SIGKILL)
                return
        except OSError:
            # Group lookup or signalling failed; fall back to the child only.
            pass
    process.kill()
|
||||
|
|
|
|||
|
|
@ -596,49 +596,83 @@ class PipelineRunner:
|
|||
)
|
||||
raw_output = self._read_output(result.output_path)
|
||||
stdout = extract_agent_stdout(raw_output)
|
||||
try:
|
||||
updates = parse_file_updates(stdout)
|
||||
patch = generate_patch_from_file_updates(
|
||||
updates,
|
||||
self.config.project.root,
|
||||
self.config.safety,
|
||||
forbidden_paths=stage.forbidden_paths or DEFAULT_FORBIDDEN_PATHS,
|
||||
)
|
||||
patch_reason = "Deterministic patch written from file blocks."
|
||||
log_message = "Wrote deterministic patch from file blocks"
|
||||
except PipelineError as exc:
|
||||
invalid_rerun_done = False
|
||||
while True:
|
||||
try:
|
||||
patch = normalize_patch_text(stdout)
|
||||
except PipelineError:
|
||||
summary_filename = "implementation-summary.md" if retry_count == 0 else f"repair-summary-{retry_count}.md"
|
||||
reason = str(exc)
|
||||
if "generated patch has no changes" in reason:
|
||||
next_stage = self._stage_after_patch_flow(stage.id)
|
||||
reason = self._no_changes_reason(retry_count)
|
||||
summary_path = self.artifacts.write_stage_output(
|
||||
updates = parse_file_updates(stdout)
|
||||
patch = generate_patch_from_file_updates(
|
||||
updates,
|
||||
self.config.project.root,
|
||||
self.config.safety,
|
||||
forbidden_paths=stage.forbidden_paths or DEFAULT_FORBIDDEN_PATHS,
|
||||
)
|
||||
patch_reason = "Deterministic patch written from file blocks."
|
||||
log_message = "Wrote deterministic patch from file blocks"
|
||||
break
|
||||
except PipelineError as exc:
|
||||
if (
|
||||
"no file blocks found" in str(exc)
|
||||
and "diff --git " not in stdout
|
||||
and not invalid_rerun_done
|
||||
):
|
||||
invalid_rerun_done = True
|
||||
self.logger.event(
|
||||
"agent.rerun",
|
||||
"Re-running file writer after invalid output",
|
||||
stage_id=stage.id,
|
||||
task_id=task.id,
|
||||
)
|
||||
rerun_outputs = dict(enriched_outputs)
|
||||
rerun_outputs["invalid_file_writer_output"] = stdout
|
||||
strict_notes = [
|
||||
*retry_notes,
|
||||
"Previous file_writer output was invalid. Return complete file blocks now. Do not output lookup_requests, prose, or 'lookup failed'.",
|
||||
]
|
||||
result = self.agent_executor.run_stage(
|
||||
agent_stage,
|
||||
task,
|
||||
rerun_outputs,
|
||||
strict_notes,
|
||||
project_context=context.project_context,
|
||||
task_context=context.task_context,
|
||||
retry_context="\n".join(f"- {note}" for note in strict_notes),
|
||||
)
|
||||
raw_output = self._read_output(result.output_path)
|
||||
stdout = extract_agent_stdout(raw_output)
|
||||
continue
|
||||
try:
|
||||
patch = normalize_patch_text(stdout)
|
||||
except PipelineError:
|
||||
summary_filename = "implementation-summary.md" if retry_count == 0 else f"repair-summary-{retry_count}.md"
|
||||
reason = str(exc)
|
||||
if "generated patch has no changes" in reason:
|
||||
next_stage = self._stage_after_patch_flow(stage.id)
|
||||
reason = self._no_changes_reason(retry_count)
|
||||
summary_path = self.artifacts.write_stage_output(
|
||||
task.id,
|
||||
summary_filename,
|
||||
f"# Implementation Summary\n\nStatus: pass\nReason: {reason}\n",
|
||||
)
|
||||
return StageResult(
|
||||
stage.id,
|
||||
"pass",
|
||||
reason,
|
||||
output_path=result.output_path,
|
||||
next_stage=next_stage,
|
||||
context_update=(
|
||||
f"Implementation summary: "
|
||||
f"{summary_path.relative_to(self.config.project.root).as_posix()}"
|
||||
),
|
||||
)
|
||||
self.artifacts.write_stage_output(
|
||||
task.id,
|
||||
summary_filename,
|
||||
f"# Implementation Summary\n\nStatus: pass\nReason: {reason}\n",
|
||||
f"# Implementation Summary\n\nStatus: fail\nReason: {reason}\n",
|
||||
)
|
||||
return StageResult(
|
||||
stage.id,
|
||||
"pass",
|
||||
reason,
|
||||
output_path=result.output_path,
|
||||
next_stage=next_stage,
|
||||
context_update=(
|
||||
f"Implementation summary: "
|
||||
f"{summary_path.relative_to(self.config.project.root).as_posix()}"
|
||||
),
|
||||
)
|
||||
self.artifacts.write_stage_output(
|
||||
task.id,
|
||||
summary_filename,
|
||||
f"# Implementation Summary\n\nStatus: fail\nReason: {reason}\n",
|
||||
)
|
||||
return StageResult(stage.id, "fail", reason, output_path=result.output_path)
|
||||
patch_reason = "Fallback patch written from unified diff output."
|
||||
log_message = "Wrote fallback patch from unified diff output"
|
||||
return StageResult(stage.id, "fail", reason, output_path=result.output_path)
|
||||
patch_reason = "Fallback patch written from unified diff output."
|
||||
log_message = "Wrote fallback patch from unified diff output"
|
||||
break
|
||||
patch_filename = "repair-{0}.patch".format(retry_count) if retry_count else (stage.output or "proposed.patch")
|
||||
summary_filename = "implementation-summary.md" if retry_count == 0 else f"repair-summary-{retry_count}.md"
|
||||
proposed_path = self.artifacts.write_stage_output(task.id, patch_filename, patch)
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@ from __future__ import annotations
|
|||
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
import re
|
||||
import subprocess
|
||||
|
||||
from .artifacts import ArtifactStore
|
||||
|
|
@ -87,6 +88,9 @@ class ReportGenerator:
|
|||
retry_count=retry_count,
|
||||
stage_results=stage_results,
|
||||
modified_files=modified_files,
|
||||
run_log=self.artifacts.run_log_path.read_text(encoding="utf-8", errors="replace")
|
||||
if self.artifacts.run_log_path.exists()
|
||||
else "",
|
||||
),
|
||||
encoding="utf-8",
|
||||
)
|
||||
|
|
@ -225,6 +229,7 @@ def format_devlog(
|
|||
retry_count: int,
|
||||
stage_results: list[StageResult],
|
||||
modified_files: list[str],
|
||||
run_log: str = "",
|
||||
) -> str:
|
||||
lines = [
|
||||
"# Devlog",
|
||||
|
|
@ -236,6 +241,9 @@ def format_devlog(
|
|||
f"Outcome: {reason}",
|
||||
"",
|
||||
]
|
||||
timeline = _format_devlog_timeline(run_log)
|
||||
if timeline:
|
||||
lines.extend(["## Timeline", "", *timeline, ""])
|
||||
stage_titles = {
|
||||
"agent": "Agent",
|
||||
"agent_review": "Reviewer",
|
||||
|
|
@ -276,6 +284,63 @@ def format_devlog(
|
|||
return "\n".join(lines)
|
||||
|
||||
|
||||
def _format_devlog_timeline(run_log: str) -> list[str]:
    """Turn raw run-log text into Markdown bullet lines for the devlog.

    Each line of ``run_log`` is parsed with ``_parse_run_log_line``.  Lines
    without a recognisable event are skipped, and events not handled below
    produce no output.  The most recent ``stage.start`` stage id is
    remembered and used for later events that carry no ``stage_id`` field.

    Args:
        run_log: Full text of the run log; may be empty.

    Returns:
        One ``"- ..."`` bullet per reported event, in log order.
    """
    active_stage = ""
    timeline: list[str] = []
    for record in run_log.splitlines():
        event, fields = _parse_run_log_line(record)
        if not event:
            continue

        # Resolved before a stage.start updates the remembered stage.
        stage_id = fields.get("stage_id") or active_stage
        bullet: str | None = None

        if event == "stage.start":
            active_stage = fields.get("stage_id", active_stage)
            bullet = f"- {stage_id}: started {fields.get('stage_type', 'stage')}."
        elif event == "agent.rerun":
            bullet = f"- {stage_id}: reran the agent with extra context."
        elif event == "tool.call":
            actor = _devlog_stage_label(stage_id or active_stage or "repo lookup", {})
            tool = fields.get("tool", "tool")
            path = fields.get("path", ".")
            pattern = fields.get("pattern")
            if tool == "grep":
                bullet = f"- {actor}: searched `{path}` for `{pattern or ''}`."
            elif tool == "read_file":
                bullet = f"- {actor}: read `{path}`."
            elif tool == "list_files":
                bullet = f"- {actor}: listed files under `{path}`."
            else:
                bullet = f"- {actor}: ran repo lookup `{tool}` on `{path}`."
        elif event == "artifact.write":
            artifact = fields.get("artifact_path")
            if artifact:
                actor = _devlog_stage_label(stage_id or active_stage or "artifact", {})
                bullet = f"- {actor}: wrote `{artifact}`."
        elif event == "command.start":
            bullet = f"- {stage_id}: ran `{fields.get('command', 'command')}`."
        elif event == "command.finish":
            bullet = f"- {stage_id}: command exited with code {fields.get('exit_code', '?')}."
        elif event == "stage.next":
            bullet = f"- {stage_id}: skipped ahead to `{fields.get('next_stage', '')}`."
        elif event == "stage.retry":
            bullet = f"- {stage_id}: requested retry to `{fields.get('next_stage', '')}`."
        elif event == "stage.finish":
            bullet = f"- {stage_id}: finished with {fields.get('status', 'unknown')} - {fields.get('reason', '')}"

        if bullet is not None:
            timeline.append(bullet)
    return timeline
|
||||
|
||||
|
||||
def _parse_run_log_line(line: str) -> tuple[str, dict[str, str]]:
    """Split one run-log line into its event name and ``key=value`` fields.

    The line is split on ``" | "`` and each segment is stripped.  The second
    segment is the event name.  Segments from the fourth onwards that match
    ``key=value`` (non-empty key, split at the first ``=``) become fields;
    other segments are ignored.  A repeated key keeps its last value.

    Args:
        line: A single line of run-log text.

    Returns:
        ``(event, fields)``, or ``("", {})`` when the line has fewer than
        three segments.
    """
    segments = [segment.strip() for segment in line.split(" | ")]
    if len(segments) < 3:
        return "", {}

    candidates = (re.match(r"([^=]+)=(.*)", segment) for segment in segments[3:])
    fields: dict[str, str] = {
        found.group(1).strip(): found.group(2).strip()
        for found in candidates
        if found
    }
    return segments[1], fields
|
||||
|
||||
|
||||
def _devlog_stage_label(stage_id: str, stage_titles: dict[str, str]) -> str:
|
||||
normalized = stage_id.lower()
|
||||
if "plan" in normalized:
|
||||
|
|
|
|||
99
setup.ps1
Normal file
99
setup.ps1
Normal file
|
|
@ -0,0 +1,99 @@
|
|||
param(
|
||||
[switch]$Yes
|
||||
)
|
||||
|
||||
Set-StrictMode -Version Latest
|
||||
$ErrorActionPreference = "Stop"
|
||||
|
||||
function Test-Command {
    # Returns $true when Get-Command can resolve $Name in this session,
    # $false otherwise. Lookup errors are suppressed rather than raised.
    param([string]$Name)

    $resolved = Get-Command $Name -ErrorAction SilentlyContinue
    return ($null -ne $resolved)
}
|
||||
|
||||
function Ask-YesNo {
    # Prompts with a yes/no question and returns a boolean.
    #   -Question : text shown to the user.
    #   -Default  : value returned when the reply is empty or whitespace.
    # When the script-level -Yes switch is set, no prompt is shown and
    # $true is returned.
    param(
        [string]$Question,
        [bool]$Default = $true
    )

    if ($Yes) { return $true }

    if ($Default) { $hint = "[Y/n]" } else { $hint = "[y/N]" }
    $reply = Read-Host "$Question $hint"

    if (-not [string]::IsNullOrWhiteSpace($reply)) {
        # Any reply starting with "y" (case-insensitive) counts as yes.
        $normalized = $reply.Trim().ToLowerInvariant()
        return $normalized.StartsWith("y")
    }
    return $Default
}
|
||||
|
||||
function Add-UserPath {
    # Appends $Directory to the persistent user-level PATH and to the PATH of
    # the current session. Does nothing when the user-level PATH already
    # lists the directory.
    param([string]$Directory)

    $current = [Environment]::GetEnvironmentVariable("Path", "User")
    $parts = @()
    if (-not [string]::IsNullOrWhiteSpace($current)) {
        # @(...) keeps the result an array even when only one entry survives
        # the filter. Without it a single entry collapses to a plain string
        # and "$parts + $Directory" below would glue the two paths together
        # with no ";" separator, corrupting PATH.
        $parts = @($current -split ";" | Where-Object { -not [string]::IsNullOrWhiteSpace($_) })
    }
    if ($parts -contains $Directory) {
        return
    }

    # Array + string appends an element, so this also covers an empty PATH.
    $newPath = ($parts + $Directory) -join ";"
    [Environment]::SetEnvironmentVariable("Path", $newPath, "User")
    $env:Path = ($env:Path + ";" + $Directory)
}
|
||||
|
||||
$repoRoot = Split-Path -Parent $MyInvocation.MyCommand.Path
|
||||
Set-Location $repoRoot
|
||||
|
||||
Write-Host "NightShift setup"
|
||||
Write-Host "Repo: $repoRoot"
|
||||
|
||||
if (-not (Test-Command "python")) {
|
||||
throw "Python was not found on PATH. Install Python 3.11+ and rerun setup.ps1."
|
||||
}
|
||||
|
||||
$pythonVersion = python -c "import sys; print(f'{sys.version_info.major}.{sys.version_info.minor}')"
|
||||
Write-Host "Python: $pythonVersion"
|
||||
|
||||
Write-Host "Installing NightShift in editable mode..."
|
||||
python -m pip install -e .
# By default a failing native command does not raise a terminating error even
# with $ErrorActionPreference = "Stop", so check the exit code explicitly
# instead of carrying on to "Setup complete." after a failed install.
if ($LASTEXITCODE -ne 0) {
    throw "pip install failed with exit code $LASTEXITCODE. Fix the error above and rerun setup.ps1."
}
|
||||
|
||||
$scriptsDir = python -c "import sysconfig; print(sysconfig.get_path('scripts'))"
|
||||
$pathParts = $env:Path -split ";" | Where-Object { -not [string]::IsNullOrWhiteSpace($_) }
|
||||
if ($pathParts -notcontains $scriptsDir) {
|
||||
if (Ask-YesNo "Add Python scripts directory to your user PATH so 'nightshift' works in new terminals? $scriptsDir") {
|
||||
Add-UserPath $scriptsDir
|
||||
Write-Host "Added to user PATH: $scriptsDir"
|
||||
} else {
|
||||
Write-Host "Skipped PATH update. You can still run: python -m nightshift.cli"
|
||||
}
|
||||
} else {
|
||||
Write-Host "PATH already includes Python scripts directory."
|
||||
}
|
||||
|
||||
if (Test-Command "nightshift") {
|
||||
Write-Host "NightShift CLI is available:"
|
||||
nightshift --help | Select-Object -First 5
|
||||
} else {
|
||||
Write-Host "NightShift CLI is not visible in this shell yet. Open a new terminal or run: python -m nightshift.cli --help"
|
||||
}
|
||||
|
||||
if (Test-Command "ollama") {
|
||||
Write-Host "Ollama is installed:"
|
||||
ollama --version
|
||||
} else {
|
||||
Write-Host "Ollama was not found."
|
||||
if (Test-Command "winget") {
|
||||
if (Ask-YesNo "Install Ollama with winget now?") {
|
||||
winget install --id Ollama.Ollama -e
|
||||
} else {
|
||||
Write-Host "Skipped Ollama install. Install later from https://ollama.com/download"
|
||||
}
|
||||
} else {
|
||||
Write-Host "winget was not found. Install Ollama from https://ollama.com/download"
|
||||
}
|
||||
}
|
||||
|
||||
Write-Host ""
|
||||
Write-Host "Setup complete."
|
||||
Write-Host "Validate this repo with: nightshift validate"
|
||||
Write-Host "Start the dashboard with: nightshift web"
|
||||
116
setup.sh
Normal file
116
setup.sh
Normal file
|
|
@ -0,0 +1,116 @@
|
|||
#!/usr/bin/env sh
|
||||
set -eu
|
||||
|
||||
YES=0
|
||||
if [ "${1:-}" = "-y" ] || [ "${1:-}" = "--yes" ]; then
|
||||
YES=1
|
||||
fi
|
||||
|
||||
# ask_yes_no QUESTION [DEFAULT]
# Prompt with QUESTION and return 0 for yes, 1 for no.
# DEFAULT ("yes" unless given) decides the result for an empty reply.
# When YES=1 no prompt is shown and the answer is always yes.
ask_yes_no() {
  question="$1"
  default="${2:-yes}"
  if [ "$YES" -eq 1 ]; then
    return 0
  fi
  if [ "$default" = "yes" ]; then
    prompt="[Y/n]"
  else
    prompt="[y/N]"
  fi
  printf "%s %s " "$question" "$prompt"
  # -r keeps backslashes literal. "|| true" makes end-of-input (closed or
  # non-interactive stdin) an empty reply instead of a failure that could
  # abort the script under "set -e".
  read -r answer || true
  if [ -z "$answer" ]; then
    [ "$default" = "yes" ]
    return
  fi
  case "$answer" in
    # Accept y / yes in any letter case.
    [Yy]|[Yy][Ee][Ss]) return 0 ;;
    *) return 1 ;;
  esac
}
|
||||
|
||||
# has_command NAME
# Succeed when "command -v" can resolve NAME; all output is discarded.
has_command() {
  command -v "$1" >/dev/null 2>/dev/null
}
|
||||
|
||||
repo_root=$(CDPATH= cd -- "$(dirname -- "$0")" && pwd)
|
||||
cd "$repo_root"
|
||||
|
||||
echo "NightShift setup"
|
||||
echo "Repo: $repo_root"
|
||||
|
||||
if has_command python3; then
|
||||
PYTHON=python3
|
||||
elif has_command python; then
|
||||
PYTHON=python
|
||||
else
|
||||
echo "Python was not found on PATH. Install Python 3.11+ and rerun setup.sh." >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo "Python: $($PYTHON -c 'import sys; print(f"{sys.version_info.major}.{sys.version_info.minor}")')"
|
||||
|
||||
echo "Installing NightShift in editable mode..."
|
||||
$PYTHON -m pip install -e .
|
||||
|
||||
# Ask the interpreter for its sysconfig "scripts" directory and make sure
# that directory is reachable through PATH, offering to persist the change
# in the user's shell profile.
scripts_dir=$($PYTHON -c 'import sysconfig; print(sysconfig.get_path("scripts"))')
if [ -z "$scripts_dir" ]; then
  # An empty value would write `export PATH=":$PATH"` into the profile, which
  # puts the current directory on PATH, so leave PATH and the profile alone.
  echo "Could not determine the Python scripts directory; skipping PATH update." >&2
else
  # Wrapping both sides in colons makes this an exact match on a whole PATH
  # component rather than a substring match.
  case ":$PATH:" in
    *":$scripts_dir:"*)
      echo "PATH already includes Python scripts directory."
      ;;
    *)
      if ask_yes_no "Add Python scripts directory to PATH in your shell profile? $scripts_dir" "yes"; then
        # Choose the profile file from the login shell's name.
        shell_name=$(basename "${SHELL:-sh}")
        case "$shell_name" in
          zsh) profile="$HOME/.zshrc" ;;
          bash) profile="$HOME/.bashrc" ;;
          *) profile="$HOME/.profile" ;;
        esac
        # \$PATH stays literal so the profile expands it at shell startup.
        line="export PATH=\"$scripts_dir:\$PATH\""
        # `--` keeps grep from reading an unusual directory name as an option.
        if [ -f "$profile" ] && grep -F -- "$scripts_dir" "$profile" >/dev/null 2>&1; then
          echo "Profile already mentions $scripts_dir"
        else
          printf "\n# NightShift CLI\n%s\n" "$line" >> "$profile"
          echo "Added PATH update to $profile"
        fi
        # Also update this process so the CLI check below can see the command.
        export PATH="$scripts_dir:$PATH"
      else
        echo "Skipped PATH update. You can still run: $PYTHON -m nightshift.cli"
      fi
      ;;
  esac
fi
|
||||
|
||||
# Report whether the `nightshift` entry point resolves in this shell. When it
# does, show only the first five lines of its help text as a smoke test;
# otherwise point at the module invocation that works without PATH changes.
if ! has_command nightshift; then
  echo "NightShift CLI is not visible in this shell yet. Open a new terminal or run: $PYTHON -m nightshift.cli --help"
else
  echo "NightShift CLI is available:"
  nightshift --help | sed -n '1,5p'
fi
|
||||
|
||||
# Report the installed Ollama version, or offer a platform-appropriate way to
# install it: Homebrew on macOS (when brew exists), the official install
# script on Linux, and a download link everywhere else.
if has_command ollama; then
  echo "Ollama is installed:"
  ollama --version
else
  echo "Ollama was not found."
  os_name=$(uname -s 2>/dev/null || echo unknown)
  if [ "$os_name" = "Darwin" ] && has_command brew; then
    if ask_yes_no "Install Ollama with Homebrew now?" "yes"; then
      brew install ollama
    else
      echo "Skipped Ollama install. Install later from https://ollama.com/download"
    fi
  elif [ "$os_name" = "Linux" ]; then
    if ! has_command curl; then
      # Without curl the pipeline below would hand `sh` an empty script, which
      # exits successfully, so the install would fail with little indication.
      echo "curl was not found, so the official install script cannot be downloaded." >&2
      echo "Install Ollama from https://ollama.com/download"
    elif ask_yes_no "Install Ollama with the official install script now?" "no"; then
      # Defaults to "no" above: this executes a script fetched from the network.
      curl -fsSL https://ollama.com/install.sh | sh
    else
      echo "Skipped Ollama install. Install later from https://ollama.com/download"
    fi
  else
    echo "Install Ollama from https://ollama.com/download"
  fi
fi
|
||||
|
||||
# Closing summary: a blank separator line followed by the next-step hints.
printf '\n'
printf '%s\n' \
  "Setup complete." \
  "Validate this repo with: nightshift validate" \
  "Start the dashboard with: nightshift web"
|
||||
|
|
@ -547,6 +547,45 @@ Acceptance Criteria:
|
|||
self.assertFalse((task_dir / "normalized.patch").exists())
|
||||
self.assertFalse((task_dir / "patch-validation.md").exists())
|
||||
|
||||
def test_file_writer_invalid_output_gets_strict_rerun(self) -> None:
    # The stand-in writer agent prints 'lookup failed' unless its prompt
    # contains 'Previous file_writer output was invalid'; only then does it
    # emit a file block for app.py. The task is expected to end as "complete"
    # with "+new" present in proposed.patch.
    fake_writer_lines = (
        "import sys",
        "prompt = sys.stdin.read()",
        "if 'Previous file_writer output was invalid' not in prompt:",
        " print('lookup failed')",
        "else:",
        " print('```file:app.py')",
        " print('new')",
        " print('```')",
    )
    with tempfile.TemporaryDirectory() as directory:
        root = Path(directory)
        _write_common_files(root)
        (root / "app.py").write_text("old\n", encoding="utf-8")
        (root / "fake_writer.py").write_text("\n".join(fake_writer_lines), encoding="utf-8")

        # Pipeline under test: a file_writer stage followed by patch validation.
        config = make_config(
            root,
            (
                StageConfig(id="write", type="file_writer", agent="writer"),
                StageConfig(id="validate", type="patch_validator"),
            ),
        )
        config.agents["writer"] = AgentConfig(
            id="writer",
            backend="command",
            command="python fake_writer.py",
            system_prompt=Path("planner.md"),
        )
        store = ArtifactStore(root, ".nightshift", run_id="test-run")
        runner = PipelineRunner(config, store)

        result = runner.run_task(parse_tasks(TASK_MD)[0])

        task_dir = root / ".nightshift" / "runs" / "test-run" / "tasks" / "TASK-001"
        self.assertEqual(result.status, "complete")
        self.assertIn("+new", (task_dir / "proposed.patch").read_text(encoding="utf-8"))
|
||||
|
||||
def test_patch_validator_rejects_unsafe_patch(self) -> None:
|
||||
with tempfile.TemporaryDirectory() as directory:
|
||||
root = Path(directory)
|
||||
|
|
|
|||
|
|
@ -29,6 +29,18 @@ class ReportGeneratorTests(unittest.TestCase):
|
|||
reporter = ReportGenerator(root, artifacts)
|
||||
task = parse_tasks(TASK_MD)[0]
|
||||
context_out = artifacts.write_stage_output(task.id, "context-out.md", "# Context Out\n")
|
||||
artifacts.run_log_path.write_text(
|
||||
"\n".join(
|
||||
[
|
||||
"2026-05-17T00:00:00Z | stage.start | Starting stage | stage_id=plan | stage_type=agent",
|
||||
"2026-05-17T00:00:01Z | tool.call | Running repo lookup tool | path=. | pattern=def parse\\( | tool=grep",
|
||||
"2026-05-17T00:00:02Z | stage.start | Starting stage | stage_id=implement | stage_type=file_writer",
|
||||
"2026-05-17T00:00:03Z | tool.call | Running repo lookup tool | path=lisp.py | tool=read_file",
|
||||
"2026-05-17T00:00:04Z | command.start | Starting command | command=python -m unittest | stage_id=test",
|
||||
]
|
||||
),
|
||||
encoding="utf-8",
|
||||
)
|
||||
|
||||
report = reporter.write_reports(
|
||||
task,
|
||||
|
|
@ -53,7 +65,11 @@ class ReportGeneratorTests(unittest.TestCase):
|
|||
self.assertIn("Retry count: 1", report.final_notes_path.read_text(encoding="utf-8"))
|
||||
self.assertIn("test", report.stage_results_path.read_text(encoding="utf-8"))
|
||||
self.assertIn("Final notes", report.run_summary_path.read_text(encoding="utf-8"))
|
||||
self.assertIn("Tests reported", report.devlog_path.read_text(encoding="utf-8"))
|
||||
devlog = report.devlog_path.read_text(encoding="utf-8")
|
||||
self.assertIn("Tests reported", devlog)
|
||||
self.assertIn("Planner: searched", devlog)
|
||||
self.assertIn("Implementer: read `lisp.py`", devlog)
|
||||
self.assertIn("test: ran `python -m unittest`", devlog)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user