nightshift/nightshift/project_templates/tutorial-pastebin/nightshift.yaml
K. Hodges 3bb5bd4157 Fixes based on tests, do tdd
Changed the pastebin tutorial so it now starts skeletal: no prebuilt Flask behavior, no pre-generated task tests, and .gitkeep placeholders under templates/ and tests/. The new pipeline in nightshift/project_templates/tutorial-pastebin/nightshift.yaml:1 now runs:

  plan -> semantic_context -> context -> write_tests -> review_tests -> implement -> pytest -> review
  Added nightshift/project_templates/tutorial-pastebin/.nightshift/agents/test-writer.md:1, tightened the planner/implementer/reviewer/debugger prompts, mirrored the pipeline docs/example,
  and raised default retries to 6 for the basic starter plus pastebin.

  I also fixed the retry policy issue in nightshift/escalation.py:17 and nightshift/pipeline.py:251: configured repeated-failure thresholds are now respected instead of hard-stopping
  early after three same-stage/same-cause failures. Non-implementation file_writer stages now get stage-specific retry artifacts so test generation does not collide with implementation
  repair artifacts.
2026-05-20 21:51:40 -07:00

161 lines
3.3 KiB
YAML

# Project identity and file locations for this template.
# NOTE(review): paths look relative to `root` — confirm against the config loader.
project:
  name: pastebin
  root: .
  # Markdown file holding the task list.
  task_file: .nightshift/tasks.md
  # Directory for pipeline artifacts; stage `output` names are presumably
  # written here — TODO confirm.
  artifact_dir: .nightshift
# Guard rails: which paths and commands are in bounds.
safety:
  # false: a dirty git worktree is not required to be clean.
  require_clean_worktree: false
  # Paths listed as in scope (presumably the only ones patches may touch —
  # verify against the patch validator).
  scoped_paths:
    - src
    - tests
    - templates
    - pyproject.toml
    - README.md
  # The `test` stage's command matches this entry exactly.
  allowed_commands:
    - python -m pytest -q
  forbidden_commands:
    - rm -rf
    - git push
    # Quoted so the `|` can never be mistaken for a block-scalar indicator;
    # the string value is unchanged.
    - 'curl | bash'
# Experiment metadata. Presumably used to tag runs for comparison — TODO
# confirm how the runner consumes these labels.
experiment:
  label: pastebin-model-fallback
  prompt_variant: tdd-qwen-omnicoder-deepseek-v2
# Agent definitions. Every agent uses the `ollama` backend; each entry sets
# a model, a sampling temperature, and a system-prompt file.
agents:
  planner:
    backend: ollama
    model: qwen2.5-coder:14b
    temperature: 0.2
    system_prompt: .nightshift/agents/planner.md

  # The three implementer_* agents share one system prompt and differ only
  # in model; they are the members of the `implement` stage's `agent_pool`.
  implementer_qwen:
    backend: ollama
    model: qwen2.5-coder:14b
    temperature: 0.1
    system_prompt: .nightshift/agents/implementer.md

  test_writer:
    backend: ollama
    model: qwen2.5-coder:14b
    temperature: 0.1
    system_prompt: .nightshift/agents/test-writer.md

  implementer_omnicoder:
    backend: ollama
    model: carstenuhlig/omnicoder-9b
    temperature: 0.1
    system_prompt: .nightshift/agents/implementer.md

  implementer_deepseek:
    backend: ollama
    model: deepseek-coder-v2:16b
    temperature: 0.1
    system_prompt: .nightshift/agents/implementer.md

  # NOTE(review): no stage in this file references `debugger`, and it is the
  # only agent with an explicit `role`; presumably the runner selects it by
  # role during retries — verify.
  debugger:
    backend: ollama
    model: qwen2.5-coder:14b
    role: debugger
    temperature: 0.1
    system_prompt: .nightshift/agents/debugger.md

  # Used by both the `review_tests` and `review` stages.
  reviewer:
    backend: ollama
    model: qwen2.5-coder:14b
    temperature: 0.1
    system_prompt: .nightshift/agents/reviewer.md
# Task pipeline. Stages are listed in the order
#   plan -> semantic_context -> context
#   -> write_tests -> normalize_tests -> validate_tests_patch
#   -> apply_tests_patch -> review_tests
#   -> implement -> normalize -> validate_patch -> apply_patch
#   -> test -> review -> summarize
# NOTE(review): assumes the runner executes stages top to bottom and that
# `on_fail` names the stage id to return to — confirm against the runner.
pipeline:
  max_task_retries: 6
  # Same value as max_task_retries.
  stop_on_repeated_failure_signature_after: 6
  continue_on_task_failure: false
  stages:
    # --- Planning and context gathering ---
    - id: plan
      type: agent
      agent: planner
      output: plan.md

    - id: semantic_context
      type: semantic_context
      output: semantic-context.md

    - id: context
      type: repo_context
      output: context-pack.md

    # --- Test-first phase: failures loop back to `write_tests` ---
    - id: write_tests
      type: file_writer
      agent: test_writer
      output: proposed-tests.patch

    - id: normalize_tests
      type: patch_normalizer
      output: normalized-tests.patch

    # Tighter limits than the implementation patch validator below.
    - id: validate_tests_patch
      type: patch_validator
      output: test-patch-validation.md
      max_files: 6
      max_lines: 500
      max_delete_ratio: 0.70
      on_fail: write_tests

    - id: apply_tests_patch
      type: patch_apply
      mode: apply
      output: test-patch-apply-output.txt
      on_fail: write_tests

    - id: review_tests
      type: agent_review
      agent: reviewer
      output: test-review.md
      on_fail: write_tests

    # --- Implementation phase: failures loop back to `implement` ---
    # Uses a pool of agents rather than a single `agent`; presumably the
    # runner falls back through the pool across retries — TODO confirm.
    - id: implement
      type: file_writer
      agent_pool:
        - implementer_qwen
        - implementer_omnicoder
        - implementer_deepseek
      output: proposed.patch

    - id: normalize
      type: patch_normalizer
      output: normalized.patch

    - id: validate_patch
      type: patch_validator
      output: patch-validation.md
      max_files: 12
      max_lines: 900
      max_delete_ratio: 0.70
      on_fail: implement

    - id: apply_patch
      type: patch_apply
      mode: apply
      output: patch-apply-output.txt
      on_fail: implement

    # Runs the test suite; the command matches safety.allowed_commands.
    - id: test
      type: command
      commands:
        - python -m pytest -q
      output: test-output.txt
      shell: true
      timeout_seconds: 25
      on_fail: implement

    - id: review
      type: agent_review
      agent: reviewer
      output: review.md
      on_fail: implement

    # --- Wrap-up ---
    - id: summarize
      type: summarize
      output: final-notes.md