From 90e4c801160cc401c190f5aa26cfa16521d0beb9 Mon Sep 17 00:00:00 2001 From: "K. Hodges" Date: Thu, 21 May 2026 04:14:05 -0700 Subject: [PATCH] bugfixes after test run and terminal status --- docs/ideas.md | 304 ++++--------------------------- docs/writer-idea.md | 396 +++++++++++++++++++++++++++++++++++++++++ nightshift/cli.py | 13 +- nightshift/runlog.py | 41 ++++- nightshift/terminal.py | 16 +- tests/test_terminal.py | 44 ++++- 6 files changed, 533 insertions(+), 281 deletions(-) create mode 100644 docs/writer-idea.md diff --git a/docs/ideas.md b/docs/ideas.md index 848c6c2..8661d2b 100644 --- a/docs/ideas.md +++ b/docs/ideas.md @@ -1,194 +1,17 @@ # Ideas TODO -This file is now prioritized inline. Priority scale: +This file tracks open ideas only. Completed items should be removed after they land. + +Priority scale: - P0: do next; directly improves current feedback loop - P1: important after the current loop is usable - P2: useful, but only after basics are stable - P3: defer or maybe reject -## P0: Make Integration Tests Easy To Run - -Status: implemented. - -Implemented command: - -```powershell -python -m nightshift.cli integ-test --template tutorial-deaddrop --task TASK-001 -``` - -It creates the integration sandbox, sets up the venv, runs validation through setup, runs the task from the generated project directory, and prints the artifact root. Use `--dry-run` to preview the setup and task command. - -Running integration tests is still too manual. - -Current process: - -- install the current version of NightShift -- run `python -m nightshift.cli integ-run --template tutorial-deaddrop --setup` -- copy the activation line from the output and run it -- `cd` into the generated directory -- run the task there, because running from the repo root does not find `nightshift.yaml` - -Recommendation: implement a wrapper command, not just a loose script. - -Target command: - -```powershell -python -m nightshift.cli integ-test --template tutorial-deaddrop --task TASK-001 -``` - -It should: - -1. create the integration run -2. set up the venv -3. install NightShift from the current checkout -4. run `nightshift validate` -5. run the selected task from the generated project directory -6. print final status and artifact path - -Useful variants: - -```powershell -python -m nightshift.cli integ-test --template tutorial-deaddrop --all -python -m nightshift.cli integ-test --template tutorial-deaddrop --task TASK-002 --keep 3 -``` - -The base-directory config issue may not be a core bug, but it is bad UX. The wrapper should handle `cwd` correctly. - -## P0/P1: Remove Multi-Candidate Workflow From Default DeadDrop - -Status: implemented for the default DeadDrop template and tutorial example. - -Original idea: - -- The multi-candidate workflow does not add as much as expected. -- Keep it as an example, maybe `example-multiagent`. - -Recommendation: yes. Remove it from the default DeadDrop tutorial. - -Reason: - -- DeadDrop is becoming the reliability harness. -- Multi-candidate fallback makes artifacts harder to reason about. -- It adds model variability while we are still debugging pipeline behavior. - -Better split: - -```text -tutorial-deaddrop -tutorial-deaddrop-multiagent -``` - -or: - -```text -examples/templates/multiagent-fallback -``` - -Default DeadDrop should be boring: - -```text -planner -> semantic_context -> context -> implement -> validate -> test -> review -``` - -Use one strong implementer first. Add fallback only in a separate experiment template. - -## P1: Add A Qwen3 / 30B DeadDrop Variant - -Status: implemented as the default DeadDrop model path using `qwen3-coder:30b`. - -Original idea: - -- Use a non-coder model for planner roles. -- Try `qwen3.6:27b` for planning. -- Use `qwen3-coder:30b` for implementer and code-heavy roles. - -Recommendation: viable, but make this a variant, not the default. - -kass reply- No lets make this the default. the qwen3-coder:30b is fast now for me for some reason. - -Suggested template/config: - -```text -tutorial-deaddrop-qwen3 -``` - -Possible role split: - -- planner: `qwen3.6:27b` -- reviewer/debugger: `qwen3.6:27b` -- implementer: `qwen3-coder:30b` or exact local 30B coder model name - -Important: confirm exact model names with: - -```powershell -ollama list -``` - -i did its `qwen3-coder:30b` - -Use 30B where it pays: - -- first implementation for hard tasks -- repair after concrete test failure -- schema/database changes -- multi-file changes - -Do not blindly make every stage 30B if it is slow. - -reply: Its not slow now!`qwen3-coder:30b` - -## P2: Expose More Model Parameters - -Status: implemented for the practical first set. - -Supported optional Ollama fields now include `num_ctx`, `num_predict`, `seed`, and `stop`, in addition to existing `temperature`. - -Original question: - -- What else besides temperature is available? -- Are any worth optimizing? - -Likely useful for Ollama: - -- `temperature` -- `num_ctx` -- `num_predict` -- `seed` -- `stop` -- maybe `top_p`, `top_k`, `repeat_penalty` - -Recommendation: add only a small practical set first. - -Useful config shape: - -```yaml -temperature: 0.1 -num_ctx: 8192 -num_predict: 4096 -seed: 1 -``` - -Most useful: - -- `num_ctx`: larger repo/task context -- `num_predict`: caps runaway output -- `seed`: reproducibility, if supported consistently -- `temperature`: already useful; keep low for code -- `stop`: could help enforce file-block or diff-only contracts - -Defer tuning `top_p`, `top_k`, and `repeat_penalty` unless a specific model needs it. - -reply: yup lets put this in the nightshift.yaml (optional parameters, if they arent in there that's fine, but we should offer them.) - ## P1: Add Test Governance For Generated Tests -Original idea: - -- Have a test governance layer for when agents write tests. -- A reviewer validates alignment with acceptance criteria. - -Recommendation: yes, but only for generated-test mode. Do not put generated tests back into default DeadDrop yet. +Use this only for generated-test mode. Do not put generated tests back into the default DeadDrop fixed-test pipeline yet. The previous failures proved test-writing agents will: @@ -208,19 +31,13 @@ Deterministic checks: - tests do not import undeclared dependencies - tests do not define Flask routes or app implementation - test names match current task id or current artifact -- no future-task keywords unless accepted by current task AC +- no future-task keywords unless accepted by current task acceptance criteria Then optional model reviewer checks acceptance-criteria alignment. ## P2: Add A Test Analyzer Agent For TDD -Original idea: - -- Analyze tests. -- Translate them into direct instructions for the implementer. -- Maybe implement using agent YAML definitions without new NightShift features. - -Recommendation: viable, but defer until generated tests are stable. +Defer until generated tests are stable. Possible pipeline: @@ -244,12 +61,7 @@ This may help smaller models, but it is another model output that can be wrong. ## P2/P3: Add A Test Planner -Original idea: - -- A test planner understands acceptance criteria and code. -- Provides input to the next stage about constraints and code, especially for non-TDD. - -Recommendation: maybe, but defer. +Maybe, but defer. This overlaps with: @@ -267,85 +79,8 @@ test_planner -> write_tests -> test_governance -> implement For now, fold this idea into the future test governance/analyzer work. -## P1: Add Fixed Tests For All DeadDrop Tasks - -Status: mostly implemented in the template. - -Current fixed tests: - -```text -tests/test_task001.py -tests/test_task002.py -tests/test_task003.py -tests/test_task004.py -tests/test_task005.py -``` - -Important design: - -```yaml -python -m pytest -q tests/test_{task_id_compact}.py -``` - -This lets all future task tests exist without breaking earlier tasks. - -Next step: validate these through integration runs, one task at a time. - -## P1: Add `nightshift integ-report` - -Status: implemented as a first-pass artifact summarizer. - -New idea. - -Summarize latest integration run across tasks: - -```text -TASK-001 complete in 1 retry -TASK-002 failed at validate_patch -Root cause: protected tests modified -Artifacts: ... -``` - -Right now we inspect artifacts manually. NightShift should do more of that. - -Possible command: - -```powershell -python -m nightshift.cli integ-report --latest -``` - -## P1: Add Task-Test Preflight To `validate` - -Status: implemented. - -`nightshift validate` now renders task command placeholders for every task and fails early if a configured `tests/test_*.py` path is missing. - -Partially implemented at run time. - -Current behavior: - -- task command placeholders can render paths like `tests/test_task002.py` -- `run_task` preflight fails before invoking agents if the task-specific test file is missing - -Better behavior: - -```powershell -nightshift validate -``` - -should warn or fail: - -```text -TASK-003 expects tests/test_task003.py and it exists. -TASK-004 expects tests/test_task004.py and it exists. -``` - -This catches missing fixed tests earlier. - ## P2: Add Run Comparison -New idea. - Useful once comparing 14B vs 30B: ```powershell @@ -364,3 +99,28 @@ Show: This should come after `integ-test` and `integ-report`. +## P2: Add A Separate Multiagent/Fallback DeadDrop Experiment + +Keep the default DeadDrop template boring and deterministic: + +```text +planner -> semantic_context -> context -> implement -> validate -> test -> review +``` + +If fallback is useful, put it in a separate experiment template, for example: + +```text +tutorial-deaddrop-multiagent +``` + +or: + +```text +examples/templates/multiagent-fallback +``` + +Reason: + +- fallback makes artifacts harder to reason about +- model variability is bad while debugging pipeline behavior +- the default template should remain the reliability harness diff --git a/docs/writer-idea.md b/docs/writer-idea.md new file mode 100644 index 0000000..b16041c --- /dev/null +++ b/docs/writer-idea.md @@ -0,0 +1,396 @@ +# Agentic Novel Writing Workflow Idea + +NightShift could plausibly support non-coding workflows, especially long-form fiction, because the core abstraction is not actually "write code." It is: + +- read task context +- call one or more agents +- produce artifacts +- validate outputs +- update project state +- move to the next task + +That maps surprisingly well to writing a novel. + +## Core Realization + +A novel workflow should not ask one model to write the whole book, or even necessarily one whole chapter. + +The durable project files would act like the source of truth: + +- `worldbuilding.md` +- `characters.md` +- `plot-state.md` +- `style-guide.md` +- `outline.md` +- `chapters/chapter-001.md` +- `chapters/chapter-001-scene-001.md` +- `tasks.md` + +The task file would drive the work, similar to coding tasks: + +```text +- [ ] SCENE-001: Opening scene at the border checkpoint + +Description: +Write the opening scene where Mara tries to enter the city under a false work permit. + +Acceptance Criteria: +- Introduces Mara's immediate goal +- Shows the checkpoint culture without exposition dump +- Mentions the salt tax conflict indirectly +- Ends with the inspector noticing the forged seal +- 900-1400 words +- Maintains close third-person POV +``` + +NightShift would run one scene or section at a time. + +## What We Already Have + +NightShift already has several useful primitives: + +- task files for chunking the novel into scenes or chapter sections +- scoped paths so agents only edit allowed writing/project files +- artifact output so drafts, reviews, and notes are preserved +- retry loops for revision +- planner/reviewer/debugger-style roles +- repo context and semantic context retrieval +- command stages that could run deterministic checks +- file-writer stages that can update Markdown files +- `lookup_requests` so agents can ask to read worldbuilding or prior scenes + +That means this may not require a totally new engine. It may mostly need a new template and some writing-specific validation/review stages. + +## Likely Workflow + +One practical pipeline: + +```text +plan_scene +gather_context +draft_scene +validate_scene +continuity_review +style_review +update_plot_state +summarize +``` + +Possible roles: + +- Planner: turns the scene task into a beat plan. +- Context agent: pulls relevant worldbuilding, character, and plot-state excerpts. +- Drafting agent: writes the scene. +- Continuity reviewer: checks contradictions against known state. +- Style reviewer: checks POV, tone, pacing, and prose constraints. +- State updater: updates `plot-state.md`, `characters.md`, and maybe `timeline.md`. + +## Chunking Strategy + +Do not make a task equal to "write chapter 4" unless chapters are short. + +Better units: + +- scene +- scene fragment +- chapter section +- revision pass for one scene +- continuity update after one scene +- prose polish for one scene + +A chapter can be assembled from multiple scene files: + +```text +chapters/ + chapter-001/ + scene-001.md + scene-002.md + scene-003.md + chapter-001.md +``` + +Then a later command or agent stage can compile `chapter-001.md`. + +## Durable State Files + +The most important design piece is explicit state. + +Recommended files: + +```text +story/ + worldbuilding.md + style-guide.md + characters.md + timeline.md + plot-state.md + unresolved-threads.md + continuity-rules.md + outline.md + chapters/ +``` + +`plot-state.md` should be updated after every completed scene. + +It should track: + +- current character locations +- known secrets +- promises made to the reader +- unresolved questions +- relationships +- injuries/resources/items +- timeline date/time +- what each POV character currently knows + +This is the fiction equivalent of application state. + +## Validation Ideas + +Some checks can be deterministic: + +- word count range +- file exists +- only allowed files changed +- Markdown heading format +- no forbidden placeholders like `TODO`, `[insert]`, or `TBD` +- no accidental author notes in final prose +- required task terms are present +- output compiles into a chapter file + +Some checks need model review: + +- continuity with worldbuilding +- character voice consistency +- POV discipline +- pacing +- whether the scene satisfies the beat plan +- whether exposition is too direct +- whether the state update accurately reflects the scene + +The key is not to overtrust model review. It should produce actionable retry notes, not silently bless everything. + +## What Might Be Missing + +### 1. Better Non-Code Templates + +This likely needs a dedicated template: + +```text +tutorial-deaddrop +tutorial-novel +``` + +or: + +```text +writer-novel +``` + +The template would include: + +- starter story files +- writing prompts +- task examples +- validation commands +- allowed paths +- recommended pipeline + +### 2. Better Markdown Patch/File Handling + +The current file-writer flow can work, but fiction output may be long. It may be safer to require complete file blocks for one scene file at a time. + +The workflow should avoid having an agent rewrite the whole novel or whole `plot-state.md` unless necessary. + +### 3. Stronger State Update Governance + +The risky part is not drafting prose. The risky part is bad state updates. + +Example failure: + +- the scene says Mara never saw the prince +- the state updater records that Mara recognized the prince +- future scenes build on the wrong state + +A state update should probably be reviewed against the actual scene before being applied. + +Possible pipeline: + +```text +draft_scene -> review_scene -> propose_state_update -> review_state_update -> apply +``` + +### 4. Context Window Management + +Worldbuilding documents can get large. + +The agent should not receive the entire story bible every time. It should receive: + +- the current task +- relevant worldbuilding excerpts +- relevant character entries +- recent scene summaries +- current plot state +- style guide + +Semantic search is probably enough for a first version, but a novel template may want a more explicit index: + +```text +world-index.md +character-index.md +location-index.md +``` + +### 5. Scene Dependency Tracking + +Coding tasks already have dependencies. Fiction tasks would need the same: + +```text +Dependencies: +- SCENE-001 +- SCENE-002 +``` + +This prevents writing a later scene before the required earlier story state exists. + +### 6. Revision Workflows + +Writing is not only forward generation. + +Useful task types: + +- draft new scene +- revise scene for pacing +- revise dialogue +- continuity repair +- line edit +- chapter assembly +- chapter-level review +- update outline after discovery writing + +NightShift can already represent these as tasks, but the prompts should distinguish them clearly. + +### 7. Output Length Controls + +Long fiction output needs explicit limits. + +Use: + +- scene word count bounds +- `num_predict` +- task acceptance criteria +- smaller scene files + +Do not ask for "write chapter 12" unless the chapter has already been broken into beats. + +## Suggested First Template + +Start with a minimal `writer-novel` template. + +Files: + +```text +nightshift.yaml +.nightshift/tasks.md +.nightshift/agents/planner.md +.nightshift/agents/drafter.md +.nightshift/agents/continuity-reviewer.md +.nightshift/agents/style-reviewer.md +.nightshift/agents/state-updater.md +story/worldbuilding.md +story/characters.md +story/style-guide.md +story/plot-state.md +story/timeline.md +story/unresolved-threads.md +story/chapters/.gitkeep +``` + +Pipeline: + +```text +plan +semantic_context +context +draft +validate_draft +continuity_review +style_review +update_state +validate_state +summarize +``` + +Allowed paths: + +```yaml +scoped_paths: + - story + - .nightshift/tasks.md +``` + +Draft stage allowed paths: + +```yaml +allowed_paths: + - story/chapters +``` + +State update stage allowed paths: + +```yaml +allowed_paths: + - story/plot-state.md + - story/characters.md + - story/timeline.md + - story/unresolved-threads.md +``` + +That separation matters. The drafter should not freely rewrite the world bible, and the state updater should not rewrite the scene prose. + +## What We Should Not Do First + +Do not start with: + +- automatic full-plot generation +- full chapter generation +- global rewrites of all prior chapters +- one giant `worldbuilding.md` dumped into every prompt +- trusting the model to maintain continuity without explicit state files + +Those are likely to produce impressive-looking but unstable output. + +## Practical First Experiment + +A good first test: + +1. Create a tiny worldbuilding document. +2. Create three characters. +3. Create five scene tasks. +4. Have NightShift draft one scene at a time. +5. After each scene, update `plot-state.md`. +6. Run continuity review against only the scene, state files, and relevant worldbuilding. +7. Inspect artifacts. + +Success criteria: + +- scenes land in the right files +- word counts stay bounded +- state updates are accurate +- future scenes use prior state correctly +- reviewers catch obvious contradictions + +## Bottom Line + +Theoretically, NightShift already has many of the needed utilities. + +The missing piece is mostly a writing-oriented template with: + +- scene-sized tasks +- durable story state files +- strict path separation between prose and state updates +- writing-specific prompts +- lightweight deterministic validators +- continuity/style review stages + +This is viable, but it should start as a constrained scene-writing workflow, not an autonomous novel generator. diff --git a/nightshift/cli.py b/nightshift/cli.py index 7dff729..e25cdef 100644 --- a/nightshift/cli.py +++ b/nightshift/cli.py @@ -55,7 +55,7 @@ def build_parser() -> argparse.ArgumentParser: run_parser.add_argument("--all", action="store_true", help="Run all runnable incomplete tasks.") run_parser.add_argument( "--animation", - default="agent_thinking", + default="status_dots", choices=tuple(sorted(HOTDOG_ANIMATIONS)), help="Terminal animation to show while the run is active.", ) @@ -210,13 +210,13 @@ def main(argv: list[str] | None = None) -> int: validate_task_dependencies(tasks) if args.all and args.task: parser.error("run accepts either --all or --task, not both.") - runner = PipelineRunner(config, logger=RunLogger(console=print)) if args.all: with TerminalAnimation( args.animation, - message="NightShift running all tasks", + message="Starting all tasks", enabled=not args.no_animation, - ): + ) as animation: + runner = PipelineRunner(config, logger=RunLogger(console=print, status=animation.update_message)) result = runner.run_tasks(tasks) print(f"Status: {result.status}") print(f"Tasks run: {len(result.task_results)}") @@ -229,9 +229,10 @@ def main(argv: list[str] | None = None) -> int: ensure_dependencies_satisfied(tasks, task) with TerminalAnimation( args.animation, - message=f"NightShift running {task.id}", + message=f"Task: {task.id} | Starting", enabled=not args.no_animation, - ): + ) as animation: + runner = PipelineRunner(config, logger=RunLogger(console=print, status=animation.update_message)) result = runner.run_task(task) print(f"Task: {result.task_id}") print(style_text(f"Status: {result.status}", color=_status_color(result.status), bold=True)) diff --git a/nightshift/runlog.py b/nightshift/runlog.py index 76ff2dc..2e4a9d8 100644 --- a/nightshift/runlog.py +++ b/nightshift/runlog.py @@ -12,6 +12,7 @@ from .terminal import format_console_event_line, format_plain_event_line ConsoleWriter = Callable[[str], None] +StatusWriter = Callable[[str], None] @dataclass(frozen=True) @@ -24,8 +25,9 @@ class LogEvent: class RunLogger: """Write concise operational events to CLI and run log artifacts.""" - def __init__(self, console: ConsoleWriter | None = None) -> None: + def __init__(self, console: ConsoleWriter | None = None, status: StatusWriter | None = None) -> None: self.console = console + self.status = status self._run_log_path: Path | None = None self._aggregate_log_path: Path | None = None self._initialized_run_logs: set[Path] = set() @@ -45,6 +47,10 @@ class RunLogger: line = format_plain_event_line(timestamp, event, message, safe_fields) if self.console is not None: self.console(format_console_event_line(timestamp, event, message, safe_fields)) + if self.status is not None: + status_message = format_status_event_message(event, message, safe_fields) + if status_message: + self.status(status_message) for path in (self._run_log_path,): if path is None: continue @@ -69,6 +75,39 @@ def format_log_line(log_event: LogEvent) -> str: return format_plain_event_line(timestamp, log_event.event, log_event.message, log_event.fields) +def format_status_event_message(event: str, message: str, fields: dict[str, object]) -> str | None: + task_id = str(fields.get("task_id", "") or "") + retry = fields.get("retry_count") + retry_text = f" retry {retry}" if retry not in (None, "") else "" + stage_id = str(fields.get("stage_id", "") or "") + stage_type = str(fields.get("stage_type", "") or "") + agent_id = str(fields.get("agent_id", "") or "") + model = str(fields.get("model", "") or "") + command = str(fields.get("command", "") or "") + status = str(fields.get("status", "") or "") + next_stage = str(fields.get("next_stage", "") or "") + + prefix = f"Task: {task_id} | " if task_id else "" + if event == "task.start": + return f"Task: {task_id} | Starting" if task_id else "Starting task" + if event == "stage.start" and stage_id: + label = f"{stage_id} ({stage_type})" if stage_type else stage_id + return f"{prefix}Stage: {label}{retry_text}" + if event == "agent.start": + model_text = f" | Model: {model}" if model else "" + return f"{prefix}Agent: {agent_id or stage_id}{model_text}" + if event == "command.start": + return f"{prefix}Command: {command or stage_id}" + if event == "stage.retry": + return f"{prefix}Retrying after {stage_id} -> {next_stage}{retry_text}" + if event in {"stage.finish", "task.finish"} and status: + target = f"Stage: {stage_id}" if event == "stage.finish" and stage_id else "Task" + return f"{prefix}{target} {status}" + if event.endswith(".start"): + return f"{prefix}{message}" + return None + + def tail_lines(path: Path, limit: int = 100) -> list[str]: if limit <= 0: return [] diff --git a/nightshift/terminal.py b/nightshift/terminal.py index 662347c..46a954d 100644 --- a/nightshift/terminal.py +++ b/nightshift/terminal.py @@ -44,6 +44,13 @@ BANNER_MESSAGES = [ quote = random.choice(BANNER_MESSAGES) HOTDOG_ANIMATIONS = { + "status_dots": [ + "[. ]", + "[.. ]", + "[...]", + "[ ..]", + "[ .]", + ], "classic_dance": [ "🌭", "ヽ(🌭)οΎ‰", @@ -158,6 +165,7 @@ class TerminalAnimation: self._stop = threading.Event() self._thread: threading.Thread | None = None self._width = 0 + self._lock = threading.Lock() def __enter__(self) -> "TerminalAnimation": self.start() @@ -180,11 +188,17 @@ class TerminalAnimation: self._clear() self._thread = None + def update_message(self, message: str) -> None: + with self._lock: + self.message = message + def _run(self) -> None: index = 0 while not self._stop.is_set(): frame = self.frames[index % len(self.frames)] - text = f"{frame} {self.message}" + with self._lock: + message = self.message + text = f"{frame} | {message}" self._width = max(self._width, len(text)) self.stream.write("\r" + text.ljust(self._width)) self.stream.flush() diff --git a/tests/test_terminal.py b/tests/test_terminal.py index f86f2f3..1764323 100644 --- a/tests/test_terminal.py +++ b/tests/test_terminal.py @@ -5,7 +5,7 @@ import unittest from unittest.mock import patch from nightshift.artifacts import ArtifactStore -from nightshift.runlog import RunLogger +from nightshift.runlog import RunLogger, format_status_event_message from nightshift.terminal import ( HOTDOG_ANIMATIONS, TerminalAnimation, @@ -34,6 +34,7 @@ class TerminalStylingTests(unittest.TestCase): def test_animation_frames_fall_back_to_agent_thinking(self) -> None: self.assertEqual(animation_frames("missing"), tuple(HOTDOG_ANIMATIONS["agent_thinking"])) self.assertEqual(animation_frames("classic_dance"), tuple(HOTDOG_ANIMATIONS["classic_dance"])) + self.assertEqual(animation_frames("status_dots"), tuple(HOTDOG_ANIMATIONS["status_dots"])) def test_terminal_animation_is_disabled_for_non_tty(self) -> None: stream = StringIO() @@ -84,6 +85,47 @@ class TerminalStylingTests(unittest.TestCase): self.assertNotIn("\x1b[", run_log) self.assertNotIn("abc", run_log) + def test_run_logger_status_callback_gets_compact_stage_message(self) -> None: + with tempfile.TemporaryDirectory() as directory: + root = Path(directory) + artifacts = ArtifactStore(root, ".nightshift", run_id="test-run") + statuses: list[str] = [] + logger = RunLogger(status=statuses.append) + logger.bind(artifacts) + + logger.event( + "stage.start", + "Starting stage", + task_id="TASK-001", + stage_id="implement", + stage_type="file_writer", + retry_count=2, + ) + logger.event( + "agent.start", + "Starting agent", + task_id="TASK-001", + agent_id="implementer", + model="qwen3-coder:30b", + ) + + self.assertEqual(statuses[0], "Task: TASK-001 | Stage: implement (file_writer) retry 2") + self.assertEqual(statuses[1], "Task: TASK-001 | Agent: implementer | Model: qwen3-coder:30b") + + def test_format_status_event_message_reports_retries(self) -> None: + message = format_status_event_message( + "stage.retry", + "Redirecting after stage result", + { + "task_id": "TASK-001", + "stage_id": "test", + "next_stage": "implement", + "retry_count": 1, + }, + ) + + self.assertEqual(message, "Task: TASK-001 | Retrying after test -> implement retry 1") + if __name__ == "__main__": unittest.main()