mirror of
https://github.com/khodges42/nightShift.git
synced 2026-06-14 18:18:36 +00:00
fix guard and make stop repeat configurable
This commit is contained in:
parent
2f2146f47d
commit
c12493a248
BIN
docs/images/7dea0c14-54fc-459c-a739-835de945fa72.png
Normal file
BIN
docs/images/7dea0c14-54fc-459c-a739-835de945fa72.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 1.7 MiB |
|
|
@ -78,6 +78,7 @@ class PipelineConfig:
|
||||||
max_task_retries: int
|
max_task_retries: int
|
||||||
stages: tuple[StageConfig, ...]
|
stages: tuple[StageConfig, ...]
|
||||||
continue_on_task_failure: bool = False
|
continue_on_task_failure: bool = False
|
||||||
|
stop_on_repeated_failure_signature_after: int | None = None
|
||||||
|
|
||||||
|
|
||||||
@dataclass(frozen=True)
|
@dataclass(frozen=True)
|
||||||
|
|
@ -265,6 +266,14 @@ def parse_config(raw: dict[str, Any], config_path: Path) -> NightShiftConfig:
|
||||||
pipeline_raw.get("continue_on_task_failure", False),
|
pipeline_raw.get("continue_on_task_failure", False),
|
||||||
"pipeline.continue_on_task_failure",
|
"pipeline.continue_on_task_failure",
|
||||||
)
|
)
|
||||||
|
stop_on_repeated_failure_signature_after = _optional_int_or_none(
|
||||||
|
pipeline_raw.get("stop_on_repeated_failure_signature_after"),
|
||||||
|
"pipeline.stop_on_repeated_failure_signature_after",
|
||||||
|
)
|
||||||
|
if stop_on_repeated_failure_signature_after is not None and stop_on_repeated_failure_signature_after < 2:
|
||||||
|
raise ConfigError(
|
||||||
|
"Config error: pipeline.stop_on_repeated_failure_signature_after must be two or greater."
|
||||||
|
)
|
||||||
|
|
||||||
stages_raw = pipeline_raw.get("stages")
|
stages_raw = pipeline_raw.get("stages")
|
||||||
if not isinstance(stages_raw, list) or not stages_raw:
|
if not isinstance(stages_raw, list) or not stages_raw:
|
||||||
|
|
@ -396,6 +405,7 @@ def parse_config(raw: dict[str, Any], config_path: Path) -> NightShiftConfig:
|
||||||
max_task_retries=max_task_retries,
|
max_task_retries=max_task_retries,
|
||||||
stages=tuple(stages),
|
stages=tuple(stages),
|
||||||
continue_on_task_failure=continue_on_task_failure,
|
continue_on_task_failure=continue_on_task_failure,
|
||||||
|
stop_on_repeated_failure_signature_after=stop_on_repeated_failure_signature_after,
|
||||||
),
|
),
|
||||||
experiment=experiment,
|
experiment=experiment,
|
||||||
)
|
)
|
||||||
|
|
|
||||||
|
|
@ -14,14 +14,32 @@ class EscalationDecision:
|
||||||
reason: str
|
reason: str
|
||||||
|
|
||||||
|
|
||||||
def evaluate_retry_churn(
    entries: tuple[RetryMemoryEntry, ...],
    *,
    retry_budget: int,
    repeated_signature_after: int | None = None,
) -> EscalationDecision:
    """Decide whether the retry loop should stop based on its history.

    Checks are applied in this order; the first one that matches wins:

    1. Fewer than two entries: keep going, there is nothing to compare.
    2. ``retry_budget`` is positive and the history has reached it: stop.
    3. ``repeated_signature_after`` is a positive number N and the last N
       entries (never fewer than two) all carry the same non-empty
       ``failure_signature``: stop.
    4. The last three entries share one ``stage_id`` and one ``cause``: stop.
    5. Otherwise: keep going.

    Args:
        entries: Retry history, oldest first.
        retry_budget: Number of recorded attempts after which to stop;
            values of zero or less disable the check.
        repeated_signature_after: Number of consecutive identical failure
            signatures that triggers a stop. ``None`` or a non-positive value
            disables the check.

    Returns:
        An ``EscalationDecision`` built from a stop flag, a suggested next
        step and a human-readable reason.
    """
    if len(entries) < 2:
        return EscalationDecision(False, "continue", "Not enough retry history for churn detection.")
    if retry_budget > 0 and len(entries) >= retry_budget:
        return EscalationDecision(True, "human review", "Configured retry budget is exhausted.")
    if repeated_signature_after is not None and repeated_signature_after > 0:
        # "Repeated" needs at least two observations, so a threshold of 1
        # (possible when the caller falls back to another setting) is
        # treated as 2 instead of stopping on the very first failure.
        window = max(repeated_signature_after, 2)
        if len(entries) >= window:
            # Compare the whole configured window, not just the last pair;
            # otherwise a threshold of N >= 3 would fire after two repeats.
            signatures = [entry.failure_signature for entry in entries[-window:]]
            if all(signatures) and len(set(signatures)) == 1:
                return EscalationDecision(
                    True,
                    "debugger review or larger model",
                    "The same failure signature repeated on consecutive retries.",
                )
    recent = entries[-3:]
    same_stage = len({entry.stage_id for entry in recent}) == 1
    same_cause = len({entry.cause for entry in recent}) == 1
    if len(recent) == 3 and same_stage and same_cause:
        return EscalationDecision(True, "debugger review or larger model", "The same stage is failing with the same reason repeatedly.")
    return EscalationDecision(False, "continue", "No retry churn detected.")
|
||||||
|
|
|
||||||
|
|
@ -34,6 +34,8 @@ def classify_failure(output: str, exit_code: int | None = None, modified_files:
|
||||||
text = output or ""
|
text = output or ""
|
||||||
lowered = text.lower()
|
lowered = text.lower()
|
||||||
failing_tests = extract_failing_tests(text)
|
failing_tests = extract_failing_tests(text)
|
||||||
|
exception_name = _extract_exception_name(text)
|
||||||
|
source_path, _ = _extract_traceback_location(text)
|
||||||
|
|
||||||
missing = re.search(r"No module named ['\"]([^'\"]+)['\"]", text, re.IGNORECASE)
|
missing = re.search(r"No module named ['\"]([^'\"]+)['\"]", text, re.IGNORECASE)
|
||||||
if not missing:
|
if not missing:
|
||||||
|
|
@ -48,6 +50,25 @@ def classify_failure(output: str, exit_code: int | None = None, modified_files:
|
||||||
"do not retry implementation until dependency is resolved",
|
"do not retry implementation until dependency is resolved",
|
||||||
failing_tests,
|
failing_tests,
|
||||||
)
|
)
|
||||||
|
if exception_name and source_path and _looks_like_project_source(source_path):
|
||||||
|
if exception_name in {"TypeError", "AttributeError"}:
|
||||||
|
return FailureClassification(
|
||||||
|
"API misuse",
|
||||||
|
f"The implementation is calling an API with an incompatible shape near `{source_path}`.",
|
||||||
|
0.82,
|
||||||
|
"Retry implementation with the exception and relevant call site.",
|
||||||
|
"retry implementation",
|
||||||
|
failing_tests,
|
||||||
|
)
|
||||||
|
if exception_name in {"NameError", "OperationalError", "KeyError", "ValueError", "IndexError"}:
|
||||||
|
return FailureClassification(
|
||||||
|
"logic bug",
|
||||||
|
f"The failure originates in project code near `{source_path}`.",
|
||||||
|
0.8,
|
||||||
|
"Send the traceback and touched files back to the implementer.",
|
||||||
|
"retry implementation",
|
||||||
|
failing_tests,
|
||||||
|
)
|
||||||
if re.search(r"\b(syntaxerror|indentationerror|importerror)\b", text, re.IGNORECASE):
|
if re.search(r"\b(syntaxerror|indentationerror|importerror)\b", text, re.IGNORECASE):
|
||||||
return FailureClassification(
|
return FailureClassification(
|
||||||
"syntax/import error",
|
"syntax/import error",
|
||||||
|
|
@ -113,6 +134,15 @@ def classify_failure(output: str, exit_code: int | None = None, modified_files:
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def build_failure_signature(output: str, reason: str = "") -> str:
    """Condense a failure into a short, comparable signature string.

    ``reason`` and ``output`` are joined (reason first, empty values
    skipped) and scanned for an exception name, a traceback location and a
    command. The non-empty findings are joined with ``" | "`` in the order
    exception, path, line, command. When nothing is found the result is
    ``"unknown-failure"``.
    """
    combined = "\n".join(filter(None, (reason, output)))
    command = _extract_command(combined)
    exception_name = _extract_exception_name(combined)
    location_path, location_line = _extract_traceback_location(combined)
    found = [
        piece
        for piece in (exception_name, location_path, location_line, command)
        if piece
    ]
    if not found:
        return "unknown-failure"
    return " | ".join(found)
|
||||||
|
|
||||||
|
|
||||||
def extract_failing_tests(output: str) -> tuple[str, ...]:
|
def extract_failing_tests(output: str) -> tuple[str, ...]:
|
||||||
tests: list[str] = []
|
tests: list[str] = []
|
||||||
patterns = (
|
patterns = (
|
||||||
|
|
@ -128,6 +158,56 @@ def extract_failing_tests(output: str) -> tuple[str, ...]:
|
||||||
return tuple(tests)
|
return tuple(tests)
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_exception_name(text: str) -> str:
|
||||||
|
candidates = []
|
||||||
|
for match in re.finditer(r"(?m)^(?:E\s+)?([A-Za-z0-9_.]+(?:Error|Exception|Warning|NameError|TypeError|AttributeError|KeyError|ValueError|IndexError)):\s*(.*)$", text):
|
||||||
|
candidates.append(match.group(1))
|
||||||
|
return candidates[-1] if candidates else ""
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_traceback_location(text: str) -> tuple[str, str]:
    """Return ``(path, line)`` for the best-scoring source location in *text*.

    Two location formats are collected:

    * CPython traceback frames: ``File "<path>", line <n>, in <name>``.
    * Compact ``<path>.py:<n>:`` locations. Both drive-letter paths
      (``C:\\repo\\app.py:3:``) and paths without a drive
      (``src/app.py:3:``) are accepted.

    Every candidate is ranked with ``_traceback_score``; the highest score
    wins and ties go to the candidate found first. Returns ``("", "")``
    when no location is found. The line number is returned as a string.
    """
    candidates: list[tuple[int, str, str]] = []
    for match in re.finditer(r'(?m)^\s*File "([^"]+)", line (\d+), in .+$', text):
        path, line = match.group(1), match.group(2)
        candidates.append((_traceback_score(path), path, line))
    # The drive-letter alternative comes first so Windows paths that contain
    # spaces are still captured whole; the second alternative covers paths
    # without a drive and stops at whitespace.
    for match in re.finditer(
        r"(?m)^.*?((?:[A-Za-z]:[\\/][^:\n]+?|[^:\s]+?)\.py):(\d+):",
        text,
    ):
        path, line = match.group(1), match.group(2)
        candidates.append((_traceback_score(path), path, line))
    if not candidates:
        return "", ""
    # max() returns the first maximal element, matching a stable
    # descending sort followed by taking index 0.
    _, path, line = max(candidates, key=lambda item: item[0])
    return path, line
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_command(text: str) -> str:
|
||||||
|
candidates = []
|
||||||
|
for match in re.finditer(r"Command:\s*`([^`]+)`", text):
|
||||||
|
candidates.append(match.group(1))
|
||||||
|
return candidates[-1] if candidates else ""
|
||||||
|
|
||||||
|
|
||||||
|
def _looks_like_project_source(path: str) -> bool:
|
||||||
|
normalized = path.replace("\\", "/").lower()
|
||||||
|
return "/src/" in normalized or "/tests/" in normalized
|
||||||
|
|
||||||
|
|
||||||
|
def _traceback_score(path: str) -> int:
|
||||||
|
normalized = path.replace("\\", "/").lower()
|
||||||
|
score = 0
|
||||||
|
if normalized.endswith(".py"):
|
||||||
|
score += 1
|
||||||
|
if "/src/" in normalized:
|
||||||
|
score += 10
|
||||||
|
if "/tests/" in normalized:
|
||||||
|
score += 8
|
||||||
|
if "/site-packages/" in normalized or "/_pytest/" in normalized:
|
||||||
|
score -= 20
|
||||||
|
return score
|
||||||
|
|
||||||
|
|
||||||
def format_failure_classification(result: FailureClassification, *, exit_code: int | None, modified_files: tuple[str, ...]) -> str:
|
def format_failure_classification(result: FailureClassification, *, exit_code: int | None, modified_files: tuple[str, ...]) -> str:
|
||||||
files = "\n".join(f"- `{path}`" for path in modified_files) or "- None"
|
files = "\n".join(f"- `{path}`" for path in modified_files) or "- None"
|
||||||
tests = "\n".join(f"- `{name}`" for name in result.failing_tests) or "- None"
|
tests = "\n".join(f"- `{name}`" for name in result.failing_tests) or "- None"
|
||||||
|
|
|
||||||
|
|
@ -16,7 +16,7 @@ from .dependencies import diagnose_python_dependencies, format_dependency_diagno
|
||||||
from .escalation import evaluate_retry_churn, format_escalation_decision
|
from .escalation import evaluate_retry_churn, format_escalation_decision
|
||||||
from .errors import PipelineError
|
from .errors import PipelineError
|
||||||
from .errors import NightShiftError
|
from .errors import NightShiftError
|
||||||
from .failures import classify_failure, format_failure_classification
|
from .failures import build_failure_signature, classify_failure, format_failure_classification
|
||||||
from .git import ensure_clean_worktree, write_diff_artifact, write_git_artifacts
|
from .git import ensure_clean_worktree, write_diff_artifact, write_git_artifacts
|
||||||
from .patches import (
|
from .patches import (
|
||||||
DEFAULT_FORBIDDEN_PATHS,
|
DEFAULT_FORBIDDEN_PATHS,
|
||||||
|
|
@ -232,7 +232,16 @@ class PipelineRunner:
|
||||||
)
|
)
|
||||||
break
|
break
|
||||||
retry_count += 1
|
retry_count += 1
|
||||||
memory_entry = entry_from_stage(retry_count, result, target_stage)
|
output = self._read_output(result.output_path)
|
||||||
|
failure_signature = ""
|
||||||
|
if stage.type in COMMAND_STAGE_TYPES:
|
||||||
|
failure_signature = build_failure_signature(output, result.reason)
|
||||||
|
memory_entry = entry_from_stage(
|
||||||
|
retry_count,
|
||||||
|
result,
|
||||||
|
target_stage,
|
||||||
|
failure_signature=failure_signature,
|
||||||
|
)
|
||||||
retry_memory.append(memory_entry)
|
retry_memory.append(memory_entry)
|
||||||
self.artifacts.write_stage_output(
|
self.artifacts.write_stage_output(
|
||||||
task.id,
|
task.id,
|
||||||
|
|
@ -242,6 +251,8 @@ class PipelineRunner:
|
||||||
decision = evaluate_retry_churn(
|
decision = evaluate_retry_churn(
|
||||||
tuple(retry_memory),
|
tuple(retry_memory),
|
||||||
retry_budget=self.config.pipeline.max_task_retries + 1,
|
retry_budget=self.config.pipeline.max_task_retries + 1,
|
||||||
|
repeated_signature_after=self.config.pipeline.stop_on_repeated_failure_signature_after
|
||||||
|
or self.config.pipeline.max_task_retries,
|
||||||
)
|
)
|
||||||
self.artifacts.write_stage_output(
|
self.artifacts.write_stage_output(
|
||||||
task.id,
|
task.id,
|
||||||
|
|
|
||||||
|
|
@ -14,6 +14,7 @@ class RetryMemoryEntry:
|
||||||
status: str
|
status: str
|
||||||
cause: str
|
cause: str
|
||||||
next_stage: str
|
next_stage: str
|
||||||
|
failure_signature: str
|
||||||
|
|
||||||
|
|
||||||
def summarize_retry_memory(entries: tuple[RetryMemoryEntry, ...]) -> str:
|
def summarize_retry_memory(entries: tuple[RetryMemoryEntry, ...]) -> str:
|
||||||
|
|
@ -23,17 +24,24 @@ def summarize_retry_memory(entries: tuple[RetryMemoryEntry, ...]) -> str:
|
||||||
for entry in entries[-8:]:
|
for entry in entries[-8:]:
|
||||||
lines.append(
|
lines.append(
|
||||||
f"- Attempt {entry.attempt}: `{entry.stage_id}` returned {entry.status}; "
|
f"- Attempt {entry.attempt}: `{entry.stage_id}` returned {entry.status}; "
|
||||||
f"cause: {entry.cause}; next: `{entry.next_stage}`"
|
f"cause: {entry.cause}; signature: `{entry.failure_signature}`; next: `{entry.next_stage}`"
|
||||||
)
|
)
|
||||||
lines.append("")
|
lines.append("")
|
||||||
return "\n".join(lines)
|
return "\n".join(lines)
|
||||||
|
|
||||||
|
|
||||||
def entry_from_stage(
    attempt: int,
    result: StageResult,
    next_stage: str,
    *,
    failure_signature: str = "",
) -> RetryMemoryEntry:
    """Build a ``RetryMemoryEntry`` describing one failed stage attempt.

    Args:
        attempt: Retry counter value for this attempt.
        result: Stage result; its ``stage_id``, ``status`` and ``reason``
            are copied into the entry (``reason`` becomes ``cause``).
        next_stage: Identifier of the stage the pipeline goes to next.
        failure_signature: Signature of the failure. Defaults to ``""``
            (no signature) so callers that predate this argument keep
            working.

    Returns:
        The populated ``RetryMemoryEntry``.
    """
    return RetryMemoryEntry(
        attempt=attempt,
        stage_id=result.stage_id,
        status=result.status,
        cause=result.reason,
        next_stage=next_stage,
        failure_signature=failure_signature,
    )
|
||||||
|
|
|
||||||
|
|
@ -3,10 +3,10 @@
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
|
||||||
PACKAGE_VERSION = "0.2.4"
|
PACKAGE_VERSION = "0.2.5"
|
||||||
RELEASE_CHANNEL = "alpha"
|
RELEASE_CHANNEL = "alpha"
|
||||||
hotdog_version = "bratwurst"
|
hotdog_version = "chicago"
|
||||||
topping_version = "relish"
|
topping_version = "onions"
|
||||||
|
|
||||||
HOTDOG_VERSIONS = (
|
HOTDOG_VERSIONS = (
|
||||||
"bratwurst",
|
"bratwurst",
|
||||||
|
|
|
||||||
|
|
@ -5,10 +5,12 @@ import unittest
|
||||||
|
|
||||||
from nightshift.artifacts import ArtifactStore
|
from nightshift.artifacts import ArtifactStore
|
||||||
from nightshift.config import parse_config, StageConfig
|
from nightshift.config import parse_config, StageConfig
|
||||||
from nightshift.failures import classify_failure
|
from nightshift.escalation import evaluate_retry_churn
|
||||||
|
from nightshift.failures import build_failure_signature, classify_failure
|
||||||
from nightshift.integ import cleanup_integration_runs, create_integration_run
|
from nightshift.integ import cleanup_integration_runs, create_integration_run
|
||||||
from nightshift.patches import validate_patch
|
from nightshift.patches import validate_patch
|
||||||
from nightshift.pipeline import PipelineRunner
|
from nightshift.pipeline import PipelineRunner
|
||||||
|
from nightshift.retry_memory import RetryMemoryEntry
|
||||||
from nightshift.tasks import parse_tasks
|
from nightshift.tasks import parse_tasks
|
||||||
|
|
||||||
from tests.test_pipeline import TASK_MD, make_config, _write_common_files
|
from tests.test_pipeline import TASK_MD, make_config, _write_common_files
|
||||||
|
|
@ -36,6 +38,61 @@ class ReliabilityFeatureTests(unittest.TestCase):
|
||||||
self.assertEqual(result.category, "missing dependency")
|
self.assertEqual(result.category, "missing dependency")
|
||||||
self.assertIn("pastebin_app", result.probable_root_cause)
|
self.assertIn("pastebin_app", result.probable_root_cause)
|
||||||
|
|
||||||
|
def test_failure_classifier_treats_traceback_into_source_as_logic_bug(self) -> None:
    """A NameError raised inside project ``src`` code is classified as a logic bug."""
    traceback_lines = (
        ' File "C:\\repo\\project\\src\\pastebin_app\\app.py", line 31, in get_db',
        " if 'db' not in g:",
        "NameError: name 'g' is not defined",
    )
    classification = classify_failure("\n".join(traceback_lines), exit_code=1)

    self.assertEqual(classification.category, "logic bug")
    self.assertIn("src\\pastebin_app\\app.py", classification.probable_root_cause)
|
||||||
|
|
||||||
|
def test_retry_churn_stops_on_repeated_failure_signature(self) -> None:
    """Two consecutive attempts with the same signature must trigger a stop."""
    shared_fields = {
        "stage_id": "test",
        "status": "fail",
        "cause": "Command exited with code 1: python -m pytest -q",
        "next_stage": "implement",
        "failure_signature": "NameError | src/pastebin_app/app.py | 31 | python -m pytest -q",
    }
    # The two attempts differ only in their attempt number.
    history = tuple(
        RetryMemoryEntry(attempt=number, **shared_fields) for number in (1, 2)
    )

    verdict = evaluate_retry_churn(history, retry_budget=4, repeated_signature_after=2)

    self.assertTrue(verdict.should_stop)
    self.assertIn("same failure signature", verdict.reason)
|
||||||
|
|
||||||
|
def test_build_failure_signature_prefers_project_traceback_over_pytest_cache(self) -> None:
    """The signature names the project frame, not the pytest-internal frame."""
    captured_output = "\n".join(
        (
            ' File "C:\\repo\\project\\src\\pastebin_app\\app.py", line 31, in get_db',
            "NameError: name 'g' is not defined",
            ' File "C:\\Users\\metis\\...\\site-packages\\_pytest\\cacheprovider.py", line 429, in set',
        )
    )
    result = build_failure_signature(
        captured_output,
        reason="Command exited with code 1: python -m pytest -q",
    )

    self.assertIn("src\\pastebin_app\\app.py", result)
    self.assertNotIn("_pytest\\cacheprovider.py", result)
|
||||||
|
|
||||||
def test_command_failure_writes_diagnostics_and_retry_memory(self) -> None:
|
def test_command_failure_writes_diagnostics_and_retry_memory(self) -> None:
|
||||||
with tempfile.TemporaryDirectory() as directory:
|
with tempfile.TemporaryDirectory() as directory:
|
||||||
root = Path(directory)
|
root = Path(directory)
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue
Block a user