Rebrand the Rust client crate (coven/ → hh/, package+binary "hack-house"), README, CLI strings, and branch (coven → hack-house). Gitea repo renamed cmd-chat → hack-house to match. Crypto/server logic unchanged; selftest + golden-vector test still green, binary is now `hack-house`. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
418 lines
14 KiB
Python
418 lines
14 KiB
Python
"""Enhanced SyntaxError position extraction.
|
|
|
|
Python's SyntaxError often provides poor position information, especially for
|
|
multi-line errors like mismatched brackets. This module parses common error
|
|
patterns and source code to provide better highlighting ranges.
|
|
"""
|
|
|
|
import re
|
|
from collections import namedtuple
|
|
|
|
# A highlight span. Line numbers are 1-based and inclusive at both ends;
# columns are 0-based and the end column is exclusive.
Range = namedtuple("Range", "lfirst lfinal cbeg cend")

# Regexes recognising the SyntaxError message shapes handled by this module.
# Mismatch groups: (closing bracket, opening bracket, opening line number).
MISMATCH_PATTERN = re.compile(
    r"closing parenthesis '([)\]}])' does not match opening parenthesis '([(\[{])' on line (\d+)"
)
# Group 1 is the bracket that was left open.
UNCLOSED_PATTERN = re.compile(r"'([(\[{])' was never closed")
INCOMPLETE_INPUT_PATTERN = re.compile(r"incomplete input")
# Covers both "unterminated string literal" and "unterminated f-string literal"
UNTERMINATED_STRING_PATTERN = re.compile(r"unterminated (?:f-)?string literal")
# Covers the triple-quoted variants, with or without the "f-" prefix
UNTERMINATED_TRIPLE_PATTERN = re.compile(
    r"unterminated triple-quoted (?:f-)?string literal"
)

# Trailing line-number decorations that are stripped from displayed messages.
# Each one is anchored to the end of the message.
DETECTED_AT_LINE_PATTERN = re.compile(r" \(detected at line \d+\)$")
ON_LINE_PATTERN = re.compile(r" on line \d+$")
FILENAME_LINE_PATTERN = re.compile(r" \([^)]+, line \d+\)$")

# Bracket lookup tables: closer -> opener, opener -> closer, and all openers.
BRACKET_PAIRS = {")": "(", "]": "[", "}": "{"}
BRACKET_PAIRS_REV = {opener: closer for closer, opener in BRACKET_PAIRS.items()}
ALL_OPENERS = "".join(BRACKET_PAIRS.values())
|
|
|
|
|
|
def _iter_code_chars(source_lines, end_line=None, end_col=None):
|
|
"""Iterate over characters in source code, skipping strings and comments.
|
|
|
|
Yields (line_idx_1based, col, char) for each character that is actual code
|
|
(not inside a string literal or comment).
|
|
"""
|
|
if end_line is None:
|
|
end_line = len(source_lines)
|
|
|
|
in_string = None # None, or the quote character(s) that opened the string
|
|
|
|
for line_idx in range(min(end_line, len(source_lines))):
|
|
line = source_lines[line_idx].rstrip("\n\r")
|
|
line_num = line_idx + 1 # 1-based
|
|
|
|
# Determine where to stop on this line
|
|
line_end = len(line)
|
|
if line_num == end_line and end_col is not None:
|
|
line_end = min(line_end, end_col)
|
|
|
|
col = 0
|
|
while col < line_end:
|
|
char = line[col]
|
|
rest = line[col:]
|
|
|
|
if in_string:
|
|
# Check for end of string
|
|
if rest.startswith(in_string):
|
|
# Check it's not escaped (count preceding backslashes)
|
|
num_backslashes = 0
|
|
check_col = col - 1
|
|
while check_col >= 0 and line[check_col] == "\\":
|
|
num_backslashes += 1
|
|
check_col -= 1
|
|
if num_backslashes % 2 == 0: # Not escaped
|
|
col += len(in_string)
|
|
in_string = None
|
|
continue
|
|
col += 1
|
|
continue
|
|
|
|
# Check for start of string
|
|
if rest.startswith('"""') or rest.startswith("'''"):
|
|
in_string = rest[:3]
|
|
col += 3
|
|
continue
|
|
if char in "\"'":
|
|
in_string = char
|
|
col += 1
|
|
continue
|
|
|
|
# Check for comment
|
|
if char == "#":
|
|
break # Rest of line is comment
|
|
|
|
# This is actual code
|
|
yield line_num, col, char
|
|
col += 1
|
|
|
|
# Single-quoted strings don't span lines (would be a syntax error)
|
|
if in_string and len(in_string) == 1:
|
|
in_string = None
|
|
|
|
|
|
def clean_syntax_error_message(message):
    """Clean up redundant information from SyntaxError messages.

    Removes trailing decorations like:
    - " (filename.py, line N)" suffix
    - " (detected at line 1)" from unterminated strings
    - " on line 2" from bracket mismatches
    These are redundant since we show the line in the traceback.

    Args:
        message: The SyntaxError message text.

    Returns:
        The message with the trailing line-number decorations removed.
    """
    # Every pattern is anchored to the end of the message, so the outermost
    # suffix has to go first: when a " (filename, line N)" suffix is present
    # it follows the "on line N" / "(detected at line N)" text and would
    # otherwise shield it from being matched.
    message = FILENAME_LINE_PATTERN.sub("", message)
    message = DETECTED_AT_LINE_PATTERN.sub("", message)
    message = ON_LINE_PATTERN.sub("", message)
    return message
|
|
|
|
|
|
def extract_enhanced_positions(e, source_lines):
    """Work out improved highlight positions for a SyntaxError.

    The exception text is matched against the known message patterns in a
    fixed order and the first matching handler produces the result.

    Args:
        e: The SyntaxError exception
        source_lines: List of source lines (strings with newlines)

    Returns:
        Tuple of (mark_range, em_ranges) where:
            mark_range: Range for the full highlight (e.g., from opening to
                closing bracket), or None
            em_ranges: List of Range objects for emphasized positions (e.g.,
                both mismatched brackets), or None
        (None, None) means no pattern applied and Python's own positions
        should be used.
    """
    text = str(e)

    # Bracket errors: their handlers need the captured groups.
    # e.g. "closing parenthesis ')' does not match opening parenthesis '{' on line 1"
    found = MISMATCH_PATTERN.search(text)
    if found:
        return _handle_mismatch(e, source_lines, found)

    # e.g. "'(' was never closed"
    found = UNCLOSED_PATTERN.search(text)
    if found:
        return _handle_unclosed(e, source_lines, found)

    # Remaining handlers only need the exception and the source. The
    # triple-quoted check comes before the plain string check, and
    # incomplete input (e.g. _IncompleteInputError) is tried last.
    simple_handlers = (
        (UNTERMINATED_TRIPLE_PATTERN, _handle_unterminated_triple_string),
        (UNTERMINATED_STRING_PATTERN, _handle_unterminated_string),
        (INCOMPLETE_INPUT_PATTERN, _handle_incomplete),
    )
    for pattern, handler in simple_handlers:
        if pattern.search(text):
            return handler(e, source_lines)

    # Default: use Python's positions
    return None, None
|
|
|
|
|
|
def _handle_mismatch(e, source_lines, match):
    """Build highlight ranges for a mismatched-bracket error.

    Args:
        e: The SyntaxError; its lineno/offset locate the closing bracket.
        source_lines: List of source lines.
        match: MISMATCH_PATTERN match; group 2 is the opening bracket and
            group 3 the 1-based line it is on.

    Returns:
        (mark_range, em_ranges): the span from the opening to the closing
        bracket, plus one single-character range for each of the two brackets.
    """
    _, opening_char, opening_line_text = match.groups()
    opening_line = int(opening_line_text)  # 1-based

    closing_line = e.lineno
    # e.offset is converted to a 0-based column; a missing offset becomes 0
    closing_col = e.offset - 1 if e.offset else 0

    if 0 < opening_line <= len(source_lines):
        # Prefer the opener that the bracket scan reports as unmatched
        opening_col = _find_unmatched_opener(
            source_lines, opening_line, opening_char, closing_line, closing_col
        )
        if opening_col is None:
            # Fallback: first occurrence on that line, or column 0 if absent
            opening_col = max(source_lines[opening_line - 1].find(opening_char), 0)
    else:
        # Reported line is outside the available source
        opening_col = 0

    opener_mark = Range(opening_line, opening_line, opening_col, opening_col + 1)
    closer_mark = Range(closing_line, closing_line, closing_col, closing_col + 1)
    full_span = Range(opening_line, closing_line, opening_col, closing_col + 1)

    return full_span, [opener_mark, closer_mark]
|
|
|
|
|
|
def _handle_unclosed(e, source_lines, match):
    """Build highlight ranges for a "was never closed" bracket error.

    Args:
        e: The SyntaxError; its lineno/offset give the reported position.
        source_lines: List of source lines.
        match: UNCLOSED_PATTERN match; group 1 is the opening bracket.

    Returns:
        (mark_range, em_ranges): the span from the located opener to the
        reported position with the opener emphasized, or (None, None) when
        no unclosed opener is found.
    """
    bracket = match.group(1)

    # Position reported by Python (offset converted to a 0-based column)
    error_line = e.lineno
    error_col = e.offset - 1 if e.offset else 0

    # Scan the source up to the reported line for an opener left unclosed
    start_line, start_col = _find_unclosed_opener(source_lines, error_line, bracket)
    if start_line is None or start_col is None:
        return None, None

    opener_mark = Range(start_line, start_line, start_col, start_col + 1)
    full_span = Range(start_line, error_line, start_col, error_col + 1)
    return full_span, [opener_mark]
|
|
|
|
|
|
def _handle_incomplete(e, source_lines):
    """Build highlight ranges for an "incomplete input" error.

    Such errors (e.g. _IncompleteInputError) arise when the code stops
    before a construct is finished. The highlight runs from the earliest
    unclosed bracket to the end of the last line that contains code.

    Args:
        e: The SyntaxError (not inspected here).
        source_lines: List of source lines.

    Returns:
        (mark_range, em_ranges), or (None, None) when no unclosed bracket
        is found.
    """
    # Default to the very end of the input when no line holds any code
    end_line, end_col = len(source_lines), 0

    # Walk backwards to the last line with something before any "#"
    for idx in reversed(range(len(source_lines))):
        text = source_lines[idx].rstrip("\n\r")
        if text.partition("#")[0].rstrip():
            end_line, end_col = idx + 1, len(text)  # line is 1-based
            break

    opening_line, opening_col, _ = _find_any_unclosed_opener(source_lines, end_line)
    if opening_line is None or opening_col is None:
        return None, None

    opener_mark = Range(opening_line, opening_line, opening_col, opening_col + 1)
    full_span = Range(opening_line, end_line, opening_col, end_col)
    return full_span, [opener_mark]
|
|
|
|
|
|
def _find_any_unclosed_opener(source_lines, end_line):
    """Locate the earliest opening bracket of any type left unclosed.

    Each bracket type is balanced independently using the code characters
    produced by _iter_code_chars (strings and comments are skipped).

    Args:
        source_lines: List of source lines.
        end_line: 1-based last line to scan.

    Returns:
        (line, col, opener_char) of the earliest unclosed opener, or
        (None, None, None) when every opener is closed.
    """
    # One stack of (line, col) positions per opener character
    pending = {opener: [] for opener in ALL_OPENERS}

    for line_num, col, char in _iter_code_chars(source_lines, end_line):
        if char in pending:
            pending[char].append((line_num, col))
            continue
        partner = BRACKET_PAIRS.get(char)
        if partner is not None and pending[partner]:
            pending[partner].pop()

    # The bottom of each non-empty stack is that type's first unclosed opener
    candidates = [(stack[0], opener) for opener, stack in pending.items() if stack]
    if not candidates:
        return None, None, None

    (line_num, col), opener = min(candidates, key=lambda item: item[0])
    return line_num, col, opener
|
|
|
|
|
|
def _find_unmatched_opener(
    source_lines, opener_line, opener_char, closer_line, closer_col
):
    """Find the column of the unmatched opening bracket.

    Scans from the indicated opener_line up to (but not including) the
    closer at closer_line:closer_col, balancing brackets of the opener's
    type. Brackets inside strings and comments are skipped by
    _iter_code_chars.

    Args:
        source_lines: List of source lines.
        opener_line: 1-based line on which the opening bracket is reported.
        opener_char: The opening bracket character.
        closer_line: 1-based line of the mismatched closing bracket.
        closer_col: 0-based column of the mismatched closing bracket.

    Returns:
        The 0-based column, on opener_line, of the last opener of this type
        still unmatched when the closer is reached, or None if there is no
        such opener on that line.
    """
    closer_char = BRACKET_PAIRS_REV.get(opener_char, ")")

    stack = []  # (line, col) of openers not yet matched by a closer

    # The scan must start at the first line so string state is tracked
    # correctly; brackets before opener_line are simply ignored.
    for line_num, col, char in _iter_code_chars(source_lines, closer_line, closer_col):
        if line_num < opener_line:
            continue
        if char == opener_char:
            stack.append((line_num, col))
        elif char == closer_char and stack:
            stack.pop()

    # The caller pairs the returned column with opener_line, so only an
    # opener located on that line is a valid answer. The character scan is
    # approximate, so the top of the stack may sit on a later line; using
    # its column would point at the wrong place.
    for line_num, col in reversed(stack):
        if line_num == opener_line:
            return col
    return None
|
|
|
|
|
|
def _find_unclosed_opener(source_lines, error_line, opener_char):
    """Locate the first opener of the given type that is left unclosed.

    Only brackets of opener_char's type are balanced; brackets inside
    strings and comments are skipped by _iter_code_chars.

    Args:
        source_lines: List of source lines.
        error_line: 1-based last line to scan.
        opener_char: The opening bracket character to look for.

    Returns:
        (line, col) of the earliest opener still open after the scan, or
        (None, None) when all of them are closed.
    """
    closer_char = BRACKET_PAIRS_REV.get(opener_char, ")")

    open_positions = []  # (line, col) of openers awaiting a closer
    for line_num, col, char in _iter_code_chars(source_lines, error_line):
        if char == opener_char:
            open_positions.append((line_num, col))
        elif open_positions and char == closer_char:
            open_positions.pop()

    return open_positions[0] if open_positions else (None, None)
|
|
|
|
|
|
def _get_string_opener_length(line, col):
|
|
"""Get the length of a string opener (prefix + quotes) starting at col.
|
|
|
|
Returns the length of the full opener, e.g.:
|
|
- ' or " -> 1
|
|
- ''' or \"\"\" -> 3
|
|
- f' or f" -> 2
|
|
- f''' or f\"\"\" -> 4
|
|
- rf' or fr" -> 3
|
|
- rf''' or rf\"\"\" -> 5
|
|
"""
|
|
rest = line[col:]
|
|
|
|
# Check for string prefix (case insensitive: f, r, b, u, fr, rf, br, rb)
|
|
prefix_len = 0
|
|
prefix_rest = rest.lower()
|
|
if prefix_rest[:2] in ("fr", "rf", "br", "rb"):
|
|
prefix_len = 2
|
|
elif prefix_rest[:1] in ("f", "r", "b", "u"):
|
|
prefix_len = 1
|
|
|
|
# Check for quotes after prefix
|
|
after_prefix = rest[prefix_len:]
|
|
if after_prefix.startswith('"""') or after_prefix.startswith("'''"):
|
|
return prefix_len + 3
|
|
elif after_prefix and after_prefix[0] in "\"'":
|
|
return prefix_len + 1
|
|
|
|
# Fallback: just one character
|
|
return 1
|
|
|
|
|
|
def _handle_unterminated_string(e, source_lines):
    """Handle unterminated string literal errors.

    For single-line strings, mark from the opening to end of the line,
    and emphasize the full opener (prefix + quote).

    Args:
        e: The SyntaxError; lineno/offset give the reported start position.
        source_lines: List of source lines.

    Returns:
        (mark_range, em_ranges), or (None, None) when the exception carries
        no line number or the line is outside source_lines.
    """
    error_line = e.lineno
    # lineno may be None (e.g. a SyntaxError built without location details);
    # comparing None with an int would raise TypeError, so bail out instead.
    if error_line is None or not 1 <= error_line <= len(source_lines):
        return None, None

    # Convert the offset to a 0-based column; a missing offset becomes 0
    error_col = (e.offset - 1) if e.offset else 0

    line = source_lines[error_line - 1].rstrip("\n\r")
    end_col = len(line)

    # Get the full string opener length (prefix + quote)
    opener_len = _get_string_opener_length(line, error_col)

    # Mark from the opening to end of line
    mark_range = Range(error_line, error_line, error_col, end_col)
    # Emphasize the full opener (prefix + quote)
    em_ranges = [Range(error_line, error_line, error_col, error_col + opener_len)]

    return mark_range, em_ranges
|
|
|
|
|
|
def _handle_unterminated_triple_string(e, source_lines):
    """Handle unterminated triple-quoted string literal errors.

    Mark from the opening to the end of that line (not to the end of the
    input), and emphasize the full opener (prefix + triple quotes).

    Args:
        e: The SyntaxError; lineno/offset give the reported start position.
        source_lines: List of source lines.

    Returns:
        (mark_range, em_ranges), or (None, None) when the exception carries
        no line number or the line is outside source_lines.
    """
    error_line = e.lineno
    # lineno may be None (e.g. a SyntaxError built without location details);
    # comparing None with an int would raise TypeError, so bail out instead.
    if error_line is None or not 1 <= error_line <= len(source_lines):
        return None, None

    # Convert the offset to a 0-based column; a missing offset becomes 0
    error_col = (e.offset - 1) if e.offset else 0

    line = source_lines[error_line - 1].rstrip("\n\r")
    end_col = len(line)

    # Get the full string opener length (prefix + triple quotes)
    opener_len = _get_string_opener_length(line, error_col)

    # Mark only the opening line, deliberately not the rest of the input
    mark_range = Range(error_line, error_line, error_col, end_col)
    # Emphasize the full opener (prefix + triple quotes)
    em_ranges = [Range(error_line, error_line, error_col, error_col + opener_len)]

    return mark_range, em_ranges
|