hack-house/.venv/lib/python3.12/site-packages/tracerite/syntaxerror.py

"""Enhanced SyntaxError position extraction.

Python's SyntaxError often provides poor position information, especially for
multi-line errors like mismatched brackets. This module parses common error
patterns and source code to provide better highlighting ranges.
"""

import re
from collections import namedtuple

# A highlight span. Line numbers (lfirst, lfinal) are 1-based and inclusive;
# columns are 0-based, with cbeg the first highlighted column and cend the
# column just past the last one (exclusive).
Range = namedtuple("Range", "lfirst lfinal cbeg cend")

# --- Recognisers for specific SyntaxError messages -------------------------
# Groups: (closing bracket, opening bracket it failed to match, opener's line).
MISMATCH_PATTERN = re.compile(
    r"closing parenthesis '([)\]}])' does not match opening parenthesis '([(\[{])' on line (\d+)"
)
# Group 1: the opening bracket that was left unclosed.
UNCLOSED_PATTERN = re.compile(r"'([(\[{])' was never closed")
INCOMPLETE_INPUT_PATTERN = re.compile(r"incomplete input")
# Matches both "unterminated string literal" and "unterminated f-string literal".
UNTERMINATED_STRING_PATTERN = re.compile(r"unterminated (?:f-)?string literal")
# Matches the triple-quoted variants, plain and f-string alike.
UNTERMINATED_TRIPLE_PATTERN = re.compile(
    r"unterminated triple-quoted (?:f-)?string literal"
)

# --- Trailing location suffixes that are stripped from displayed messages ---
# Every pattern is anchored to the end of the message.
DETECTED_AT_LINE_PATTERN = re.compile(r" \(detected at line \d+\)$")
ON_LINE_PATTERN = re.compile(r" on line \d+$")
FILENAME_LINE_PATTERN = re.compile(r" \([^)]+, line \d+\)$")

# --- Bracket lookup tables -------------------------------------------------
ALL_OPENERS = "([{"
# opener -> closer
BRACKET_PAIRS_REV = dict(zip(ALL_OPENERS, ")]}"))
# closer -> opener
BRACKET_PAIRS = {closer: opener for opener, closer in BRACKET_PAIRS_REV.items()}


def _count_backslashes_before(text, end):
    """Return how many consecutive backslashes sit immediately before text[end]."""
    count = 0
    while end - count > 0 and text[end - count - 1] == "\\":
        count += 1
    return count


def _iter_code_chars(source_lines, end_line=None, end_col=None):
    """Iterate over characters in source code, skipping strings and comments.

    This is a lightweight scanner, not a full tokenizer: it only tracks quote
    characters, backslash escapes and ``#`` comments.

    Args:
        source_lines: List of source lines (trailing newlines are ignored).
        end_line: Last line to scan, 1-based inclusive. Defaults to all lines.
        end_col: If given, scanning of ``end_line`` stops before this 0-based
            column.

    Yields:
        (line_number_1based, col, char) for each character that is actual code
        (not inside a string literal or comment). Whitespace and string prefix
        letters count as code.
    """
    if end_line is None:
        end_line = len(source_lines)

    in_string = None  # None, or the quote character(s) that opened the string

    for line_idx in range(min(end_line, len(source_lines))):
        line = source_lines[line_idx].rstrip("\n\r")
        line_num = line_idx + 1  # 1-based

        # Determine where to stop on this line
        line_end = len(line)
        if line_num == end_line and end_col is not None:
            line_end = min(line_end, end_col)

        col = 0
        while col < line_end:
            if in_string:
                # The delimiter closes the string only when it is preceded by
                # an even number of backslashes (i.e. it is not escaped).
                if (
                    line.startswith(in_string, col)
                    and _count_backslashes_before(line, col) % 2 == 0
                ):
                    col += len(in_string)
                    in_string = None
                else:
                    col += 1
                continue

            # Check for start of string (triple quotes before single quotes).
            # startswith(..., col) avoids copying the rest of the line for
            # every character scanned.
            if line.startswith(('"""', "'''"), col):
                in_string = line[col : col + 3]
                col += 3
                continue

            char = line[col]
            if char in "\"'":
                in_string = char
                col += 1
                continue

            # Check for comment
            if char == "#":
                break  # Rest of line is comment

            # This is actual code
            yield line_num, col, char
            col += 1

        # A single-quoted string normally cannot span lines, so it is dropped
        # at end of line. The exception is a backslash-newline continuation
        # (an odd number of trailing backslashes), where the string carries on
        # and the next line must still be treated as string content.
        if (
            in_string
            and len(in_string) == 1
            and _count_backslashes_before(line, len(line)) % 2 == 0
        ):
            in_string = None


def clean_syntax_error_message(message):
    """Clean up redundant information from SyntaxError messages.

    Removes trailing patterns like:
    - " (detected at line 1)" from unterminated strings
    - " on line 2" from bracket mismatches
    - " (filename.py, line N)" suffix
    These are redundant since we show the line in the traceback.

    Args:
        message: The SyntaxError message text.

    Returns:
        The message with every trailing location suffix removed.
    """
    suffix_patterns = (
        DETECTED_AT_LINE_PATTERN,
        ON_LINE_PATTERN,
        FILENAME_LINE_PATTERN,
    )
    # Every pattern is anchored to the end of the message, so a suffix is only
    # visible once whatever follows it has been stripped. A single fixed-order
    # pass would leave e.g. "(detected at line 1)" in place when it is followed
    # by "(file.py, line 1)". Repeat until nothing changes; each successful
    # substitution shortens the message, so the loop always terminates.
    while True:
        stripped = message
        for pattern in suffix_patterns:
            stripped = pattern.sub("", stripped)
        if stripped == message:
            return message
        message = stripped


def extract_enhanced_positions(e, source_lines):
    """Compute improved highlight ranges for a SyntaxError.

    The error text (``str(e)``) is tested against the known message patterns
    in a fixed order and the first one that matches decides which handler runs.

    Args:
        e: The SyntaxError exception
        source_lines: List of source lines (strings with newlines)

    Returns:
        Tuple of (mark_range, em_ranges) where:
        mark_range: Range covering the whole highlight, or None
        em_ranges: List of Range objects to emphasize, or None
        (None, None) means "no enhancement, use Python's own positions".
    """
    text = str(e)

    # Ordered recognisers; each entry is (pattern, callable taking the match).
    recognisers = (
        # "closing parenthesis ')' does not match opening parenthesis '{' on line 1"
        (MISMATCH_PATTERN, lambda found: _handle_mismatch(e, source_lines, found)),
        # "'(' was never closed"
        (UNCLOSED_PATTERN, lambda found: _handle_unclosed(e, source_lines, found)),
        # Triple-quoted strings are checked before plain strings
        (
            UNTERMINATED_TRIPLE_PATTERN,
            lambda found: _handle_unterminated_triple_string(e, source_lines),
        ),
        (
            UNTERMINATED_STRING_PATTERN,
            lambda found: _handle_unterminated_string(e, source_lines),
        ),
        # "incomplete input" (e.g., _IncompleteInputError)
        (INCOMPLETE_INPUT_PATTERN, lambda found: _handle_incomplete(e, source_lines)),
    )

    for pattern, handler in recognisers:
        found = pattern.search(text)
        if found:
            return handler(found)

    # Nothing recognised: fall back to Python's positions
    return None, None


def _handle_mismatch(e, source_lines, match):
    """Build highlight ranges for a "closing ... does not match opening ..." error.

    The opener and its line come from the message (``match`` groups 2 and 3);
    the closer position comes from ``e.lineno`` / ``e.offset``.

    Returns:
        (mark_range, em_ranges): a Range from the opener to the closer, and
        one single-character Range for each of the two brackets.
    """
    opener = match.group(2)
    opener_line = int(match.group(3))  # 1-based

    closer_line = e.lineno
    closer_col = e.offset - 1 if e.offset else 0

    opener_line_exists = 0 < opener_line <= len(source_lines)

    # Preferred: scan the code to locate the opener that is actually unmatched
    opener_col = None
    if opener_line_exists:
        opener_col = _find_unmatched_opener(
            source_lines, opener_line, opener, closer_line, closer_col
        )

    # Fallback: first occurrence of the opener on its line, else column 0
    if opener_col is None:
        opener_col = 0
        if opener_line_exists:
            opener_col = max(source_lines[opener_line - 1].find(opener), 0)

    opener_range = Range(opener_line, opener_line, opener_col, opener_col + 1)
    closer_range = Range(closer_line, closer_line, closer_col, closer_col + 1)
    whole_range = Range(opener_line, closer_line, opener_col, closer_col + 1)

    return whole_range, [opener_range, closer_range]


def _handle_unclosed(e, source_lines, match):
    """Build highlight ranges for a "'X' was never closed" error.

    The bracket character comes from the message (``match`` group 1). The
    source is scanned up to ``e.lineno`` for an unclosed bracket of that kind.

    Returns:
        (mark_range, em_ranges): a Range from the opener to the reported error
        position and a single-character Range on the opener, or (None, None)
        when no unclosed opener is found.
    """
    start_line, start_col = _find_unclosed_opener(
        source_lines, e.lineno, match.group(1)
    )
    if start_line is None or start_col is None:
        return None, None

    # Position reported by Python, converted to a 0-based column
    stop_line = e.lineno
    stop_col = e.offset - 1 if e.offset else 0

    return (
        Range(start_line, stop_line, start_col, stop_col + 1),
        [Range(start_line, start_line, start_col, start_col + 1)],
    )


def _handle_incomplete(e, source_lines):
    """Build highlight ranges for "incomplete input" errors.

    The exception's own position is not used. Instead the source is searched
    for the earliest unclosed bracket, and the highlight runs from there to
    the end of the last line that has content before any ``#``.

    Returns:
        (mark_range, em_ranges), or (None, None) if no unclosed bracket exists.
    """
    # Defaults apply when no line has any content before a '#'
    last_line, last_col = len(source_lines), 0
    for number in range(len(source_lines), 0, -1):
        text = source_lines[number - 1].rstrip("\n\r")
        # Whatever precedes the first '#' decides whether the line is "empty"
        if text.partition("#")[0].rstrip():
            last_line, last_col = number, len(text)
            break

    start_line, start_col, _ = _find_any_unclosed_opener(source_lines, last_line)
    if start_line is None or start_col is None:
        return None, None

    return (
        Range(start_line, last_line, start_col, last_col),
        [Range(start_line, start_line, start_col, start_col + 1)],
    )


def _find_any_unclosed_opener(source_lines, end_line):
    """Locate the earliest opening bracket of any kind that is still open.

    Each bracket type is balanced independently: a closer pops the most recent
    opener of its own type and is ignored when there is none.

    Returns:
        (line_1based, col, opener_char), or (None, None, None) if every
        bracket is closed.
    """
    open_positions = {opener: [] for opener in ALL_OPENERS}

    for lineno, column, ch in _iter_code_chars(source_lines, end_line):
        if ch in ALL_OPENERS:
            open_positions[ch].append((lineno, column))
        elif ch in BRACKET_PAIRS:
            pending = open_positions[BRACKET_PAIRS[ch]]
            if pending:
                pending.pop()

    # For each type the bottom of its stack is its earliest unclosed opener;
    # pick the one that comes first in the source.
    candidates = [
        (pending[0], opener) for opener, pending in open_positions.items() if pending
    ]
    if not candidates:
        return None, None, None

    (lineno, column), opener = min(candidates, key=lambda item: item[0])
    return lineno, column, opener


def _find_unmatched_opener(
    source_lines, opener_line, opener_char, closer_line, closer_col
):
    """Return the column of the opener left unmatched before the closer.

    Code characters from ``opener_line`` up to (not including) the position
    ``closer_line:closer_col`` are scanned, balancing only brackets of
    ``opener_char``'s type. Brackets in strings and comments are skipped by
    the scanner.

    Returns:
        Column of the innermost still-open opener, or None if there is none.
    """
    closer_char = BRACKET_PAIRS_REV.get(opener_char, ")")

    open_cols = []  # columns of openers that have not been closed yet
    for lineno, column, ch in _iter_code_chars(source_lines, closer_line, closer_col):
        if lineno < opener_line:
            continue  # everything before the opener's line is ignored
        if ch == opener_char:
            open_cols.append(column)
        elif open_cols and ch == closer_char:
            open_cols.pop()

    return open_cols[-1] if open_cols else None


def _find_unclosed_opener(source_lines, error_line, opener_char):
    """Locate the first still-open bracket of the given type.

    Scans code characters through ``error_line`` (strings and comments are
    skipped by the scanner), balancing only ``opener_char`` and its closer.

    Returns:
        (line_1based, col) of the earliest unclosed opener, or (None, None).
    """
    closer_char = BRACKET_PAIRS_REV.get(opener_char, ")")

    still_open = []  # (line, col) of openers awaiting a closer
    for lineno, column, ch in _iter_code_chars(source_lines, error_line):
        if ch == opener_char:
            still_open.append((lineno, column))
        elif still_open and ch == closer_char:
            still_open.pop()

    # Bottom of the stack is the earliest opener that never got closed
    return still_open[0] if still_open else (None, None)


def _get_string_opener_length(line, col):
    """Measure the string opener (optional prefix plus quotes) at line[col].

    Examples of the returned length:
    - ' or " -> 1
    - ''' or \"\"\" -> 3
    - f' or f" -> 2
    - f''' or f\"\"\" -> 4
    - rf' or fr" -> 3
    - rf''' or rf\"\"\" -> 5

    If no quote follows the (possibly empty) prefix, 1 is returned.
    """
    tail = line[col:]
    lowered = tail.lower()

    # Prefix letters are matched case-insensitively; two-letter forms first
    if lowered.startswith(("fr", "rf", "br", "rb")):
        skip = 2
    elif lowered.startswith(("f", "r", "b", "u")):
        skip = 1
    else:
        skip = 0

    quotes = tail[skip:]
    if quotes[:3] in ('"""', "'''"):
        return skip + 3
    if quotes[:1] in ('"', "'"):
        return skip + 1

    # No quote where one was expected: fall back to a single character
    return 1


def _handle_unterminated_string(e, source_lines):
    """Handle unterminated string literal errors.

    For single-line strings, mark from the opening to end of the line,
    and emphasize the full opener (prefix + quote).

    Args:
        e: The SyntaxError; ``e.lineno`` / ``e.offset`` are taken as the
            position of the string opener.
        source_lines: List of source lines.

    Returns:
        (mark_range, em_ranges), or (None, None) when the error carries no
        line number or the line is not present in ``source_lines``.
    """
    error_line = e.lineno

    # A SyntaxError created without location details has lineno None; comparing
    # that with an int would raise TypeError, so bail out before the range check.
    if (
        error_line is None
        or not source_lines
        or error_line < 1
        or error_line > len(source_lines)
    ):
        return None, None

    # e.offset is converted to a 0-based column; a missing offset maps to 0
    error_col = (e.offset - 1) if e.offset else 0

    line = source_lines[error_line - 1].rstrip("\n\r")
    end_col = len(line)

    # Get the full string opener length (prefix + quote)
    opener_len = _get_string_opener_length(line, error_col)

    # Mark from the opening to end of line
    mark_range = Range(error_line, error_line, error_col, end_col)
    # Emphasize the full opener (prefix + quote)
    em_ranges = [Range(error_line, error_line, error_col, error_col + opener_len)]

    return mark_range, em_ranges


def _handle_unterminated_triple_string(e, source_lines):
    """Handle unterminated triple-quoted string literal errors.

    Mark from opening to end of line, emphasize the full opener (prefix + triple quotes).

    Args:
        e: The SyntaxError; ``e.lineno`` / ``e.offset`` are taken as the
            position of the string opener.
        source_lines: List of source lines.

    Returns:
        (mark_range, em_ranges), or (None, None) when the error carries no
        line number or the line is not present in ``source_lines``.
    """
    error_line = e.lineno

    # A SyntaxError created without location details has lineno None; comparing
    # that with an int would raise TypeError, so bail out before the range check.
    if (
        error_line is None
        or not source_lines
        or error_line < 1
        or error_line > len(source_lines)
    ):
        return None, None

    # e.offset is converted to a 0-based column; a missing offset maps to 0
    error_col = (e.offset - 1) if e.offset else 0

    line = source_lines[error_line - 1].rstrip("\n\r")
    end_col = len(line)

    # Get the full string opener length (prefix + triple quotes)
    opener_len = _get_string_opener_length(line, error_col)

    # Mark from opening to end of line (not end of input - per user feedback)
    mark_range = Range(error_line, error_line, error_col, end_col)
    # Emphasize the full opener (prefix + triple quotes)
    em_ranges = [Range(error_line, error_line, error_col, error_col + opener_len)]

    return mark_range, em_ranges