from __future__ import annotations

import re
from dataclasses import dataclass
from typing import Callable, Iterable

# Error recovery pattern: optional leading whitespace followed by one run of
# non-whitespace characters (captured as the offending token).
_ERROR_TOKEN_RE = re.compile(r"\s*(\S+)")


@dataclass
class Lexem:
    """A single token produced by the lexer."""

    # Exact source text matched for this token (leading whitespace excluded).
    text: str
    # Name of the LexemeType that produced the token, or "ERROR".
    type_name: str
    # Result of the type's ``value_func`` applied to ``text``.
    value: str


class LexemeType:
    """A named token category recognised by a regular expression."""

    def __init__(
        self,
        name: str,
        pattern: str,
        value_func: Callable[[str], str] = lambda _: "",
    ):
        """Create a lexeme type.

        Args:
            name: Identifier stored in ``Lexem.type_name`` for matched tokens.
            pattern: Regular expression source describing the token itself.
            value_func: Maps the matched text to the token's value; the
                default yields an empty string.
        """
        self.name = name
        # Leading whitespace is skipped by the regex itself; group 1 wraps the
        # whole user pattern so only the token text is captured.
        self.regex = re.compile(r"\s*(" + pattern + ")")
        self.value_func = value_func

    def consume(self, text: str) -> tuple[Lexem | None, str]:
        """Try to read one token of this type from the start of *text*.

        Returns:
            ``(lexem, rest)`` where *rest* is the input following the match,
            or ``(None, text)`` with the input untouched when there is no
            match.
        """
        match = self.regex.match(text)
        if match is None:
            return None, text

        lexeme_text = match.group(1)
        value = self.value_func(lexeme_text)
        rest = text[match.end():]
        return Lexem(lexeme_text, self.name, value), rest


class Lexer:
    """Splits text into lexemes using an ordered collection of lexeme types."""

    def __init__(self, lexeme_types: Iterable[LexemeType]):
        """Create a lexer.

        Args:
            lexeme_types: Candidate types, tried in iteration order for every
                token; the first one that matches wins.
        """
        # Materialise the iterable: ``analyze`` walks the types once per
        # token, so a one-shot iterator (e.g. a generator) would be exhausted
        # after the first pass and every later token would be misreported.
        self.lexeme_types = tuple(lexeme_types)

    def analyze(self, text: str) -> list[Lexem]:
        """Tokenise *text*.

        Input that no lexeme type recognises is emitted as a lexeme with
        ``type_name == "ERROR"`` and scanning continues after it.

        Returns:
            The lexemes in source order; empty for blank input.
        """
        lexems: list[Lexem] = []
        while text.strip():
            for lex_type in self.lexeme_types:
                lexem, new_text = lex_type.consume(text)
                # A zero-length match makes no progress on the token itself
                # and would make this loop spin forever; treat it as "no
                # match" so the input falls through to error recovery.
                if lexem is not None and lexem.text:
                    lexems.append(lexem)
                    text = new_text
                    break
            else:
                # No type matched at this position.
                error_lexeme, text = self._consume_error(text)
                lexems.append(error_lexeme)
        return lexems

    def _consume_error(self, text: str) -> tuple[Lexem, str]:
        """Skip one unrecognised whitespace-delimited chunk of *text*.

        Prints a diagnostic to standard output and returns an ``"ERROR"``
        lexeme for the chunk together with the remaining input.
        """
        match = _ERROR_TOKEN_RE.match(text)
        if match:
            err_text = match.group(1)
            rest = text[match.end():]
        else:
            # Only reachable for input without any non-whitespace character.
            err_text = text.strip()
            rest = ""
        print(f"Недопустимая лексема: {err_text}")
        return Lexem(err_text, "ERROR", ""), rest