diff --git a/lark-stubs/__init__.pyi b/lark-stubs/__init__.pyi
deleted file mode 100644
index c79a6ef..0000000
--- a/lark-stubs/__init__.pyi
+++ /dev/null
@@ -1,12 +0,0 @@
-# -*- coding: utf-8 -*-
-
-from .tree import *
-from .visitors import *
-from .exceptions import *
-from .lexer import *
-from .load_grammar import *
-from .lark import *
-from logging import Logger as _Logger
-
-logger: _Logger
-__version__: str = ...
diff --git a/lark-stubs/lexer.pyi b/lark-stubs/lexer.pyi
deleted file mode 100644
index 004865c..0000000
--- a/lark-stubs/lexer.pyi
+++ /dev/null
@@ -1,161 +0,0 @@
-# -*- coding: utf-8 -*-
-from types import ModuleType
-from typing import (
-    TypeVar, Type, Tuple, List, Dict, Iterator, Collection, Callable, Optional, FrozenSet, Any,
-    Pattern as REPattern,
-)
-from abc import abstractmethod, ABC
-
-_T = TypeVar('_T')
-
-
-class Pattern(ABC):
-    value: str
-    flags: Collection[str]
-    raw: str
-    type: str
-
-    def __init__(self, value: str, flags: Collection[str] = (), raw: str = None) -> None:
-        ...
-
-    @abstractmethod
-    def to_regexp(self) -> str:
-        ...
-
-    @property
-    @abstractmethod
-    def min_width(self) -> int:
-        ...
-
-    @property
-    @abstractmethod
-    def max_width(self) -> int:
-        ...
-
-
-class PatternStr(Pattern):
-    type: str = ...
-
-    def to_regexp(self) -> str:
-        ...
-
-    @property
-    def min_width(self) -> int:
-        ...
-
-    @property
-    def max_width(self) -> int:
-        ...
-
-
-class PatternRE(Pattern):
-    type: str = ...
-
-    def to_regexp(self) -> str:
-        ...
-
-    @property
-    def min_width(self) -> int:
-        ...
-
-    @property
-    def max_width(self) -> int:
-        ...
-
-
-class TerminalDef:
-    name: str
-    pattern: Pattern
-    priority: int
-
-    def __init__(self, name: str, pattern: Pattern, priority: int = ...) -> None:
-        ...
-
-    def user_repr(self) -> str: ...
-
-
-class Token(str):
-    type: str
-    start_pos: int
-    value: Any
-    line: int
-    column: int
-    end_line: int
-    end_column: int
-    end_pos: int
-
-    def __init__(self, type_: str, value: Any, start_pos: int = None, line: int = None, column: int = None, end_line: int = None, end_column: int = None, end_pos: int = None) -> None:
-        ...
-
-    def update(self, type_: Optional[str] = None, value: Optional[Any] = None) -> Token:
-        ...
-
-    @classmethod
-    def new_borrow_pos(cls: Type[_T], type_: str, value: Any, borrow_t: Token) -> _T:
-        ...
-
-
-_Callback = Callable[[Token], Token]
-
-
-class Lexer(ABC):
-    lex: Callable[..., Iterator[Token]]
-
-
-class LexerConf:
-    tokens: Collection[TerminalDef]
-    re_module: ModuleType
-    ignore: Collection[str] = ()
-    postlex: Any =None
-    callbacks: Optional[Dict[str, _Callback]] = None
-    g_regex_flags: int = 0
-    skip_validation: bool = False
-    use_bytes: bool = False
-
-
-class TraditionalLexer(Lexer):
-    terminals: Collection[TerminalDef]
-    ignore_types: FrozenSet[str]
-    newline_types: FrozenSet[str]
-    user_callbacks: Dict[str, _Callback]
-    callback: Dict[str, _Callback]
-    mres: List[Tuple[REPattern, Dict[int, str]]]
-    re: ModuleType
-
-    def __init__(
-        self,
-        conf: LexerConf
-    ) -> None:
-        ...
-
-    def build(self) -> None:
-        ...
-
-    def match(self, stream: str, pos: int) -> Optional[Tuple[str, str]]:
-        ...
-
-    def lex(self, stream: str) -> Iterator[Token]:
-        ...
-
-    def next_token(self, lex_state: Any, parser_state: Any = None) -> Token:
-        ...
-
-class ContextualLexer(Lexer):
-    lexers: Dict[str, TraditionalLexer]
-    root_lexer: TraditionalLexer
-
-    def __init__(
-        self,
-        terminals: Collection[TerminalDef],
-        states: Dict[str, Collection[str]],
-        re_: ModuleType,
-        ignore: Collection[str] = ...,
-        always_accept: Collection[str] = ...,
-        user_callbacks: Dict[str, _Callback] = ...,
-        g_regex_flags: int = ...
-    ) -> None:
-        ...
-
-    def lex(self, stream: str, get_parser_state: Callable[[], str]) -> Iterator[Token]:
-        ...
diff --git a/lark-stubs/parsers/__init__.pyi b/lark-stubs/parsers/__init__.pyi
deleted file mode 100644
index e69de29..0000000
diff --git a/lark/__init__.py b/lark/__init__.py
index aff5683..609cfc7 100644
--- a/lark/__init__.py
+++ b/lark/__init__.py
@@ -6,4 +6,4 @@ from .exceptions import (ParseError, LexError, GrammarError, UnexpectedToken,
 from .lexer import Token
 from .lark import Lark
 
-__version__ = "1.0.0a"
+__version__: str = "1.0.0a"
diff --git a/lark/common.py b/lark/common.py
index e2cde6b..ccd5e16 100644
--- a/lark/common.py
+++ b/lark/common.py
@@ -1,14 +1,29 @@
 from .utils import Serialize
-from .lexer import TerminalDef
+from .lexer import TerminalDef, Token
 
 ###{standalone
+from types import ModuleType
+from typing import Any, Callable, Collection, Dict, Optional, TYPE_CHECKING
+if TYPE_CHECKING:
+    from .lark import PostLex
+
+_Callback = Callable[[Token], Token]
 
 
 class LexerConf(Serialize):
     __serialize_fields__ = 'terminals', 'ignore', 'g_regex_flags', 'use_bytes', 'lexer_type'
     __serialize_namespace__ = TerminalDef,
 
-    def __init__(self, terminals, re_module, ignore=(), postlex=None, callbacks=None, g_regex_flags=0, skip_validation=False, use_bytes=False):
+    terminals: Collection[TerminalDef]
+    re_module: ModuleType
+    ignore: Collection[str] = ()
+    postlex: 'PostLex' = None
+    callbacks: Optional[Dict[str, _Callback]] = None
+    g_regex_flags: int = 0
+    skip_validation: bool = False
+    use_bytes: bool = False
+
+    def __init__(self, terminals: Collection[TerminalDef], re_module: ModuleType, ignore: Collection[str]=(), postlex: 'PostLex'=None, callbacks: Optional[Dict[str, _Callback]]=None, g_regex_flags: int=0, skip_validation: bool=False, use_bytes: bool=False):
         self.terminals = terminals
         self.terminals_by_name = {t.name: t for t in self.terminals}
         assert len(self.terminals) == len(self.terminals_by_name)
diff --git a/lark/lexer.py b/lark/lexer.py
index 77f7090..6177d26 100644
--- a/lark/lexer.py
+++ b/lark/lexer.py
@@ -1,5 +1,6 @@
 # Lexer Implementation
 
+from abc import abstractmethod, ABC
 import re
 from contextlib import suppress
 
@@ -9,12 +10,23 @@ from .exceptions import UnexpectedCharacters, LexError, UnexpectedToken
 ###{standalone
 from copy import copy
+from types import ModuleType
+from typing import (
+    TypeVar, Type, Tuple, List, Dict, Iterator, Collection, Callable, Optional, FrozenSet, Any,
+    Pattern as REPattern, TYPE_CHECKING
+)
 
-class Pattern(Serialize):
-    raw = None
-    type = None
+if TYPE_CHECKING:
+    from .common import LexerConf
 
-    def __init__(self, value, flags=(), raw=None):
+class Pattern(Serialize, ABC):
+
+    value: str
+    flags: Collection[str]
+    raw: str = None
+    type: str = None
+
+    def __init__(self, value: str, flags: Collection[str]=(), raw: str=None) -> None:
         self.value = value
         self.flags = frozenset(flags)
         self.raw = raw
@@ -29,13 +41,18 @@ class Pattern(Serialize):
     def __eq__(self, other):
         return type(self) == type(other) and self.value == other.value and self.flags == other.flags
 
-    def to_regexp(self):
+    @abstractmethod
+    def to_regexp(self) -> str:
         raise NotImplementedError()
 
-    def min_width(self):
+    @property
+    @abstractmethod
+    def min_width(self) -> int:
         raise NotImplementedError()
 
-    def max_width(self):
+    @property
+    @abstractmethod
+    def max_width(self) -> int:
         raise NotImplementedError()
 
     if Py36:
@@ -56,13 +73,13 @@ class Pattern(Serialize):
 class PatternStr(Pattern):
     __serialize_fields__ = 'value', 'flags'
 
-    type = "str"
+    type: str = "str"
 
-    def to_regexp(self):
+    def to_regexp(self) -> str:
         return self._get_flags(re.escape(self.value))
 
     @property
-    def min_width(self):
+    def min_width(self) -> int:
         return len(self.value)
 
     max_width = min_width
@@ -70,9 +87,9 @@ class PatternStr(Pattern):
 class PatternRE(Pattern):
     __serialize_fields__ = 'value', 'flags', '_width'
 
-    type = "re"
+    type: str = "re"
 
-    def to_regexp(self):
+    def to_regexp(self) -> str:
         return self._get_flags(self.value)
 
     _width = None
@@ -82,11 +99,11 @@ class PatternRE(Pattern):
         return self._width
 
     @property
-    def min_width(self):
+    def min_width(self) -> int:
         return self._get_width()[0]
 
     @property
-    def max_width(self):
+    def max_width(self) -> int:
         return self._get_width()[1]
 
 
@@ -94,7 +111,11 @@ class TerminalDef(Serialize):
     __serialize_fields__ = 'name', 'pattern', 'priority'
     __serialize_namespace__ = PatternStr, PatternRE
 
-    def __init__(self, name, pattern, priority=1):
+    name: str
+    pattern: Pattern
+    priority: int
+
+    def __init__(self, name: str, pattern: Pattern, priority: int=1) -> None:
         assert isinstance(pattern, Pattern), pattern
         self.name = name
         self.pattern = pattern
@@ -103,12 +124,13 @@ class TerminalDef(Serialize):
     def __repr__(self):
         return '%s(%r, %r)' % (type(self).__name__, self.name, self.pattern)
 
-    def user_repr(self):
+    def user_repr(self) -> str:
         if self.name.startswith('__'): # We represent a generated terminal
             return self.pattern.raw or self.name
         else:
             return self.name
 
+_T = TypeVar('_T')
 
 class Token(str):
     """A string with meta-information, that is produced by the lexer.
@@ -131,6 +153,15 @@ class Token(str):
     """
     __slots__ = ('type', 'start_pos', 'value', 'line', 'column', 'end_line', 'end_column', 'end_pos')
 
+    type: str
+    start_pos: int
+    value: Any
+    line: int
+    column: int
+    end_line: int
+    end_column: int
+    end_pos: int
+
     def __new__(cls, type_, value, start_pos=None, line=None, column=None, end_line=None, end_column=None, end_pos=None):
         try:
             self = super(Token, cls).__new__(cls, value)
@@ -148,7 +179,7 @@ class Token(str):
         self.end_pos = end_pos
         return self
 
-    def update(self, type_=None, value=None):
+    def update(self, type_: Optional[str]=None, value: Optional[Any]=None) -> 'Token':
         return Token.new_borrow_pos(
             type_ if type_ is not None else self.type,
             value if value is not None else self.value,
@@ -156,7 +187,7 @@ class Token(str):
         )
 
     @classmethod
-    def new_borrow_pos(cls, type_, value, borrow_t):
+    def new_borrow_pos(cls: Type[_T], type_: str, value: Any, borrow_t: 'Token') -> _T:
         return cls(type_, value, borrow_t.start_pos, borrow_t.line, borrow_t.column, borrow_t.end_line, borrow_t.end_column, borrow_t.end_pos)
 
     def __reduce__(self):
@@ -289,14 +320,15 @@ def _regexp_has_newline(r):
     """
     return '\n' in r or '\\n' in r or '\\s' in r or '[^' in r or ('(?s' in r and '.' in r)
 
+_Callback = Callable[[Token], Token]
 
-class Lexer(object):
+class Lexer(ABC):
     """Lexer interface
 
     Method Signatures:
         lex(self, text) -> Iterator[Token]
     """
-    lex = NotImplemented
+    lex: Callable[..., Iterator[Token]] = NotImplemented
 
     def make_lexer_state(self, text):
         line_ctr = LineCounter(b'\n' if isinstance(text, bytes) else '\n')
@@ -305,7 +337,14 @@
 
 class TraditionalLexer(Lexer):
 
-    def __init__(self, conf):
+    terminals: Collection[TerminalDef]
+    ignore_types: FrozenSet[str]
+    newline_types: FrozenSet[str]
+    user_callbacks: Dict[str, _Callback]
+    callback: Dict[str, _Callback]
+    re: ModuleType
+
+    def __init__(self, conf: 'LexerConf') -> None:
         terminals = list(conf.terminals)
         assert all(isinstance(t, TerminalDef) for t in terminals), terminals
 
@@ -338,7 +377,7 @@ class TraditionalLexer(Lexer):
 
         self._mres = None
 
-    def _build(self):
+    def _build(self) -> None:
         terminals, self.callback = _create_unless(self.terminals, self.g_regex_flags, self.re, self.use_bytes)
         assert all(self.callback.values())
 
@@ -352,23 +391,23 @@ class TraditionalLexer(Lexer):
         self._mres = build_mres(terminals, self.g_regex_flags, self.re, self.use_bytes)
 
     @property
-    def mres(self):
+    def mres(self) -> List[Tuple[REPattern, Dict[int, str]]]:
         if self._mres is None:
             self._build()
         return self._mres
 
-    def match(self, text, pos):
+    def match(self, text: str, pos: int) -> Optional[Tuple[str, str]]:
         for mre, type_from_index in self.mres:
             m = mre.match(text, pos)
             if m:
                 return m.group(0), type_from_index[m.lastindex]
 
-    def lex(self, state, parser_state):
+    def lex(self, state: Any, parser_state: Any) -> Iterator[Token]:
         with suppress(EOFError):
             while True:
                 yield self.next_token(state, parser_state)
 
-    def next_token(self, lex_state, parser_state=None):
+    def next_token(self, lex_state: Any, parser_state: Any=None) -> Token:
         line_ctr = lex_state.line_ctr
         while line_ctr.char_pos < len(lex_state.text):
             res = self.match(lex_state.text, line_ctr.char_pos)
@@ -424,7 +463,10 @@ class LexerState(object):
 
 class ContextualLexer(Lexer):
 
-    def __init__(self, conf, states, always_accept=()):
+    lexers: Dict[str, TraditionalLexer]
+    root_lexer: TraditionalLexer
+
+    def __init__(self, conf: 'LexerConf', states: Dict[str, Collection[str]], always_accept: Collection[str]=()) -> None:
         terminals = list(conf.terminals)
         terminals_by_name = conf.terminals_by_name
 
@@ -452,7 +494,7 @@ class ContextualLexer(Lexer):
     def make_lexer_state(self, text):
         return self.root_lexer.make_lexer_state(text)
 
-    def lex(self, lexer_state, parser_state):
+    def lex(self, lexer_state: Any, parser_state: Any) -> Iterator[Token]:
         try:
             while True:
                 lexer = self.lexers[parser_state.position]
diff --git a/lark/utils.py b/lark/utils.py
index 47fe5ca..81c9128 100644
--- a/lark/utils.py
+++ b/lark/utils.py
@@ -6,7 +6,7 @@ from collections import deque
 ###{standalone
 import sys, re
 import logging
-logger = logging.getLogger("lark")
+logger: logging.Logger = logging.getLogger("lark")
 logger.addHandler(logging.StreamHandler())
 # Set to highest level, since we have some warnings amongst the code
 # By default, we should not output any log messages
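
Note: with `lark-stubs` deleted, type checkers read these signatures straight from the package instead of a separate stub distribution. A minimal sketch of downstream code that the inlined annotations now cover — the grammar, the `upcase` callback, and the `parser='lalr'` choice are illustrative, not part of this diff:

```python
from lark import Lark, Token

def upcase(tok: Token) -> Token:
    # Token.update() is annotated above to return 'Token', so this
    # function satisfies the _Callback = Callable[[Token], Token] alias.
    return tok.update(value=tok.value.upper())

# lexer_callbacks maps terminal names to Token -> Token callables.
parser = Lark("start: WORD\n%import common.WORD", parser='lalr',
              lexer_callbacks={'WORD': upcase})

tree = parser.parse("hello")
print(tree.children[0])  # the WORD token, upper-cased by the callback
```

The callback's `Token -> Token` shape is exactly the `_Callback` alias this diff adds to both `lark/common.py` and `lark/lexer.py`, which is why it can be checked without the old `lark-stubs/lexer.pyi`.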