From 8d0cdeeb14cac15310991c6acc24788b3d9fb95d Mon Sep 17 00:00:00 2001 From: Chanic Panic Date: Mon, 28 Jun 2021 10:00:32 -0700 Subject: [PATCH 1/9] Start merging .pyi files ast_utils, grammar, and indenter --- lark-stubs/ast_utils.pyi | 17 --------------- lark-stubs/grammar.pyi | 14 ------------ lark-stubs/indenter.pyi | 47 ---------------------------------------- lark/ast_utils.py | 6 +++-- lark/grammar.py | 16 ++++++++------ lark/indenter.py | 43 +++++++++++++++++++++++++++++++----- 6 files changed, 51 insertions(+), 92 deletions(-) delete mode 100644 lark-stubs/ast_utils.pyi delete mode 100644 lark-stubs/grammar.pyi delete mode 100644 lark-stubs/indenter.pyi diff --git a/lark-stubs/ast_utils.pyi b/lark-stubs/ast_utils.pyi deleted file mode 100644 index 28246cf..0000000 --- a/lark-stubs/ast_utils.pyi +++ /dev/null @@ -1,17 +0,0 @@ -import types -from typing import Optional - -from .visitors import Transformer - -class Ast(object): - pass - -class AsList(object): - pass - - -def create_transformer( - ast_module: types.ModuleType, - transformer: Optional[Transformer]=None -) -> Transformer: - ... \ No newline at end of file diff --git a/lark-stubs/grammar.pyi b/lark-stubs/grammar.pyi deleted file mode 100644 index 3a3d806..0000000 --- a/lark-stubs/grammar.pyi +++ /dev/null @@ -1,14 +0,0 @@ -from typing import Optional, Tuple - - -class RuleOptions: - keep_all_tokens: bool - expand1: bool - priority: int - template_source: Optional[str] - empty_indices: Tuple[bool, ...] - - -class Symbol: - name: str - is_term: bool diff --git a/lark-stubs/indenter.pyi b/lark-stubs/indenter.pyi deleted file mode 100644 index 3a7aa97..0000000 --- a/lark-stubs/indenter.pyi +++ /dev/null @@ -1,47 +0,0 @@ -# -*- coding: utf-8 -*- - -from typing import Tuple, List, Iterator, Optional -from abc import ABC, abstractmethod -from .lexer import Token -from .lark import PostLex - - -class Indenter(PostLex, ABC): - paren_level: Optional[int] - indent_level: Optional[List[int]] - - def __init__(self) -> None: - ... - - def handle_NL(self, token: Token) -> Iterator[Token]: - ... - - @property - @abstractmethod - def NL_type(self) -> str: - ... - - @property - @abstractmethod - def OPEN_PAREN_types(self) -> List[str]: - ... - - @property - @abstractmethod - def CLOSE_PAREN_types(self) -> List[str]: - ... - - @property - @abstractmethod - def INDENT_type(self) -> str: - ... - - @property - @abstractmethod - def DEDENT_type(self) -> str: - ... - - @property - @abstractmethod - def tab_len(self) -> int: - ... diff --git a/lark/ast_utils.py b/lark/ast_utils.py index 0f2e498..c535f11 100644 --- a/lark/ast_utils.py +++ b/lark/ast_utils.py @@ -3,6 +3,8 @@ """ import inspect, re +import types +from typing import Optional from lark import Transformer, v_args @@ -27,7 +29,7 @@ def _call(func, _data, children, _meta): inline = v_args(wrapper=_call) -def create_transformer(ast_module, transformer=None): +def create_transformer(ast_module: types.ModuleType, transformer: Optional[Transformer]=None) -> Transformer: """Collects `Ast` subclasses from the given module, and creates a Lark transformer that builds the AST. For each class, we create a corresponding rule in the transformer, with a matching name. 
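
A usage sketch of the now-annotated create_transformer (not part of the patch; my_ast_module, grammar, and text are illustrative placeholders):

    import my_ast_module                       # hypothetical module defining Ast subclasses
    from lark import Lark, ast_utils

    parser = Lark(grammar)                     # grammar rules named like the classes (camel_to_snake)
    to_ast = ast_utils.create_transformer(my_ast_module)
    ast = to_ast.transform(parser.parse(text))
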
@@ -49,4 +51,4 @@ def create_transformer(ast_module, transformer=None): setattr(t, camel_to_snake(name), obj) - return t \ No newline at end of file + return t diff --git a/lark/grammar.py b/lark/grammar.py index 405086a..8896b17 100644 --- a/lark/grammar.py +++ b/lark/grammar.py @@ -1,3 +1,5 @@ +from typing import Optional, Tuple + from .utils import Serialize ###{standalone @@ -5,10 +7,10 @@ from .utils import Serialize class Symbol(Serialize): __slots__ = ('name',) - is_term = NotImplemented + is_term: bool = NotImplemented def __init__(self, name): - self.name = name + self.name: str = name def __eq__(self, other): assert isinstance(other, Symbol), other @@ -50,11 +52,11 @@ class RuleOptions(Serialize): __serialize_fields__ = 'keep_all_tokens', 'expand1', 'priority', 'template_source', 'empty_indices' def __init__(self, keep_all_tokens=False, expand1=False, priority=None, template_source=None, empty_indices=()): - self.keep_all_tokens = keep_all_tokens - self.expand1 = expand1 - self.priority = priority - self.template_source = template_source - self.empty_indices = empty_indices + self.keep_all_tokens: bool = keep_all_tokens + self.expand1: bool = expand1 + self.priority: int = priority + self.template_source: Optional[str] = template_source + self.empty_indices: Tuple[bool, ...] = empty_indices def __repr__(self): return 'RuleOptions(%r, %r, %r, %r)' % ( diff --git a/lark/indenter.py b/lark/indenter.py index 7e1263d..496f6e7 100644 --- a/lark/indenter.py +++ b/lark/indenter.py @@ -1,5 +1,8 @@ "Provides Indentation services for languages with indentation similar to Python" +from abc import ABC, abstractmethod +from typing import Tuple, List, Iterator, Optional + from .exceptions import LarkError from .lark import PostLex from .lexer import Token @@ -8,13 +11,13 @@ from .lexer import Token class DedentError(LarkError): pass -class Indenter(PostLex): - def __init__(self): - self.paren_level = None - self.indent_level = None +class Indenter(PostLex, ABC): + def __init__(self) -> None: + self.paren_level: Optional[int] = None + self.indent_level: Optional[List[int]] = None assert self.tab_len > 0 - def handle_NL(self, token): + def handle_NL(self, token: Token) -> Iterator[Token]: if self.paren_level > 0: return @@ -64,4 +67,34 @@ class Indenter(PostLex): def always_accept(self): return (self.NL_type,) + @property + @abstractmethod + def NL_type(self) -> str: + ... + + @property + @abstractmethod + def OPEN_PAREN_types(self) -> List[str]: + ... + + @property + @abstractmethod + def CLOSE_PAREN_types(self) -> List[str]: + ... + + @property + @abstractmethod + def INDENT_type(self) -> str: + ... + + @property + @abstractmethod + def DEDENT_type(self) -> str: + ... + + @property + @abstractmethod + def tab_len(self) -> int: + ... 
+ ###} From d7b819e9cedfb53f4dd33dfe184ae03d96be0852 Mon Sep 17 00:00:00 2001 From: Chanic Panic Date: Mon, 28 Jun 2021 11:02:05 -0700 Subject: [PATCH 2/9] Continue merging .pyi files load_grammar, reconstruct, and visitors --- lark-stubs/load_grammar.pyi | 31 ----------- lark-stubs/reconstruct.pyi | 39 ------------- lark-stubs/visitors.pyi | 108 ------------------------------------ lark/indenter.py | 3 +- lark/load_grammar.py | 27 ++++----- lark/reconstruct.py | 12 ++-- lark/visitors.py | 59 +++++++++++--------- 7 files changed, 56 insertions(+), 223 deletions(-) delete mode 100644 lark-stubs/load_grammar.pyi delete mode 100644 lark-stubs/reconstruct.pyi delete mode 100644 lark-stubs/visitors.pyi diff --git a/lark-stubs/load_grammar.pyi b/lark-stubs/load_grammar.pyi deleted file mode 100644 index 86a6341..0000000 --- a/lark-stubs/load_grammar.pyi +++ /dev/null @@ -1,31 +0,0 @@ -from typing import List, Tuple, Union, Callable, Dict, Optional - -from .tree import Tree -from .grammar import RuleOptions -from .exceptions import UnexpectedInput - - -class Grammar: - rule_defs: List[Tuple[str, Tuple[str, ...], Tree, RuleOptions]] - term_defs: List[Tuple[str, Tuple[Tree, int]]] - ignore: List[str] - - -class GrammarBuilder: - global_keep_all_tokens: bool - import_paths: List[Union[str, Callable]] - used_files: Dict[str, str] - - def __init__(self, global_keep_all_tokens: bool = False, import_paths: List[Union[str, Callable]] = None, used_files: Dict[str, str]=None) -> None: ... - - def load_grammar(self, grammar_text: str, grammar_name: str = ..., mangle: Callable[[str], str] = None) -> None: ... - - def do_import(self, dotted_path: Tuple[str, ...], base_path: Optional[str], aliases: Dict[str, str], - base_mangle: Callable[[str], str] = None) -> None: ... - - def validate(self) -> None: ... - - def build(self) -> Grammar: ... - - -def find_grammar_errors(text: str, start: str='start') -> List[Tuple[UnexpectedInput, str]]: ... \ No newline at end of file diff --git a/lark-stubs/reconstruct.pyi b/lark-stubs/reconstruct.pyi deleted file mode 100644 index a8d39e3..0000000 --- a/lark-stubs/reconstruct.pyi +++ /dev/null @@ -1,39 +0,0 @@ -# -*- coding: utf-8 -*- - -from typing import List, Dict, Union, Callable, Iterable - -from .grammar import Symbol -from .lark import Lark -from .tree import Tree -from .visitors import Transformer_InPlace -from .lexer import TerminalDef - - -class WriteTokensTransformer(Transformer_InPlace): - - def __init__(self, tokens: Dict[str, TerminalDef], term_subs: Dict[str, Callable[[Symbol], str]] = ...): ... - - -class MatchTree(Tree): - pass - - -class MakeMatchTree: - name: str - expansion: List[TerminalDef] - - def __init__(self, name: str, expansion: List[TerminalDef]): - ... - - def __call__(self, args: List[Union[str, Tree]]): - ... - - -class Reconstructor: - - def __init__(self, parser: Lark, term_subs: Dict[str, Callable[[Symbol], str]] = ...): - ... - - def reconstruct(self, tree: Tree, postproc: Callable[[Iterable[str]], Iterable[str]]=None, - insert_spaces: bool = True) -> str: - ... 
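
The visitors.pyi stub deleted below is where the Generic[_T] machinery now being inlined originally lived: Transformer[_T].transform returns _T. A minimal sketch of what that buys a type checker, using a toy grammar (assumes the merged annotations from this patch):

    from lark import Lark, Transformer

    grammar = r"""
    start: NUMBER+
    %import common.NUMBER
    %import common.WS
    %ignore WS
    """

    class SumNumbers(Transformer[int]):
        def start(self, children) -> int:
            return sum(int(tok) for tok in children)   # children: NUMBER tokens

    total: int = SumNumbers().transform(Lark(grammar).parse("1 2 3"))   # -> 6
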
diff --git a/lark-stubs/visitors.pyi b/lark-stubs/visitors.pyi deleted file mode 100644 index 3a934ee..0000000 --- a/lark-stubs/visitors.pyi +++ /dev/null @@ -1,108 +0,0 @@ -# -*- coding: utf-8 -*- - -from typing import TypeVar, Tuple, List, Callable, Generic, Type, Union -from abc import ABC -from .tree import Tree - -_T = TypeVar('_T') -_R = TypeVar('_R') -_FUNC = Callable[..., _T] -_DECORATED = Union[_FUNC, type] - - -class Transformer(ABC, Generic[_T]): - - def __init__(self, visit_tokens: bool = True) -> None: - ... - - def transform(self, tree: Tree) -> _T: - ... - - def __mul__(self, other: Transformer[_T]) -> TransformerChain[_T]: - ... - - -class TransformerChain(Generic[_T]): - transformers: Tuple[Transformer[_T], ...] - - def __init__(self, *transformers: Transformer[_T]) -> None: - ... - - def transform(self, tree: Tree) -> _T: - ... - - def __mul__(self, other: Transformer[_T]) -> TransformerChain[_T]: - ... - - -class Transformer_InPlace(Transformer): - pass - - -class Transformer_NonRecursive(Transformer): - pass - - -class Transformer_InPlaceRecursive(Transformer): - pass - - -class VisitorBase: - pass - - -class Visitor(VisitorBase, ABC, Generic[_T]): - - def visit(self, tree: Tree) -> Tree: - ... - - def visit_topdown(self, tree: Tree) -> Tree: - ... - - -class Visitor_Recursive(VisitorBase): - - def visit(self, tree: Tree) -> Tree: - ... - - def visit_topdown(self, tree: Tree) -> Tree: - ... - - -class Interpreter(ABC, Generic[_T]): - - def visit(self, tree: Tree) -> _T: - ... - - def visit_children(self, tree: Tree) -> List[_T]: - ... - - -_InterMethod = Callable[[Type[Interpreter], _T], _R] - - -def v_args( - inline: bool = False, - meta: bool = False, - tree: bool = False, - wrapper: Callable = None -) -> Callable[[_DECORATED], _DECORATED]: - ... - - -def visit_children_decor(func: _InterMethod) -> _InterMethod: - ... - - -class Discard(Exception): - pass - - -# Deprecated -class InlineTransformer: - pass - - -# Deprecated -def inline_args(obj: _FUNC) -> _FUNC: - ... 
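
The lark/indenter.py changes below continue the first patch in this series, which turned Indenter into an abstract PostLex; subclasses satisfy the abstract properties with plain class attributes. A minimal sketch for a Python-like language (the terminal names are illustrative and must exist in your grammar):

    from lark.indenter import Indenter

    class PythonIndenter(Indenter):
        NL_type = '_NEWLINE'
        OPEN_PAREN_types = ['LPAR', 'LSQB', 'LBRACE']
        CLOSE_PAREN_types = ['RPAR', 'RSQB', 'RBRACE']
        INDENT_type = '_INDENT'
        DEDENT_type = '_DEDENT'
        tab_len = 8

    # used as: Lark(grammar, parser='lalr', postlex=PythonIndenter())
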
diff --git a/lark/indenter.py b/lark/indenter.py index 496f6e7..b6f47d6 100644 --- a/lark/indenter.py +++ b/lark/indenter.py @@ -1,13 +1,14 @@ "Provides Indentation services for languages with indentation similar to Python" from abc import ABC, abstractmethod -from typing import Tuple, List, Iterator, Optional from .exceptions import LarkError from .lark import PostLex from .lexer import Token ###{standalone +from typing import Tuple, List, Iterator, Optional + class DedentError(LarkError): pass diff --git a/lark/load_grammar.py b/lark/load_grammar.py index cb8856b..9ee3691 100644 --- a/lark/load_grammar.py +++ b/lark/load_grammar.py @@ -8,6 +8,7 @@ import pkgutil from ast import literal_eval from numbers import Integral from contextlib import suppress +from typing import List, Tuple, Union, Callable, Dict, Optional from .utils import bfs, Py36, logger, classify_bool, is_id_continue, is_id_start, bfs_all_unique from .lexer import Token, TerminalDef, PatternStr, PatternRE @@ -17,7 +18,7 @@ from .parser_frontends import ParsingFrontend from .common import LexerConf, ParserConf from .grammar import RuleOptions, Rule, Terminal, NonTerminal, Symbol from .utils import classify, dedup_list -from .exceptions import GrammarError, UnexpectedCharacters, UnexpectedToken, ParseError +from .exceptions import GrammarError, UnexpectedCharacters, UnexpectedToken, ParseError, UnexpectedInput from .tree import Tree, SlottedTree as ST from .visitors import Transformer, Visitor, v_args, Transformer_InPlace, Transformer_NonRecursive @@ -552,9 +553,9 @@ def nr_deepcopy_tree(t): class Grammar: def __init__(self, rule_defs, term_defs, ignore): - self.term_defs = term_defs - self.rule_defs = rule_defs - self.ignore = ignore + self.term_defs: List[Tuple[str, Tuple[Tree, int]]] = term_defs + self.rule_defs: List[Tuple[str, Tuple[str, ...], Tree, RuleOptions]] = rule_defs + self.ignore: List[str] = ignore def compile(self, start, terminals_to_keep): # We change the trees in-place (to support huge grammars) @@ -874,7 +875,7 @@ def _search_interactive_parser(interactive_parser, predicate): if predicate(p): return path, p -def find_grammar_errors(text, start='start'): +def find_grammar_errors(text: str, start: str='start') -> List[Tuple[UnexpectedInput, str]]: errors = [] def on_error(e): errors.append((e, _error_repr(e))) @@ -923,10 +924,10 @@ def _mangle_exp(exp, mangle): class GrammarBuilder: - def __init__(self, global_keep_all_tokens=False, import_paths=None, used_files=None): - self.global_keep_all_tokens = global_keep_all_tokens - self.import_paths = import_paths or [] - self.used_files = used_files or {} + def __init__(self, global_keep_all_tokens: bool=False, import_paths: List[Union[str, Callable]]=None, used_files: Dict[str, str]=None) -> None: + self.global_keep_all_tokens: bool = global_keep_all_tokens + self.import_paths: List[Union[str, Callable]] = import_paths or [] + self.used_files: Dict[str, str] = used_files or {} self._definitions = {} self._ignore_names = [] @@ -1067,7 +1068,7 @@ class GrammarBuilder: return name, exp, params, opts - def load_grammar(self, grammar_text, grammar_name="", mangle=None): + def load_grammar(self, grammar_text: str, grammar_name: str="", mangle: Callable[[str], str]=None) -> None: tree = _parse_grammar(grammar_text, grammar_name) imports = {} @@ -1130,7 +1131,7 @@ class GrammarBuilder: self._definitions = {k: v for k, v in self._definitions.items() if k in _used} - def do_import(self, dotted_path, base_path, aliases, base_mangle=None): + def do_import(self, dotted_path: 
Tuple[str, ...], base_path: Optional[str], aliases: Dict[str, str], base_mangle: Callable[[str], str]=None) -> None: assert dotted_path mangle = _get_mangle('__'.join(dotted_path), aliases, base_mangle) grammar_path = os.path.join(*dotted_path) + EXT @@ -1166,7 +1167,7 @@ class GrammarBuilder: assert False, "Couldn't import grammar %s, but a corresponding file was found at a place where lark doesn't search for it" % (dotted_path,) - def validate(self): + def validate(self) -> None: for name, (params, exp, _options) in self._definitions.items(): for i, p in enumerate(params): if p in self._definitions: @@ -1195,7 +1196,7 @@ class GrammarBuilder: if not set(self._definitions).issuperset(self._ignore_names): raise GrammarError("Terminals %s were marked to ignore but were not defined!" % (set(self._ignore_names) - set(self._definitions))) - def build(self): + def build(self) -> Grammar: self.validate() rule_defs = [] term_defs = [] diff --git a/lark/reconstruct.py b/lark/reconstruct.py index ab2fb38..2f0911b 100644 --- a/lark/reconstruct.py +++ b/lark/reconstruct.py @@ -1,11 +1,13 @@ """Reconstruct text from a tree, based on Lark grammar""" +from typing import List, Dict, Union, Callable, Iterable import unicodedata +from .lark import Lark from .tree import Tree from .visitors import Transformer_InPlace -from .lexer import Token, PatternStr -from .grammar import Terminal, NonTerminal +from .lexer import Token, PatternStr, TerminalDef +from .grammar import Terminal, NonTerminal, Symbol from .tree_matcher import TreeMatcher, is_discarded_terminal from .utils import is_id_continue @@ -21,7 +23,7 @@ def is_iter_empty(i): class WriteTokensTransformer(Transformer_InPlace): "Inserts discarded tokens into their correct place, according to the rules of grammar" - def __init__(self, tokens, term_subs): + def __init__(self, tokens: Dict[str, TerminalDef], term_subs: Dict[str, Callable[[Symbol], str]]) -> None: self.tokens = tokens self.term_subs = term_subs @@ -70,7 +72,7 @@ class Reconstructor(TreeMatcher): term_subs: a dictionary of [Terminal name as str] to [output text as str] """ - def __init__(self, parser, term_subs=None): + def __init__(self, parser: Lark, term_subs: Dict[str, Callable[[Symbol], str]]=None) -> None: TreeMatcher.__init__(self, parser) self.write_tokens = WriteTokensTransformer({t.name:t for t in self.tokens}, term_subs or {}) @@ -87,7 +89,7 @@ class Reconstructor(TreeMatcher): else: yield item - def reconstruct(self, tree, postproc=None, insert_spaces=True): + def reconstruct(self, tree: Tree, postproc: Callable[[Iterable[str]], Iterable[str]]=None, insert_spaces: bool=True) -> str: x = self._reconstruct(tree) if postproc: x = postproc(x) diff --git a/lark/visitors.py b/lark/visitors.py index 22beb47..847c468 100644 --- a/lark/visitors.py +++ b/lark/visitors.py @@ -1,3 +1,4 @@ +from abc import ABC from functools import wraps from .utils import smart_decorator, combine_alternatives @@ -7,7 +8,12 @@ from .lexer import Token ###{standalone from inspect import getmembers, getmro +from typing import TypeVar, Tuple, List, Callable, Generic, Type, Union +_T = TypeVar('_T') +_R = TypeVar('_R') +_FUNC = Callable[..., _T] +_DECORATED = Union[_FUNC, type] class Discard(Exception): """When raising the Discard exception in a transformer callback, @@ -46,7 +52,7 @@ class _Decoratable: return cls -class Transformer(_Decoratable): +class Transformer(_Decoratable, ABC, Generic[_T]): """Transformers visit each node of the tree, and run the appropriate method on it according to the node's data. 
Methods are provided by the user via inheritance, and called according to ``tree.data``. @@ -74,7 +80,7 @@ class Transformer(_Decoratable): """ __visit_tokens__ = True # For backwards compatibility - def __init__(self, visit_tokens=True): + def __init__(self, visit_tokens: bool=True) -> None: self.__visit_tokens__ = visit_tokens def _call_userfunc(self, tree, new_children=None): @@ -125,11 +131,11 @@ class Transformer(_Decoratable): children = list(self._transform_children(tree.children)) return self._call_userfunc(tree, children) - def transform(self, tree): + def transform(self, tree: Tree) -> _T: "Transform the given tree, and return the final result" return self._transform_tree(tree) - def __mul__(self, other): + def __mul__(self, other: 'Transformer[_T]') -> 'TransformerChain[_T]': """Chain two transformers together, returning a new transformer. """ return TransformerChain(self, other) @@ -149,16 +155,16 @@ class Transformer(_Decoratable): return token -class TransformerChain(object): +class TransformerChain(Generic[_T]): def __init__(self, *transformers): - self.transformers = transformers + self.transformers: Tuple[Transformer[_T], ...] = transformers - def transform(self, tree): + def transform(self, tree: Tree) -> _T: for t in self.transformers: tree = t.transform(tree) return tree - def __mul__(self, other): + def __mul__(self, other: Transformer[_T]) -> 'TransformerChain[_T]': return TransformerChain(*self.transformers + (other,)) @@ -239,19 +245,19 @@ class VisitorBase: return cls -class Visitor(VisitorBase): +class Visitor(VisitorBase, ABC, Generic[_T]): """Tree visitor, non-recursive (can handle huge trees). Visiting a node calls its methods (provided by the user via inheritance) according to ``tree.data`` """ - def visit(self, tree): + def visit(self, tree: Tree) -> Tree: "Visits the tree, starting with the leaves and finally the root (bottom-up)" for subtree in tree.iter_subtrees(): self._call_userfunc(subtree) return tree - def visit_topdown(self,tree): + def visit_topdown(self, tree: Tree) -> Tree: "Visit the tree, starting at the root, and ending at the leaves (top-down)" for subtree in tree.iter_subtrees_topdown(): self._call_userfunc(subtree) @@ -266,7 +272,7 @@ class Visitor_Recursive(VisitorBase): Slightly faster than the non-recursive version. """ - def visit(self, tree): + def visit(self, tree: Tree) -> Tree: "Visits the tree, starting with the leaves and finally the root (bottom-up)" for child in tree.children: if isinstance(child, Tree): @@ -275,7 +281,7 @@ class Visitor_Recursive(VisitorBase): self._call_userfunc(tree) return tree - def visit_topdown(self,tree): + def visit_topdown(self,tree: Tree) -> Tree: "Visit the tree, starting at the root, and ending at the leaves (top-down)" self._call_userfunc(tree) @@ -286,16 +292,7 @@ class Visitor_Recursive(VisitorBase): return tree -def visit_children_decor(func): - "See Interpreter" - @wraps(func) - def inner(cls, tree): - values = cls.visit_children(tree) - return func(cls, values) - return inner - - -class Interpreter(_Decoratable): +class Interpreter(_Decoratable, ABC, Generic[_T]): """Interpreter walks the tree starting at the root. Visits the tree, starting with the root and finally the leaves (top-down) @@ -307,7 +304,7 @@ class Interpreter(_Decoratable): This allows the user to implement branching and loops. 
""" - def visit(self, tree): + def visit(self, tree: Tree) -> _T: f = getattr(self, tree.data) wrapper = getattr(f, 'visit_wrapper', None) if wrapper is not None: @@ -315,7 +312,7 @@ class Interpreter(_Decoratable): else: return f(tree) - def visit_children(self, tree): + def visit_children(self, tree: Tree) -> List[_T]: return [self.visit(child) if isinstance(child, Tree) else child for child in tree.children] @@ -326,6 +323,16 @@ class Interpreter(_Decoratable): return self.visit_children(tree) +_InterMethod = Callable[[Type[Interpreter], _T], _R] + +def visit_children_decor(func: _InterMethod) -> _InterMethod: + "See Interpreter" + @wraps(func) + def inner(cls, tree): + values = cls.visit_children(tree) + return func(cls, values) + return inner + # Decorators def _apply_decorator(obj, decorator, **kwargs): @@ -380,7 +387,7 @@ def _vargs_tree(f, data, children, meta): return f(Tree(data, children, meta)) -def v_args(inline=False, meta=False, tree=False, wrapper=None): +def v_args(inline: bool=False, meta: bool=False, tree: bool=False, wrapper: Callable[[_DECORATED], _DECORATED]=None) -> Callable[[_DECORATED], _DECORATED]: """A convenience decorator factory for modifying the behavior of user-supplied visitor methods. By default, callback methods of transformers/visitors accept one argument - a list of the node's children. From 089bc2b523ce7286fa3a6ee1046c4324b67d15f4 Mon Sep 17 00:00:00 2001 From: Chanic Panic Date: Mon, 28 Jun 2021 15:56:39 -0700 Subject: [PATCH 3/9] More .pyi merging exceptions, lark, and tree --- lark-stubs/exceptions.pyi | 65 --------------------------------------- lark-stubs/lark.pyi | 62 ++----------------------------------- lark-stubs/tree.pyi | 62 ++----------------------------------- lark/exceptions.py | 30 ++++++++++++++++-- lark/lark.py | 56 ++++++++++++++++++++++++--------- lark/load_grammar.py | 8 +++-- lark/tree.py | 40 +++++++++++++++++------- 7 files changed, 109 insertions(+), 214 deletions(-) delete mode 100644 lark-stubs/exceptions.pyi diff --git a/lark-stubs/exceptions.pyi b/lark-stubs/exceptions.pyi deleted file mode 100644 index 1c04fa8..0000000 --- a/lark-stubs/exceptions.pyi +++ /dev/null @@ -1,65 +0,0 @@ -# -*- coding: utf-8 -*- - -from typing import Dict, Iterable, Callable, Union, TypeVar, Tuple, Any, List, Set -from .tree import Tree -from .lexer import Token -from .parsers.lalr_interactive_parser import InteractiveParser - -class LarkError(Exception): - pass - - -class ConfigurationError(LarkError, ValueError): - pass - - -class GrammarError(LarkError): - pass - - -class ParseError(LarkError): - pass - - -class LexError(LarkError): - pass - - -T = TypeVar('T') - -class UnexpectedEOF(ParseError): - expected: List[Token] - -class UnexpectedInput(LarkError): - line: int - column: int - pos_in_stream: int - state: Any - - def get_context(self, text: str, span: int = ...) -> str: - ... - - def match_examples( - self, - parse_fn: Callable[[str], Tree], - examples: Union[Dict[T, Iterable[str]], Iterable[Tuple[T, Iterable[str]]]], - token_type_match_fallback: bool = False, - use_accepts: bool = False, - ) -> T: - ... 
- - -class UnexpectedToken(ParseError, UnexpectedInput): - expected: Set[str] - considered_rules: Set[str] - interactive_parser: InteractiveParser - accepts: Set[str] - -class UnexpectedCharacters(LexError, UnexpectedInput): - allowed: Set[str] - considered_tokens: Set[Any] - - -class VisitError(LarkError): - obj: Union[Tree, Token] - orig_exc: Exception diff --git a/lark-stubs/lark.pyi b/lark-stubs/lark.pyi index 27c6863..579e802 100644 --- a/lark-stubs/lark.pyi +++ b/lark-stubs/lark.pyi @@ -1,19 +1,13 @@ # -*- coding: utf-8 -*- from typing import ( - TypeVar, Type, List, Dict, IO, Iterator, Callable, Union, Optional, + Type, List, Dict, IO, Iterator, Callable, Union, Optional, Literal, Protocol, Tuple, Iterable, ) -from .parsers.lalr_interactive_parser import InteractiveParser from .visitors import Transformer from .lexer import Token, Lexer, TerminalDef -from .tree import Tree -from .exceptions import UnexpectedInput -from .load_grammar import Grammar - -_T = TypeVar('_T') - +from .load_grammar import Grammar, PackageResource class PostLex(Protocol): @@ -22,39 +16,8 @@ class PostLex(Protocol): always_accept: Iterable[str] - class LarkOptions: - start: List[str] - parser: str - lexer: str - transformer: Optional[Transformer] - postlex: Optional[PostLex] - ambiguity: str - regex: bool - debug: bool - keep_all_tokens: bool - propagate_positions: Union[bool, str] - maybe_placeholders: bool - lexer_callbacks: Dict[str, Callable[[Token], Token]] - cache: Union[bool, str] - g_regex_flags: int - use_bytes: bool - import_paths: List[Union[str, Callable[[Union[None, str, PackageResource], str], Tuple[str, str]]]] - source_path: Optional[str] - - -class PackageResource(object): - pkg_name: str - path: str - - def __init__(self, pkg_name: str, path: str): ... - - -class FromPackageLoader: - def __init__(self, pkg_name: str, search_paths: Tuple[str, ...] = ...): ... - - def __call__(self, base_path: Union[None, str, PackageResource], grammar_path: str) -> Tuple[PackageResource, str]: ... - + ... class Lark: source_path: str @@ -88,22 +51,3 @@ class Lark: ): ... - def parse(self, text: str, start: Optional[str] = None, on_error: Callable[[UnexpectedInput], bool] = None) -> Tree: - ... - - def parse_interactive(self, text: str = None, start: Optional[str] = None) -> InteractiveParser: - ... - - @classmethod - def open(cls: Type[_T], grammar_filename: str, rel_to: Optional[str] = None, **options) -> _T: - ... - - @classmethod - def open_from_package(cls: Type[_T], package: str, grammar_path: str, search_paths: Tuple[str, ...] = ..., **options) -> _T: - ... - - def lex(self, text: str, dont_ignore: bool = False) -> Iterator[Token]: - ... - - def get_terminal(self, name: str) -> TerminalDef: - ... diff --git a/lark-stubs/tree.pyi b/lark-stubs/tree.pyi index ea99ff6..824e9e7 100644 --- a/lark-stubs/tree.pyi +++ b/lark-stubs/tree.pyi @@ -1,67 +1,9 @@ # -*- coding: utf-8 -*- -from typing import List, Callable, Iterator, Union, Optional, Literal, Any -from .lexer import TerminalDef - -class Meta: - empty: bool - line: int - column: int - start_pos: int - end_line: int - end_column: int - end_pos: int - orig_expansion: List[TerminalDef] - match_tree: bool - +from typing import Literal class Tree: - data: str - children: List[Union[str, Tree]] - meta: Meta - - def __init__( - self, - data: str, - children: List[Union[str, Tree]], - meta: Optional[Meta] = None - ) -> None: - ... - - def pretty(self, indent_str: str = ...) -> str: - ... 
- - def find_pred(self, pred: Callable[[Tree], bool]) -> Iterator[Tree]: - ... - - def find_data(self, data: str) -> Iterator[Tree]: - ... - - def expand_kids_by_index(self, *indices: int) -> None: - ... - - def scan_values(self, pred: Callable[[Union[str, Tree]], bool]) -> Iterator[str]: - ... - - def iter_subtrees(self) -> Iterator[Tree]: - ... - - def iter_subtrees_topdown(self) -> Iterator[Tree]: - ... - - def copy(self) -> Tree: - ... - - def set(self, data: str, children: List[Union[str, Tree]]) -> None: - ... - - def __hash__(self) -> int: - ... - - -class SlottedTree(Tree): - pass - + ... def pydot__tree_to_png( tree: Tree, diff --git a/lark/exceptions.py b/lark/exceptions.py index 2ae0859..3276db5 100644 --- a/lark/exceptions.py +++ b/lark/exceptions.py @@ -3,6 +3,12 @@ from .utils import logger, NO_VALUE ###{standalone +from typing import Dict, Iterable, Callable, Union, TypeVar, Tuple, Any, List, Set, TYPE_CHECKING + +if TYPE_CHECKING: + from .lexer import Token + from .parsers.lalr_interactive_parser import InteractiveParser + from .tree import Tree class LarkError(Exception): pass @@ -28,6 +34,7 @@ class ParseError(LarkError): class LexError(LarkError): pass +T = TypeVar('T') class UnexpectedInput(LarkError): """UnexpectedInput Error. @@ -39,10 +46,13 @@ class UnexpectedInput(LarkError): After catching one of these exceptions, you may call the following helper methods to create a nicer error message. """ + line: int + column: int pos_in_stream = None + state: Any _terminals_by_name = None - def get_context(self, text, span=40): + def get_context(self, text: str, span: int=40) -> str: """Returns a pretty string pinpointing the error in the text, with span amount of context characters around it. @@ -63,7 +73,7 @@ class UnexpectedInput(LarkError): after = text[pos:end].split(b'\n', 1)[0] return (before + after + b'\n' + b' ' * len(before.expandtabs()) + b'^\n').decode("ascii", "backslashreplace") - def match_examples(self, parse_fn, examples, token_type_match_fallback=False, use_accepts=False): + def match_examples(self, parse_fn: 'Callable[[str], Tree]', examples: Union[Dict[T, Iterable[str]], Iterable[Tuple[T, Iterable[str]]]], token_type_match_fallback: bool=False, use_accepts: bool=False) -> T: """Allows you to detect what's wrong in the input text by matching against example errors. @@ -126,6 +136,9 @@ class UnexpectedInput(LarkError): class UnexpectedEOF(ParseError, UnexpectedInput): + + expected: 'List[Token]' + def __init__(self, expected, state=None, terminals_by_name=None): self.expected = expected self.state = state @@ -145,6 +158,10 @@ class UnexpectedEOF(ParseError, UnexpectedInput): class UnexpectedCharacters(LexError, UnexpectedInput): + + allowed: Set[str] + considered_tokens: Set[Any] + def __init__(self, seq, lex_pos, line, column, allowed=None, considered_tokens=None, state=None, token_history=None, terminals_by_name=None, considered_rules=None): # TODO considered_tokens and allowed can be figured out using state @@ -187,6 +204,10 @@ class UnexpectedToken(ParseError, UnexpectedInput): see: ``InteractiveParser``. 
""" + expected: Set[str] + considered_rules: Set[str] + interactive_parser: 'InteractiveParser' + def __init__(self, token, expected, considered_rules=None, state=None, interactive_parser=None, terminals_by_name=None, token_history=None): # TODO considered_rules and expected can be figured out using state self.line = getattr(token, 'line', '?') @@ -205,7 +226,7 @@ class UnexpectedToken(ParseError, UnexpectedInput): super(UnexpectedToken, self).__init__() @property - def accepts(self): + def accepts(self) -> Set[str]: if self._accepts is NO_VALUE: self._accepts = self.interactive_parser and self.interactive_parser.accepts() return self._accepts @@ -228,6 +249,9 @@ class VisitError(LarkError): - orig_exc: the exception that cause it to fail """ + obj: 'Union[Tree, Token]' + orig_exc: Exception + def __init__(self, rule, obj, orig_exc): self.obj = obj self.orig_exc = orig_exc diff --git a/lark/lark.py b/lark/lark.py index a4d223e..1bd8e52 100644 --- a/lark/lark.py +++ b/lark/lark.py @@ -1,6 +1,10 @@ from abc import ABC, abstractmethod import sys, os, pickle, hashlib import tempfile +from typing import ( + TypeVar, Type, List, Dict, Iterator, Callable, Union, Optional, + Tuple, Iterable, TYPE_CHECKING +) from .exceptions import ConfigurationError, assert_config from .utils import Serialize, SerializeMemoizer, FS, isascii, logger @@ -8,7 +12,7 @@ from .load_grammar import load_grammar, FromPackageLoader, Grammar, verify_used_ from .tree import Tree from .common import LexerConf, ParserConf -from .lexer import Lexer, TraditionalLexer, TerminalDef, LexerThread +from .lexer import Lexer, TraditionalLexer, TerminalDef, LexerThread, Token from .parse_tree_builder import ParseTreeBuilder from .parser_frontends import get_frontend, _get_lexer_callbacks from .grammar import Rule @@ -19,14 +23,44 @@ try: except ImportError: regex = None +if TYPE_CHECKING: + from .load_grammar import PackageResource + from .exceptions import UnexpectedInput + from .parsers.lalr_interactive_parser import InteractiveParser + from .visitors import Transformer ###{standalone +class PostLex(ABC): + @abstractmethod + def process(self, stream): + return stream + + always_accept = () class LarkOptions(Serialize): """Specifies the options for Lark """ + + start: List[str] + parser: str + lexer: str + transformer: 'Optional[Transformer]' + postlex: Optional[PostLex] + ambiguity: str + regex: bool + debug: bool + keep_all_tokens: bool + propagate_positions: Union[bool, str] + maybe_placeholders: bool + lexer_callbacks: Dict[str, Callable[[Token], Token]] + cache: Union[bool, str] + g_regex_flags: int + use_bytes: bool + import_paths: 'List[Union[str, Callable[[Union[None, str, PackageResource], str], Tuple[str, str]]]]' + source_path: Optional[str] + OPTIONS_DOC = """ **=== General Options ===** @@ -189,13 +223,7 @@ _VALID_PRIORITY_OPTIONS = ('auto', 'normal', 'invert', None) _VALID_AMBIGUITY_OPTIONS = ('auto', 'resolve', 'explicit', 'forest') -class PostLex(ABC): - @abstractmethod - def process(self, stream): - return stream - - always_accept = () - +_T = TypeVar('_T') class Lark(Serialize): """Main interface for the library. 
@@ -476,7 +504,7 @@ class Lark(Serialize): return inst._load({'data': data, 'memo': memo}, **kwargs) @classmethod - def open(cls, grammar_filename, rel_to=None, **options): + def open(cls: Type[_T], grammar_filename: str, rel_to: Optional[str]=None, **options) -> _T: """Create an instance of Lark with the grammar given by its filename If ``rel_to`` is provided, the function will find the grammar filename in relation to it. @@ -494,7 +522,7 @@ class Lark(Serialize): return cls(f, **options) @classmethod - def open_from_package(cls, package, grammar_path, search_paths=("",), **options): + def open_from_package(cls: Type[_T], package: str, grammar_path: str, search_paths: Tuple[str, ...]=("",), **options) -> _T: """Create an instance of Lark with the grammar loaded from within the package `package`. This allows grammar loading from zipapps. @@ -515,7 +543,7 @@ class Lark(Serialize): return 'Lark(open(%r), parser=%r, lexer=%r, ...)' % (self.source_path, self.options.parser, self.options.lexer) - def lex(self, text, dont_ignore=False): + def lex(self, text: str, dont_ignore: bool=False) -> Iterator[Token]: """Only lex (and postlex) the text, without parsing it. Only relevant when lexer='standard' When dont_ignore=True, the lexer will return all tokens, even those marked for %ignore. @@ -530,11 +558,11 @@ class Lark(Serialize): return self.options.postlex.process(stream) return stream - def get_terminal(self, name): + def get_terminal(self, name: str) -> TerminalDef: """Get information about a terminal""" return self._terminals_dict[name] - def parse_interactive(self, text=None, start=None): + def parse_interactive(self, text: str=None, start: Optional[str]=None) -> 'InteractiveParser': """Start an interactive parsing session. Parameters: @@ -548,7 +576,7 @@ class Lark(Serialize): """ return self.parser.parse_interactive(text, start=start) - def parse(self, text, start=None, on_error=None): + def parse(self, text: str, start: Optional[str]=None, on_error: 'Callable[[UnexpectedInput], bool]'=None) -> Tree: """Parse the given text, according to the options provided. Parameters: diff --git a/lark/load_grammar.py b/lark/load_grammar.py index 9ee3691..5073475 100644 --- a/lark/load_grammar.py +++ b/lark/load_grammar.py @@ -691,14 +691,18 @@ class FromPackageLoader(object): pkg_name: The name of the package. You can probably provide `__name__` most of the time search_paths: All the path that will be search on absolute imports. """ - def __init__(self, pkg_name, search_paths=("", )): + + pkg_name: str + search_paths: Tuple[str, ...] 
+ + def __init__(self, pkg_name: str, search_paths: Tuple[str, ...]=("", )) -> None: self.pkg_name = pkg_name self.search_paths = search_paths def __repr__(self): return "%s(%r, %r)" % (type(self).__name__, self.pkg_name, self.search_paths) - def __call__(self, base_path, grammar_path): + def __call__(self, base_path: Union[None, str, PackageResource], grammar_path: str) -> Tuple[PackageResource, str]: if base_path is None: to_try = self.search_paths else: diff --git a/lark/tree.py b/lark/tree.py index 468894a..cb29c8a 100644 --- a/lark/tree.py +++ b/lark/tree.py @@ -8,9 +8,23 @@ from copy import deepcopy ###{standalone from collections import OrderedDict +from typing import List, Callable, Iterator, Union, Optional, Any, TYPE_CHECKING +if TYPE_CHECKING: + from .lexer import TerminalDef class Meta: + + empty: bool + line: int + column: int + start_pos: int + end_line: int + end_column: int + end_pos: int + orig_expansion: 'List[TerminalDef]' + match_tree: bool + def __init__(self): self.empty = True @@ -27,13 +41,17 @@ class Tree(object): meta: Line & Column numbers (if ``propagate_positions`` is enabled). meta attributes: line, column, start_pos, end_line, end_column, end_pos """ - def __init__(self, data, children, meta=None): + + data: str + children: 'List[Union[str, Tree]]' + + def __init__(self, data: str, children: 'List[Union[str, Tree]]', meta: Meta=None) -> None: self.data = data self.children = children self._meta = meta @property - def meta(self): + def meta(self) -> Meta: if self._meta is None: self._meta = Meta() return self._meta @@ -57,7 +75,7 @@ class Tree(object): return l - def pretty(self, indent_str=' '): + def pretty(self, indent_str: str=' ') -> str: """Returns an indented string representation of the tree. Great for debugging. @@ -73,10 +91,10 @@ class Tree(object): def __ne__(self, other): return not (self == other) - def __hash__(self): + def __hash__(self) -> int: return hash((self.data, tuple(self.children))) - def iter_subtrees(self): + def iter_subtrees(self) -> 'Iterator[Tree]': """Depth-first iteration. Iterates over all the subtrees, never returning to the same node twice (Lark's parse-tree is actually a DAG). @@ -91,23 +109,23 @@ class Tree(object): del queue return reversed(list(subtrees.values())) - def find_pred(self, pred): + def find_pred(self, pred: 'Callable[[Tree], bool]') -> 'Iterator[Tree]': """Returns all nodes of the tree that evaluate pred(node) as true.""" return filter(pred, self.iter_subtrees()) - def find_data(self, data): + def find_data(self, data: str) -> 'Iterator[Tree]': """Returns all nodes of the tree whose data equals the given data.""" return self.find_pred(lambda t: t.data == data) ###} - def expand_kids_by_index(self, *indices): + def expand_kids_by_index(self, *indices: int) -> None: """Expand (inline) children at the given indices""" for i in sorted(indices, reverse=True): # reverse so that changing tail won't affect indices kid = self.children[i] self.children[i:i+1] = kid.children - def scan_values(self, pred): + def scan_values(self, pred: 'Callable[[Union[str, Tree]], bool]') -> Iterator[str]: """Return all values in the tree that evaluate pred(value) as true. This can be used to find all the tokens in the tree. 
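
For instance, per the scan_values docstring above, collecting every token in a parse tree is one line (assumes a Tree instance named tree is in scope):

    from lark import Token

    all_tokens = list(tree.scan_values(lambda v: isinstance(v, Token)))
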
@@ -140,10 +158,10 @@ class Tree(object): def __deepcopy__(self, memo): return type(self)(self.data, deepcopy(self.children, memo), meta=self._meta) - def copy(self): + def copy(self) -> 'Tree': return type(self)(self.data, self.children) - def set(self, data, children): + def set(self, data: str, children: 'List[Union[str, Tree]]') -> None: self.data = data self.children = children From 433adeeaff80306f5ba4bfb246d5eef8180f62cc Mon Sep 17 00:00:00 2001 From: Chanic Panic Date: Tue, 29 Jun 2021 13:08:32 -0700 Subject: [PATCH 4/9] Last batch of .pyi files __init__, lexer, and parsers/__init__ --- lark-stubs/__init__.pyi | 12 --- lark-stubs/lexer.pyi | 161 -------------------------------- lark-stubs/parsers/__init__.pyi | 0 lark/__init__.py | 2 +- lark/common.py | 19 +++- lark/lexer.py | 98 +++++++++++++------ lark/utils.py | 2 +- 7 files changed, 89 insertions(+), 205 deletions(-) delete mode 100644 lark-stubs/__init__.pyi delete mode 100644 lark-stubs/lexer.pyi delete mode 100644 lark-stubs/parsers/__init__.pyi diff --git a/lark-stubs/__init__.pyi b/lark-stubs/__init__.pyi deleted file mode 100644 index c79a6ef..0000000 --- a/lark-stubs/__init__.pyi +++ /dev/null @@ -1,12 +0,0 @@ -# -*- coding: utf-8 -*- - -from .tree import * -from .visitors import * -from .exceptions import * -from .lexer import * -from .load_grammar import * -from .lark import * -from logging import Logger as _Logger - -logger: _Logger -__version__: str = ... diff --git a/lark-stubs/lexer.pyi b/lark-stubs/lexer.pyi deleted file mode 100644 index 004865c..0000000 --- a/lark-stubs/lexer.pyi +++ /dev/null @@ -1,161 +0,0 @@ -# -*- coding: utf-8 -*- -from types import ModuleType -from typing import ( - TypeVar, Type, Tuple, List, Dict, Iterator, Collection, Callable, Optional, FrozenSet, Any, - Pattern as REPattern, -) -from abc import abstractmethod, ABC - -_T = TypeVar('_T') - - -class Pattern(ABC): - value: str - flags: Collection[str] - raw: str - type: str - - def __init__(self, value: str, flags: Collection[str] = (), raw: str = None) -> None: - ... - - @abstractmethod - def to_regexp(self) -> str: - ... - - @property - @abstractmethod - def min_width(self) -> int: - ... - - @property - @abstractmethod - def max_width(self) -> int: - ... - - -class PatternStr(Pattern): - type: str = ... - - def to_regexp(self) -> str: - ... - - @property - def min_width(self) -> int: - ... - - @property - def max_width(self) -> int: - ... - - -class PatternRE(Pattern): - type: str = ... - - def to_regexp(self) -> str: - ... - - @property - def min_width(self) -> int: - ... - - @property - def max_width(self) -> int: - ... - - -class TerminalDef: - name: str - pattern: Pattern - priority: int - - def __init__(self, name: str, pattern: Pattern, priority: int = ...) -> None: - ... - - def user_repr(self) -> str: ... - - -class Token(str): - type: str - start_pos: int - value: Any - line: int - column: int - end_line: int - end_column: int - end_pos: int - - def __init__(self, type_: str, value: Any, start_pos: int = None, line: int = None, column: int = None, end_line: int = None, end_column: int = None, end_pos: int = None) -> None: - ... - - def update(self, type_: Optional[str] = None, value: Optional[Any] = None) -> Token: - ... - - @classmethod - def new_borrow_pos(cls: Type[_T], type_: str, value: Any, borrow_t: Token) -> _T: - ... 
- - -_Callback = Callable[[Token], Token] - - -class Lexer(ABC): - lex: Callable[..., Iterator[Token]] - - -class LexerConf: - tokens: Collection[TerminalDef] - re_module: ModuleType - ignore: Collection[str] = () - postlex: Any =None - callbacks: Optional[Dict[str, _Callback]] = None - g_regex_flags: int = 0 - skip_validation: bool = False - use_bytes: bool = False - - - -class TraditionalLexer(Lexer): - terminals: Collection[TerminalDef] - ignore_types: FrozenSet[str] - newline_types: FrozenSet[str] - user_callbacks: Dict[str, _Callback] - callback: Dict[str, _Callback] - mres: List[Tuple[REPattern, Dict[int, str]]] - re: ModuleType - - def __init__( - self, - conf: LexerConf - ) -> None: - ... - - def build(self) -> None: - ... - - def match(self, stream: str, pos: int) -> Optional[Tuple[str, str]]: - ... - - def lex(self, stream: str) -> Iterator[Token]: - ... - - def next_token(self, lex_state: Any, parser_state: Any = None) -> Token: - ... - -class ContextualLexer(Lexer): - lexers: Dict[str, TraditionalLexer] - root_lexer: TraditionalLexer - - def __init__( - self, - terminals: Collection[TerminalDef], - states: Dict[str, Collection[str]], - re_: ModuleType, - ignore: Collection[str] = ..., - always_accept: Collection[str] = ..., - user_callbacks: Dict[str, _Callback] = ..., - g_regex_flags: int = ... - ) -> None: - ... - - def lex(self, stream: str, get_parser_state: Callable[[], str]) -> Iterator[Token]: - ... diff --git a/lark-stubs/parsers/__init__.pyi b/lark-stubs/parsers/__init__.pyi deleted file mode 100644 index e69de29..0000000 diff --git a/lark/__init__.py b/lark/__init__.py index aff5683..609cfc7 100644 --- a/lark/__init__.py +++ b/lark/__init__.py @@ -6,4 +6,4 @@ from .exceptions import (ParseError, LexError, GrammarError, UnexpectedToken, from .lexer import Token from .lark import Lark -__version__ = "1.0.0a" +__version__: str = "1.0.0a" diff --git a/lark/common.py b/lark/common.py index e2cde6b..ccd5e16 100644 --- a/lark/common.py +++ b/lark/common.py @@ -1,14 +1,29 @@ from .utils import Serialize -from .lexer import TerminalDef +from .lexer import TerminalDef, Token ###{standalone +from types import ModuleType +from typing import Any, Callable, Collection, Dict, Optional, TYPE_CHECKING +if TYPE_CHECKING: + from .lark import PostLex + +_Callback = Callable[[Token], Token] class LexerConf(Serialize): __serialize_fields__ = 'terminals', 'ignore', 'g_regex_flags', 'use_bytes', 'lexer_type' __serialize_namespace__ = TerminalDef, - def __init__(self, terminals, re_module, ignore=(), postlex=None, callbacks=None, g_regex_flags=0, skip_validation=False, use_bytes=False): + terminals: Collection[TerminalDef] + re_module: ModuleType + ignore: Collection[str] = () + postlex: 'PostLex' = None + callbacks: Optional[Dict[str, _Callback]] = None + g_regex_flags: int = 0 + skip_validation: bool = False + use_bytes: bool = False + + def __init__(self, terminals: Collection[TerminalDef], re_module: ModuleType, ignore: Collection[str]=(), postlex: 'PostLex'=None, callbacks: Optional[Dict[str, _Callback]]=None, g_regex_flags: int=0, skip_validation: bool=False, use_bytes: bool=False): self.terminals = terminals self.terminals_by_name = {t.name: t for t in self.terminals} assert len(self.terminals) == len(self.terminals_by_name) diff --git a/lark/lexer.py b/lark/lexer.py index 77f7090..6177d26 100644 --- a/lark/lexer.py +++ b/lark/lexer.py @@ -1,5 +1,6 @@ # Lexer Implementation +from abc import abstractmethod, ABC import re from contextlib import suppress @@ -9,12 +10,23 @@ from 
.exceptions import UnexpectedCharacters, LexError, UnexpectedToken ###{standalone from copy import copy +from types import ModuleType +from typing import ( + TypeVar, Type, Tuple, List, Dict, Iterator, Collection, Callable, Optional, FrozenSet, Any, + Pattern as REPattern, TYPE_CHECKING +) -class Pattern(Serialize): - raw = None - type = None +if TYPE_CHECKING: + from .common import LexerConf - def __init__(self, value, flags=(), raw=None): +class Pattern(Serialize, ABC): + + value: str + flags: Collection[str] + raw: str = None + type: str = None + + def __init__(self, value: str, flags: Collection[str]=(), raw: str=None) -> None: self.value = value self.flags = frozenset(flags) self.raw = raw @@ -29,13 +41,18 @@ class Pattern(Serialize): def __eq__(self, other): return type(self) == type(other) and self.value == other.value and self.flags == other.flags - def to_regexp(self): + @abstractmethod + def to_regexp(self) -> str: raise NotImplementedError() - def min_width(self): + @property + @abstractmethod + def min_width(self) -> int: raise NotImplementedError() - def max_width(self): + @property + @abstractmethod + def max_width(self) -> int: raise NotImplementedError() if Py36: @@ -56,13 +73,13 @@ class Pattern(Serialize): class PatternStr(Pattern): __serialize_fields__ = 'value', 'flags' - type = "str" + type: str = "str" - def to_regexp(self): + def to_regexp(self) -> str: return self._get_flags(re.escape(self.value)) @property - def min_width(self): + def min_width(self) -> int: return len(self.value) max_width = min_width @@ -70,9 +87,9 @@ class PatternStr(Pattern): class PatternRE(Pattern): __serialize_fields__ = 'value', 'flags', '_width' - type = "re" + type: str = "re" - def to_regexp(self): + def to_regexp(self) -> str: return self._get_flags(self.value) _width = None @@ -82,11 +99,11 @@ class PatternRE(Pattern): return self._width @property - def min_width(self): + def min_width(self) -> int: return self._get_width()[0] @property - def max_width(self): + def max_width(self) -> int: return self._get_width()[1] @@ -94,7 +111,11 @@ class TerminalDef(Serialize): __serialize_fields__ = 'name', 'pattern', 'priority' __serialize_namespace__ = PatternStr, PatternRE - def __init__(self, name, pattern, priority=1): + name: str + pattern: Pattern + priority: int + + def __init__(self, name: str, pattern: Pattern, priority: int=1) -> None: assert isinstance(pattern, Pattern), pattern self.name = name self.pattern = pattern @@ -103,12 +124,13 @@ class TerminalDef(Serialize): def __repr__(self): return '%s(%r, %r)' % (type(self).__name__, self.name, self.pattern) - def user_repr(self): + def user_repr(self) -> str: if self.name.startswith('__'): # We represent a generated terminal return self.pattern.raw or self.name else: return self.name +_T = TypeVar('_T') class Token(str): """A string with meta-information, that is produced by the lexer. 
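
The hunk below only declares the attributes; runtime behavior is unchanged, so a Token still acts as its string value while carrying position metadata. A quick sketch:

    from lark import Token

    tok = Token('NUMBER', '42', start_pos=0, line=1, column=1)
    assert tok == '42'             # compares as a plain str
    assert tok.type == 'NUMBER'    # but keeps its lexer metadata
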
@@ -131,6 +153,15 @@ class Token(str): """ __slots__ = ('type', 'start_pos', 'value', 'line', 'column', 'end_line', 'end_column', 'end_pos') + type: str + start_pos: int + value: Any + line: int + column: int + end_line: int + end_column: int + end_pos: int + def __new__(cls, type_, value, start_pos=None, line=None, column=None, end_line=None, end_column=None, end_pos=None): try: self = super(Token, cls).__new__(cls, value) @@ -148,7 +179,7 @@ class Token(str): self.end_pos = end_pos return self - def update(self, type_=None, value=None): + def update(self, type_: Optional[str]=None, value: Optional[Any]=None) -> 'Token': return Token.new_borrow_pos( type_ if type_ is not None else self.type, value if value is not None else self.value, @@ -156,7 +187,7 @@ class Token(str): ) @classmethod - def new_borrow_pos(cls, type_, value, borrow_t): + def new_borrow_pos(cls: Type[_T], type_: str, value: Any, borrow_t: 'Token') -> _T: return cls(type_, value, borrow_t.start_pos, borrow_t.line, borrow_t.column, borrow_t.end_line, borrow_t.end_column, borrow_t.end_pos) def __reduce__(self): @@ -289,14 +320,15 @@ def _regexp_has_newline(r): """ return '\n' in r or '\\n' in r or '\\s' in r or '[^' in r or ('(?s' in r and '.' in r) +_Callback = Callable[[Token], Token] -class Lexer(object): +class Lexer(ABC): """Lexer interface Method Signatures: lex(self, text) -> Iterator[Token] """ - lex = NotImplemented + lex: Callable[..., Iterator[Token]] = NotImplemented def make_lexer_state(self, text): line_ctr = LineCounter(b'\n' if isinstance(text, bytes) else '\n') @@ -305,7 +337,14 @@ class Lexer(object): class TraditionalLexer(Lexer): - def __init__(self, conf): + terminals: Collection[TerminalDef] + ignore_types: FrozenSet[str] + newline_types: FrozenSet[str] + user_callbacks: Dict[str, _Callback] + callback: Dict[str, _Callback] + re: ModuleType + + def __init__(self, conf: 'LexerConf') -> None: terminals = list(conf.terminals) assert all(isinstance(t, TerminalDef) for t in terminals), terminals @@ -338,7 +377,7 @@ class TraditionalLexer(Lexer): self._mres = None - def _build(self): + def _build(self) -> None: terminals, self.callback = _create_unless(self.terminals, self.g_regex_flags, self.re, self.use_bytes) assert all(self.callback.values()) @@ -352,23 +391,23 @@ class TraditionalLexer(Lexer): self._mres = build_mres(terminals, self.g_regex_flags, self.re, self.use_bytes) @property - def mres(self): + def mres(self) -> List[Tuple[REPattern, Dict[int, str]]]: if self._mres is None: self._build() return self._mres - def match(self, text, pos): + def match(self, text: str, pos: int) -> Optional[Tuple[str, str]]: for mre, type_from_index in self.mres: m = mre.match(text, pos) if m: return m.group(0), type_from_index[m.lastindex] - def lex(self, state, parser_state): + def lex(self, state: Any, parser_state: Any) -> Iterator[Token]: with suppress(EOFError): while True: yield self.next_token(state, parser_state) - def next_token(self, lex_state, parser_state=None): + def next_token(self, lex_state: Any, parser_state: Any=None) -> Token: line_ctr = lex_state.line_ctr while line_ctr.char_pos < len(lex_state.text): res = self.match(lex_state.text, line_ctr.char_pos) @@ -424,7 +463,10 @@ class LexerState(object): class ContextualLexer(Lexer): - def __init__(self, conf, states, always_accept=()): + lexers: Dict[str, TraditionalLexer] + root_lexer: TraditionalLexer + + def __init__(self, conf: 'LexerConf', states: Dict[str, Collection[str]], always_accept: Collection[str]=()) -> None: terminals = 
list(conf.terminals) terminals_by_name = conf.terminals_by_name @@ -452,7 +494,7 @@ class ContextualLexer(Lexer): def make_lexer_state(self, text): return self.root_lexer.make_lexer_state(text) - def lex(self, lexer_state, parser_state): + def lex(self, lexer_state: Any, parser_state: Any) -> Iterator[Token]: try: while True: lexer = self.lexers[parser_state.position] diff --git a/lark/utils.py b/lark/utils.py index 47fe5ca..81c9128 100644 --- a/lark/utils.py +++ b/lark/utils.py @@ -6,7 +6,7 @@ from collections import deque ###{standalone import sys, re import logging -logger = logging.getLogger("lark") +logger: logging.Logger = logging.getLogger("lark") logger.addHandler(logging.StreamHandler()) # Set to highest level, since we have some warnings amongst the code # By default, we should not output any log messages From be76059015291ac4e3e4bd9e4fe51184c3bf4f74 Mon Sep 17 00:00:00 2001 From: Chanic Panic Date: Tue, 29 Jun 2021 14:45:38 -0700 Subject: [PATCH 5/9] Merge the Literal types --- lark-stubs/lark.pyi | 53 --------------------------------------- lark-stubs/tree.pyi | 14 ----------- lark/grammar.py | 3 ++- lark/lark.py | 60 +++++++++++++++++++++++++++------------------ lark/tree.py | 6 ++++- 5 files changed, 43 insertions(+), 93 deletions(-) delete mode 100644 lark-stubs/lark.pyi delete mode 100644 lark-stubs/tree.pyi diff --git a/lark-stubs/lark.pyi b/lark-stubs/lark.pyi deleted file mode 100644 index 579e802..0000000 --- a/lark-stubs/lark.pyi +++ /dev/null @@ -1,53 +0,0 @@ -# -*- coding: utf-8 -*- - -from typing import ( - Type, List, Dict, IO, Iterator, Callable, Union, Optional, - Literal, Protocol, Tuple, Iterable, -) - -from .visitors import Transformer -from .lexer import Token, Lexer, TerminalDef -from .load_grammar import Grammar, PackageResource - -class PostLex(Protocol): - - def process(self, stream: Iterator[Token]) -> Iterator[Token]: - ... - - always_accept: Iterable[str] - -class LarkOptions: - ... - -class Lark: - source_path: str - source_grammar: str - grammar: Grammar - options: LarkOptions - lexer: Lexer - terminals: List[TerminalDef] - - def __init__( - self, - grammar: Union[Grammar, str, IO[str]], - *, - start: Union[None, str, List[str]] = "start", - parser: Literal["earley", "lalr", "cyk", "auto"] = "auto", - lexer: Union[Literal["auto", "standard", "contextual", "dynamic", "dynamic_complete"], Type[Lexer]] = "auto", - transformer: Optional[Transformer] = None, - postlex: Optional[PostLex] = None, - ambiguity: Literal["explicit", "resolve"] = "resolve", - regex: bool = False, - debug: bool = False, - keep_all_tokens: bool = False, - propagate_positions: Union[bool, str] = False, - maybe_placeholders: bool = False, - lexer_callbacks: Optional[Dict[str, Callable[[Token], Token]]] = None, - cache: Union[bool, str] = False, - g_regex_flags: int = ..., - use_bytes: bool = False, - import_paths: List[Union[str, Callable[[Union[None, str, PackageResource], str], Tuple[str, str]]]] = ..., - source_path: Optional[str]=None, - ): - ... - diff --git a/lark-stubs/tree.pyi b/lark-stubs/tree.pyi deleted file mode 100644 index 824e9e7..0000000 --- a/lark-stubs/tree.pyi +++ /dev/null @@ -1,14 +0,0 @@ -# -*- coding: utf-8 -*- - -from typing import Literal - -class Tree: - ... - -def pydot__tree_to_png( - tree: Tree, - filename: str, - rankdir: Literal["TB", "LR", "BT", "RL"] = ..., - **kwargs -) -> None: - ... 
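
One caveat for the Literal types merged below: Lark.__init__ still accepts **options, so a checker will not validate keyword values at the call site; the Literals mainly type LarkOptions attribute access. A sketch (parser is an illustrative Lark instance):

    # parser = Lark(grammar, parser="lalr")
    mode = parser.options.parser   # a checker sees Literal["earley", "lalr", "cyk", "auto"]
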
diff --git a/lark/grammar.py b/lark/grammar.py index 8896b17..6045620 100644 --- a/lark/grammar.py +++ b/lark/grammar.py @@ -1,9 +1,10 @@ -from typing import Optional, Tuple from .utils import Serialize ###{standalone +from typing import Optional, Tuple + class Symbol(Serialize): __slots__ = ('name',) diff --git a/lark/lark.py b/lark/lark.py index 1bd8e52..b4c767b 100644 --- a/lark/lark.py +++ b/lark/lark.py @@ -1,14 +1,10 @@ from abc import ABC, abstractmethod import sys, os, pickle, hashlib import tempfile -from typing import ( - TypeVar, Type, List, Dict, Iterator, Callable, Union, Optional, - Tuple, Iterable, TYPE_CHECKING -) -from .exceptions import ConfigurationError, assert_config +from .exceptions import ConfigurationError, assert_config, UnexpectedInput from .utils import Serialize, SerializeMemoizer, FS, isascii, logger -from .load_grammar import load_grammar, FromPackageLoader, Grammar, verify_used_files +from .load_grammar import load_grammar, FromPackageLoader, Grammar, verify_used_files, PackageResource from .tree import Tree from .common import LexerConf, ParserConf @@ -23,20 +19,27 @@ try: except ImportError: regex = None + +###{standalone +from typing import ( + TypeVar, Type, List, Dict, Iterator, Callable, Union, Optional, + Tuple, Iterable, IO, Any, TYPE_CHECKING +) + if TYPE_CHECKING: - from .load_grammar import PackageResource - from .exceptions import UnexpectedInput from .parsers.lalr_interactive_parser import InteractiveParser from .visitors import Transformer - -###{standalone + if sys.version_info >= (3, 8): + from typing import Literal + else: + from typing_extensions import Literal class PostLex(ABC): @abstractmethod - def process(self, stream): + def process(self, stream: Iterator[Token]) -> Iterator[Token]: return stream - always_accept = () + always_accept: Iterable[str] = () class LarkOptions(Serialize): """Specifies the options for Lark @@ -44,20 +47,23 @@ class LarkOptions(Serialize): """ start: List[str] - parser: str - lexer: str - transformer: 'Optional[Transformer]' - postlex: Optional[PostLex] - ambiguity: str - regex: bool debug: bool - keep_all_tokens: bool + transformer: 'Optional[Transformer]' propagate_positions: Union[bool, str] maybe_placeholders: bool - lexer_callbacks: Dict[str, Callable[[Token], Token]] cache: Union[bool, str] + regex: bool g_regex_flags: int + keep_all_tokens: bool + tree_class: Any + parser: 'Literal["earley", "lalr", "cyk", "auto"]' + lexer: 'Union[Literal["auto", "standard", "contextual", "dynamic", "dynamic_complete"], Type[Lexer]]' + ambiguity: 'Literal["auto", "resolve", "explicit", "forest"]' + postlex: Optional[PostLex] + priority: 'Optional[Literal["auto", "normal", "invert"]]' + lexer_callbacks: Dict[str, Callable[[Token], Token]] use_bytes: bool + edit_terminals: Optional[Callable[[TerminalDef], TerminalDef]] import_paths: 'List[Union[str, Callable[[Union[None, str, PackageResource], str], Tuple[str, str]]]]' source_path: Optional[str] @@ -140,9 +146,7 @@ class LarkOptions(Serialize): # Adding a new option needs to be done in multiple places: # - In the dictionary below. This is the primary truth of which options `Lark.__init__` accepts # - In the docstring above. 
It is used both for the docstring of `LarkOptions` and `Lark`, and in readthedocs - # - In `lark-stubs/lark.pyi`: - # - As attribute to `LarkOptions` - # - As parameter to `Lark.__init__` + # - As an attribute of `LarkOptions` above # - Potentially in `_LOAD_ALLOWED_OPTIONS` below this class, when the option doesn't change how the grammar is loaded # - Potentially in `lark.tools.__init__`, if it makes sense, and it can easily be passed as a cmd argument _defaults = { @@ -238,7 +242,15 @@ class Lark(Serialize): >>> Lark(r'''start: "foo" ''') Lark(...) """ - def __init__(self, grammar, **options): + + source_path: str + source_grammar: str + grammar: 'Grammar' + options: LarkOptions + lexer: Lexer + terminals: List[TerminalDef] + + def __init__(self, grammar: 'Union[Grammar, str, IO[str]]', **options) -> None: self.options = LarkOptions(options) # Set regex or re module diff --git a/lark/tree.py b/lark/tree.py index cb29c8a..ff56dea 100644 --- a/lark/tree.py +++ b/lark/tree.py @@ -12,6 +12,10 @@ from typing import List, Callable, Iterator, Union, Optional, Any, TYPE_CHECKING if TYPE_CHECKING: from .lexer import TerminalDef + if sys.version_info >= (3, 8): + from typing import Literal + else: + from typing_extensions import Literal class Meta: @@ -171,7 +175,7 @@ class SlottedTree(Tree): __slots__ = 'data', 'children', 'rule', '_meta' -def pydot__tree_to_png(tree, filename, rankdir="LR", **kwargs): +def pydot__tree_to_png(tree: Tree, filename: str, rankdir: 'Literal["TB", "LR", "BT", "RL"]'="LR", **kwargs) -> None: graph = pydot__tree_to_graph(tree, rankdir, **kwargs) graph.write_png(filename) From bca7c79b1fa3551d80cba294210d914b15772b2f Mon Sep 17 00:00:00 2001 From: Chanic Panic Date: Tue, 29 Jun 2021 15:10:48 -0700 Subject: [PATCH 6/9] Declare instance variable types at class level --- lark/grammar.py | 23 +++++++++++++++-------- lark/indenter.py | 8 ++++++-- lark/load_grammar.py | 30 ++++++++++++++++++++---------- lark/reconstruct.py | 11 ++++++++--- lark/visitors.py | 11 +++++++---- 5 files changed, 56 insertions(+), 27 deletions(-) diff --git a/lark/grammar.py b/lark/grammar.py index 6045620..be1aff6 100644 --- a/lark/grammar.py +++ b/lark/grammar.py @@ -8,10 +8,11 @@ from typing import Optional, Tuple class Symbol(Serialize): __slots__ = ('name',) + name: str is_term: bool = NotImplemented - def __init__(self, name): - self.name: str = name + def __init__(self, name: str) -> None: + self.name = name def __eq__(self, other): assert isinstance(other, Symbol), other @@ -52,12 +53,18 @@ class NonTerminal(Symbol): class RuleOptions(Serialize): __serialize_fields__ = 'keep_all_tokens', 'expand1', 'priority', 'template_source', 'empty_indices' - def __init__(self, keep_all_tokens=False, expand1=False, priority=None, template_source=None, empty_indices=()): - self.keep_all_tokens: bool = keep_all_tokens - self.expand1: bool = expand1 - self.priority: int = priority - self.template_source: Optional[str] = template_source - self.empty_indices: Tuple[bool, ...] = empty_indices + keep_all_tokens: bool + expand1: bool + priority: Optional[int] + template_source: Optional[str] + empty_indices: Tuple[bool, ...] 
+ + def __init__(self, keep_all_tokens: bool=False, expand1: bool=False, priority: Optional[int]=None, template_source: Optional[str]=None, empty_indices: Tuple[bool, ...]=()) -> None: + self.keep_all_tokens = keep_all_tokens + self.expand1 = expand1 + self.priority = priority + self.template_source = template_source + self.empty_indices = empty_indices def __repr__(self): return 'RuleOptions(%r, %r, %r, %r)' % ( diff --git a/lark/indenter.py b/lark/indenter.py index b6f47d6..03c5093 100644 --- a/lark/indenter.py +++ b/lark/indenter.py @@ -13,9 +13,13 @@ class DedentError(LarkError): pass class Indenter(PostLex, ABC): + + paren_level: Optional[int] + indent_level: Optional[List[int]] + def __init__(self) -> None: - self.paren_level: Optional[int] = None - self.indent_level: Optional[List[int]] = None + self.paren_level = None + self.indent_level = None assert self.tab_len > 0 def handle_NL(self, token: Token) -> Iterator[Token]: diff --git a/lark/load_grammar.py b/lark/load_grammar.py index 5073475..3e9b95a 100644 --- a/lark/load_grammar.py +++ b/lark/load_grammar.py @@ -552,10 +552,15 @@ def nr_deepcopy_tree(t): class Grammar: - def __init__(self, rule_defs, term_defs, ignore): - self.term_defs: List[Tuple[str, Tuple[Tree, int]]] = term_defs - self.rule_defs: List[Tuple[str, Tuple[str, ...], Tree, RuleOptions]] = rule_defs - self.ignore: List[str] = ignore + + term_defs: List[Tuple[str, Tuple[Tree, int]]] + rule_defs: List[Tuple[str, Tuple[str, ...], Tree, RuleOptions]] + ignore: List[str] + + def __init__(self, rule_defs: List[Tuple[str, Tuple[str, ...], Tree, RuleOptions]], term_defs: List[Tuple[str, Tuple[Tree, int]]], ignore: List[str]) -> None: + self.term_defs = term_defs + self.rule_defs = rule_defs + self.ignore = ignore def compile(self, start, terminals_to_keep): # We change the trees in-place (to support huge grammars) @@ -928,10 +933,15 @@ def _mangle_exp(exp, mangle): class GrammarBuilder: - def __init__(self, global_keep_all_tokens: bool=False, import_paths: List[Union[str, Callable]]=None, used_files: Dict[str, str]=None) -> None: - self.global_keep_all_tokens: bool = global_keep_all_tokens - self.import_paths: List[Union[str, Callable]] = import_paths or [] - self.used_files: Dict[str, str] = used_files or {} + + global_keep_all_tokens: bool + import_paths: List[Union[str, Callable]] + used_files: Dict[str, str] + + def __init__(self, global_keep_all_tokens: bool=False, import_paths: Optional[List[Union[str, Callable]]]=None, used_files: Optional[Dict[str, str]]=None) -> None: + self.global_keep_all_tokens = global_keep_all_tokens + self.import_paths = import_paths or [] + self.used_files = used_files or {} self._definitions = {} self._ignore_names = [] @@ -1072,7 +1082,7 @@ class GrammarBuilder: return name, exp, params, opts - def load_grammar(self, grammar_text: str, grammar_name: str="", mangle: Callable[[str], str]=None) -> None: + def load_grammar(self, grammar_text: str, grammar_name: str="", mangle: Optional[Callable[[str], str]]=None) -> None: tree = _parse_grammar(grammar_text, grammar_name) imports = {} @@ -1135,7 +1145,7 @@ class GrammarBuilder: self._definitions = {k: v for k, v in self._definitions.items() if k in _used} - def do_import(self, dotted_path: Tuple[str, ...], base_path: Optional[str], aliases: Dict[str, str], base_mangle: Callable[[str], str]=None) -> None: + def do_import(self, dotted_path: Tuple[str, ...], base_path: Optional[str], aliases: Dict[str, str], base_mangle: Optional[Callable[[str], str]]=None) -> None: assert dotted_path mangle = 
_get_mangle('__'.join(dotted_path), aliases, base_mangle) grammar_path = os.path.join(*dotted_path) + EXT diff --git a/lark/reconstruct.py b/lark/reconstruct.py index 2f0911b..aa8c753 100644 --- a/lark/reconstruct.py +++ b/lark/reconstruct.py @@ -1,6 +1,6 @@ """Reconstruct text from a tree, based on Lark grammar""" -from typing import List, Dict, Union, Callable, Iterable +from typing import List, Dict, Union, Callable, Iterable, Optional import unicodedata from .lark import Lark @@ -23,6 +23,9 @@ def is_iter_empty(i): class WriteTokensTransformer(Transformer_InPlace): "Inserts discarded tokens into their correct place, according to the rules of grammar" + tokens: Dict[str, TerminalDef] + term_subs: Dict[str, Callable[[Symbol], str]] + def __init__(self, tokens: Dict[str, TerminalDef], term_subs: Dict[str, Callable[[Symbol], str]]) -> None: self.tokens = tokens self.term_subs = term_subs @@ -72,7 +75,9 @@ class Reconstructor(TreeMatcher): term_subs: a dictionary of [Terminal name as str] to [output text as str] """ - def __init__(self, parser: Lark, term_subs: Dict[str, Callable[[Symbol], str]]=None) -> None: + write_tokens: WriteTokensTransformer + + def __init__(self, parser: Lark, term_subs: Optional[Dict[str, Callable[[Symbol], str]]]=None) -> None: TreeMatcher.__init__(self, parser) self.write_tokens = WriteTokensTransformer({t.name:t for t in self.tokens}, term_subs or {}) @@ -89,7 +94,7 @@ class Reconstructor(TreeMatcher): else: yield item - def reconstruct(self, tree: Tree, postproc: Callable[[Iterable[str]], Iterable[str]]=None, insert_spaces: bool=True) -> str: + def reconstruct(self, tree: Tree, postproc: Optional[Callable[[Iterable[str]], Iterable[str]]]=None, insert_spaces: bool=True) -> str: x = self._reconstruct(tree) if postproc: x = postproc(x) diff --git a/lark/visitors.py b/lark/visitors.py index 847c468..2c7309f 100644 --- a/lark/visitors.py +++ b/lark/visitors.py @@ -8,7 +8,7 @@ from .lexer import Token ###{standalone from inspect import getmembers, getmro -from typing import TypeVar, Tuple, List, Callable, Generic, Type, Union +from typing import TypeVar, Tuple, List, Callable, Generic, Type, Union, Optional _T = TypeVar('_T') _R = TypeVar('_R') @@ -156,8 +156,11 @@ class Transformer(_Decoratable, ABC, Generic[_T]): class TransformerChain(Generic[_T]): - def __init__(self, *transformers): - self.transformers: Tuple[Transformer[_T], ...] = transformers + + transformers: Tuple[Transformer[_T], ...] + + def __init__(self, *transformers: Transformer[_T]) -> None: + self.transformers = transformers def transform(self, tree: Tree) -> _T: for t in self.transformers: @@ -387,7 +390,7 @@ def _vargs_tree(f, data, children, meta): return f(Tree(data, children, meta)) -def v_args(inline: bool=False, meta: bool=False, tree: bool=False, wrapper: Callable[[_DECORATED], _DECORATED]=None) -> Callable[[_DECORATED], _DECORATED]: +def v_args(inline: bool=False, meta: bool=False, tree: bool=False, wrapper: Optional[Callable]=None) -> Callable[[_DECORATED], _DECORATED]: """A convenience decorator factory for modifying the behavior of user-supplied visitor methods. By default, callback methods of transformers/visitors accept one argument - a list of the node's children. 
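Patch 6 above moves the instance-attribute annotations out of `__init__` and into the class body as bare declarations. Such a declaration binds no value at runtime (it is only recorded in `__annotations__`); it tells the checker which attributes instances carry, and it leaves the `__init__` body as plain assignments. A condensed sketch of the resulting shape, modeled on the `RuleOptions` hunk:

    from typing import Optional, Tuple

    class RuleOptions:
        # Declarations only: no values are bound here, so there are no
        # accidental class-level defaults shared between instances.
        keep_all_tokens: bool
        expand1: bool
        priority: Optional[int]
        template_source: Optional[str]
        empty_indices: Tuple[bool, ...]

        def __init__(self, keep_all_tokens: bool = False, expand1: bool = False,
                     priority: Optional[int] = None, template_source: Optional[str] = None,
                     empty_indices: Tuple[bool, ...] = ()) -> None:
            self.keep_all_tokens = keep_all_tokens
            self.expand1 = expand1
            self.priority = priority
            self.template_source = template_source
            self.empty_indices = empty_indices

Note that the same hunk also tightens `priority` from `int` to `Optional[int]`, matching its `None` default.
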
From 1c4af01a117f8a010cf88dced77bd6aea60cb88d Mon Sep 17 00:00:00 2001 From: Chanic Panic Date: Tue, 29 Jun 2021 15:15:51 -0700 Subject: [PATCH 7/9] Update mypy workflow --- .github/workflows/mypy.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/mypy.yml b/.github/workflows/mypy.yml index 85039a4..f1c667f 100644 --- a/.github/workflows/mypy.yml +++ b/.github/workflows/mypy.yml @@ -16,4 +16,4 @@ jobs: python -m pip install --upgrade pip pip install mypy - name: Lint with mypy - run: mypy -p lark-stubs || true + run: mypy -p lark || true From 4bc9445238ed19280dbbf7afc25d36bb6aa254c6 Mon Sep 17 00:00:00 2001 From: Chanic Panic Date: Wed, 30 Jun 2021 12:49:14 -0700 Subject: [PATCH 8/9] Corrections for PR and some mypy errors --- lark/common.py | 9 +++--- lark/exceptions.py | 4 +-- lark/indenter.py | 8 +++--- lark/lark.py | 9 +++--- lark/lexer.py | 61 ++++++++++++++++++++++------------------ lark/parser_frontends.py | 2 +- lark/tree.py | 7 +++-- lark/utils.py | 2 +- 8 files changed, 56 insertions(+), 46 deletions(-) diff --git a/lark/common.py b/lark/common.py index ccd5e16..0e6ae05 100644 --- a/lark/common.py +++ b/lark/common.py @@ -1,8 +1,9 @@ +from types import ModuleType + from .utils import Serialize from .lexer import TerminalDef, Token ###{standalone -from types import ModuleType from typing import Any, Callable, Collection, Dict, Optional, TYPE_CHECKING if TYPE_CHECKING: @@ -17,13 +18,13 @@ class LexerConf(Serialize): terminals: Collection[TerminalDef] re_module: ModuleType ignore: Collection[str] = () - postlex: 'PostLex' = None - callbacks: Optional[Dict[str, _Callback]] = None + postlex: 'Optional[PostLex]' = None + callbacks: Dict[str, _Callback] = {} g_regex_flags: int = 0 skip_validation: bool = False use_bytes: bool = False - def __init__(self, terminals: Collection[TerminalDef], re_module: ModuleType, ignore: Collection[str]=(), postlex: 'PostLex'=None, callbacks: Optional[Dict[str, _Callback]]=None, g_regex_flags: int=0, skip_validation: bool=False, use_bytes: bool=False): + def __init__(self, terminals: Collection[TerminalDef], re_module: ModuleType, ignore: Collection[str]=(), postlex: 'Optional[PostLex]'=None, callbacks: Optional[Dict[str, _Callback]]=None, g_regex_flags: int=0, skip_validation: bool=False, use_bytes: bool=False): self.terminals = terminals self.terminals_by_name = {t.name: t for t in self.terminals} assert len(self.terminals) == len(self.terminals_by_name) diff --git a/lark/exceptions.py b/lark/exceptions.py index 3276db5..9c4dc1e 100644 --- a/lark/exceptions.py +++ b/lark/exceptions.py @@ -3,7 +3,7 @@ from .utils import logger, NO_VALUE ###{standalone -from typing import Dict, Iterable, Callable, Union, TypeVar, Tuple, Any, List, Set, TYPE_CHECKING +from typing import Dict, Iterable, Callable, Union, TypeVar, Tuple, Any, List, Set, Optional, TYPE_CHECKING if TYPE_CHECKING: from .lexer import Token @@ -73,7 +73,7 @@ class UnexpectedInput(LarkError): after = text[pos:end].split(b'\n', 1)[0] return (before + after + b'\n' + b' ' * len(before.expandtabs()) + b'^\n').decode("ascii", "backslashreplace") - def match_examples(self, parse_fn: 'Callable[[str], Tree]', examples: Union[Dict[T, Iterable[str]], Iterable[Tuple[T, Iterable[str]]]], token_type_match_fallback: bool=False, use_accepts: bool=False) -> T: + def match_examples(self, parse_fn: 'Callable[[str], Tree]', examples: Union[Dict[T, Iterable[str]], Iterable[Tuple[T, Iterable[str]]]], token_type_match_fallback: bool=False, use_accepts: bool=False) -> Optional[T]: 
"""Allows you to detect what's wrong in the input text by matching against example errors. diff --git a/lark/indenter.py b/lark/indenter.py index 03c5093..69a7ba4 100644 --- a/lark/indenter.py +++ b/lark/indenter.py @@ -14,12 +14,12 @@ class DedentError(LarkError): class Indenter(PostLex, ABC): - paren_level: Optional[int] - indent_level: Optional[List[int]] + paren_level: int + indent_level: List[int] def __init__(self) -> None: - self.paren_level = None - self.indent_level = None + self.paren_level = 0 + self.indent_level = [0] assert self.tab_len > 0 def handle_NL(self, token: Token) -> Iterator[Token]: diff --git a/lark/lark.py b/lark/lark.py index b4c767b..e225bad 100644 --- a/lark/lark.py +++ b/lark/lark.py @@ -15,7 +15,7 @@ from .grammar import Rule import re try: - import regex + import regex # type: ignore except ImportError: regex = None @@ -149,7 +149,7 @@ class LarkOptions(Serialize): # - As an attribute of `LarkOptions` above # - Potentially in `_LOAD_ALLOWED_OPTIONS` below this class, when the option doesn't change how the grammar is loaded # - Potentially in `lark.tools.__init__`, if it makes sense, and it can easily be passed as a cmd argument - _defaults = { + _defaults: Dict[str, Any] = { 'debug': False, 'keep_all_tokens': False, 'tree_class': None, @@ -414,6 +414,7 @@ class Lark(Serialize): if cache_fn: logger.debug('Saving grammar to cache: %s', cache_fn) with FS.open(cache_fn, 'wb') as f: + assert cache_md5 is not None f.write(cache_md5.encode('utf8') + b'\n') pickle.dump(used_files, f) self.save(f) @@ -574,7 +575,7 @@ class Lark(Serialize): """Get information about a terminal""" return self._terminals_dict[name] - def parse_interactive(self, text: str=None, start: Optional[str]=None) -> 'InteractiveParser': + def parse_interactive(self, text: Optional[str]=None, start: Optional[str]=None) -> 'InteractiveParser': """Start an interactive parsing session. Parameters: @@ -588,7 +589,7 @@ class Lark(Serialize): """ return self.parser.parse_interactive(text, start=start) - def parse(self, text: str, start: Optional[str]=None, on_error: 'Callable[[UnexpectedInput], bool]'=None) -> Tree: + def parse(self, text: str, start: Optional[str]=None, on_error: 'Optional[Callable[[UnexpectedInput], bool]]'=None) -> Tree: """Parse the given text, according to the options provided. Parameters: diff --git a/lark/lexer.py b/lark/lexer.py index 6177d26..8f05bc7 100644 --- a/lark/lexer.py +++ b/lark/lexer.py @@ -23,10 +23,10 @@ class Pattern(Serialize, ABC): value: str flags: Collection[str] - raw: str = None - type: str = None + raw: Optional[str] = None + type: Optional[str] = None - def __init__(self, value: str, flags: Collection[str]=(), raw: str=None) -> None: + def __init__(self, value: str, flags: Collection[str]=(), raw: Optional[str]=None) -> None: self.value = value self.flags = frozenset(flags) self.raw = raw @@ -81,7 +81,10 @@ class PatternStr(Pattern): @property def min_width(self) -> int: return len(self.value) - max_width = min_width + + @property + def max_width(self) -> int: + return len(self.value) class PatternRE(Pattern): @@ -320,15 +323,36 @@ def _regexp_has_newline(r): """ return '\n' in r or '\\n' in r or '\\s' in r or '[^' in r or ('(?s' in r and '.' 
in r) + +class LexerState(object): + __slots__ = 'text', 'line_ctr', 'last_token' + + def __init__(self, text, line_ctr, last_token=None): + self.text = text + self.line_ctr = line_ctr + self.last_token = last_token + + def __eq__(self, other): + if not isinstance(other, LexerState): + return NotImplemented + + return self.text is other.text and self.line_ctr == other.line_ctr and self.last_token == other.last_token + + def __copy__(self): + return type(self)(self.text, copy(self.line_ctr), self.last_token) + + _Callback = Callable[[Token], Token] class Lexer(ABC): """Lexer interface Method Signatures: - lex(self, text) -> Iterator[Token] + lex(self, lexer_state, parser_state) -> Iterator[Token] """ - lex: Callable[..., Iterator[Token]] = NotImplemented + @abstractmethod + def lex(self, lexer_state: LexerState, parser_state: Any) -> Iterator[Token]: + ... def make_lexer_state(self, text): line_ctr = LineCounter(b'\n' if isinstance(text, bytes) else '\n') @@ -394,6 +418,7 @@ class TraditionalLexer(Lexer): def mres(self) -> List[Tuple[REPattern, Dict[int, str]]]: if self._mres is None: self._build() + assert self._mres is not None return self._mres def match(self, text: str, pos: int) -> Optional[Tuple[str, str]]: @@ -402,12 +427,12 @@ class TraditionalLexer(Lexer): if m: return m.group(0), type_from_index[m.lastindex] - def lex(self, state: Any, parser_state: Any) -> Iterator[Token]: + def lex(self, state: LexerState, parser_state: Any) -> Iterator[Token]: with suppress(EOFError): while True: yield self.next_token(state, parser_state) - def next_token(self, lex_state: Any, parser_state: Any=None) -> Token: + def next_token(self, lex_state: LexerState, parser_state: Any=None) -> Token: line_ctr = lex_state.line_ctr while line_ctr.char_pos < len(lex_state.text): res = self.match(lex_state.text, line_ctr.char_pos) @@ -443,24 +468,6 @@ class TraditionalLexer(Lexer): raise EOFError(self) -class LexerState(object): - __slots__ = 'text', 'line_ctr', 'last_token' - - def __init__(self, text, line_ctr, last_token=None): - self.text = text - self.line_ctr = line_ctr - self.last_token = last_token - - def __eq__(self, other): - if not isinstance(other, LexerState): - return NotImplemented - - return self.text is other.text and self.line_ctr == other.line_ctr and self.last_token == other.last_token - - def __copy__(self): - return type(self)(self.text, copy(self.line_ctr), self.last_token) - - class ContextualLexer(Lexer): lexers: Dict[str, TraditionalLexer] @@ -494,7 +501,7 @@ class ContextualLexer(Lexer): def make_lexer_state(self, text): return self.root_lexer.make_lexer_state(text) - def lex(self, lexer_state: Any, parser_state: Any) -> Iterator[Token]: + def lex(self, lexer_state: LexerState, parser_state: Any) -> Iterator[Token]: try: while True: lexer = self.lexers[parser_state.position] diff --git a/lark/parser_frontends.py b/lark/parser_frontends.py index e066d9a..926997a 100644 --- a/lark/parser_frontends.py +++ b/lark/parser_frontends.py @@ -7,7 +7,7 @@ from .parsers.lalr_parser import LALR_Parser from .tree import Tree from .common import LexerConf, ParserConf try: - import regex + import regex # type: ignore except ImportError: regex = None import re diff --git a/lark/tree.py b/lark/tree.py index ff56dea..90ec0fe 100644 --- a/lark/tree.py +++ b/lark/tree.py @@ -1,8 +1,9 @@ try: - from future_builtins import filter + from future_builtins import filter # type: ignore except ImportError: pass +import sys from copy import deepcopy @@ -49,7 +50,7 @@ class Tree(object): data: str children: 
'List[Union[str, Tree]]' - def __init__(self, data: str, children: 'List[Union[str, Tree]]', meta: Meta=None) -> None: + def __init__(self, data: str, children: 'List[Union[str, Tree]]', meta: Optional[Meta]=None) -> None: self.data = data self.children = children self._meta = meta @@ -196,7 +197,7 @@ def pydot__tree_to_graph(tree, rankdir="LR", **kwargs): possible attributes, see https://www.graphviz.org/doc/info/attrs.html. """ - import pydot + import pydot # type: ignore graph = pydot.Dot(graph_type='digraph', rankdir=rankdir, **kwargs) i = [0] diff --git a/lark/utils.py b/lark/utils.py index 81c9128..1214e97 100644 --- a/lark/utils.py +++ b/lark/utils.py @@ -134,7 +134,7 @@ def smart_decorator(f, create_decorator): try: - import regex + import regex # type: ignore except ImportError: regex = None From 804114d1ff3a2c982e8fb8793936b9122f1b8fb4 Mon Sep 17 00:00:00 2001 From: Chanic Panic Date: Wed, 30 Jun 2021 13:21:09 -0700 Subject: [PATCH 9/9] Remove default values on type declarations and mark ClassVars --- lark/common.py | 12 ++++++------ lark/grammar.py | 8 ++++---- lark/lexer.py | 10 +++++----- 3 files changed, 15 insertions(+), 15 deletions(-) diff --git a/lark/common.py b/lark/common.py index 0e6ae05..6ad38fc 100644 --- a/lark/common.py +++ b/lark/common.py @@ -17,12 +17,12 @@ class LexerConf(Serialize): terminals: Collection[TerminalDef] re_module: ModuleType - ignore: Collection[str] = () - postlex: 'Optional[PostLex]' = None - callbacks: Dict[str, _Callback] = {} - g_regex_flags: int = 0 - skip_validation: bool = False - use_bytes: bool = False + ignore: Collection[str] + postlex: 'Optional[PostLex]' + callbacks: Dict[str, _Callback] + g_regex_flags: int + skip_validation: bool + use_bytes: bool def __init__(self, terminals: Collection[TerminalDef], re_module: ModuleType, ignore: Collection[str]=(), postlex: 'Optional[PostLex]'=None, callbacks: Optional[Dict[str, _Callback]]=None, g_regex_flags: int=0, skip_validation: bool=False, use_bytes: bool=False): self.terminals = terminals diff --git a/lark/grammar.py b/lark/grammar.py index be1aff6..25aec17 100644 --- a/lark/grammar.py +++ b/lark/grammar.py @@ -3,13 +3,13 @@ from .utils import Serialize ###{standalone -from typing import Optional, Tuple +from typing import Optional, Tuple, ClassVar class Symbol(Serialize): __slots__ = ('name',) name: str - is_term: bool = NotImplemented + is_term: ClassVar[bool] = NotImplemented def __init__(self, name: str) -> None: self.name = name @@ -33,7 +33,7 @@ class Symbol(Serialize): class Terminal(Symbol): __serialize_fields__ = 'name', 'filter_out' - is_term = True + is_term: ClassVar[bool] = True def __init__(self, name, filter_out=False): self.name = name @@ -47,7 +47,7 @@ class Terminal(Symbol): class NonTerminal(Symbol): __serialize_fields__ = 'name', - is_term = False + is_term: ClassVar[bool] = False class RuleOptions(Serialize): diff --git a/lark/lexer.py b/lark/lexer.py index 8f05bc7..af698cb 100644 --- a/lark/lexer.py +++ b/lark/lexer.py @@ -13,7 +13,7 @@ from copy import copy from types import ModuleType from typing import ( TypeVar, Type, Tuple, List, Dict, Iterator, Collection, Callable, Optional, FrozenSet, Any, - Pattern as REPattern, TYPE_CHECKING + Pattern as REPattern, ClassVar, TYPE_CHECKING ) if TYPE_CHECKING: @@ -23,8 +23,8 @@ class Pattern(Serialize, ABC): value: str flags: Collection[str] - raw: Optional[str] = None - type: Optional[str] = None + raw: Optional[str] + type: ClassVar[str] def __init__(self, value: str, flags: Collection[str]=(), raw: Optional[str]=None) 
-> None: self.value = value @@ -73,7 +73,7 @@ class Pattern(Serialize, ABC): class PatternStr(Pattern): __serialize_fields__ = 'value', 'flags' - type: str = "str" + type: ClassVar[str] = "str" def to_regexp(self) -> str: return self._get_flags(re.escape(self.value)) @@ -90,7 +90,7 @@ class PatternStr(Pattern): class PatternRE(Pattern): __serialize_fields__ = 'value', 'flags', '_width' - type: str = "re" + type: ClassVar[str] = "re" def to_regexp(self) -> str: return self._get_flags(self.value)
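
The closing patch separates the two kinds of class-body annotations the earlier patches introduced: a bare annotation declares a per-instance attribute, while `ClassVar[...]` marks a value owned by the class itself, which mypy will then refuse to see assigned through `self`. It also strips the default values that patch 8 had left on `LexerConf`'s declarations, since a value on such a declaration binds a real class attribute (and `callbacks: Dict[str, _Callback] = {}` would have been a single dict shared by every instance). A minimal sketch of the distinction, modeled on the `Symbol` hierarchy above:

    from typing import ClassVar

    class Symbol:
        __slots__ = ('name',)

        name: str                                  # per-instance, set in __init__
        is_term: ClassVar[bool] = NotImplemented   # class-owned, overridden by subclasses

        def __init__(self, name: str) -> None:
            self.name = name

    class Terminal(Symbol):
        is_term: ClassVar[bool] = True

    class NonTerminal(Symbol):
        is_term: ClassVar[bool] = False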