瀏覽代碼

More .pyi merging

exceptions, lark, and tree
gm/2021-09-23T00Z/github.com--lark-parser-lark/1.0b
Chanic Panic 3 年之前
父節點
當前提交
089bc2b523
共有 7 個文件被更改,包括 109 次插入和 214 次刪除
  1. +0
    -65
      lark-stubs/exceptions.pyi
  2. +3
    -59
      lark-stubs/lark.pyi
  3. +2
    -60
      lark-stubs/tree.pyi
  4. +27
    -3
      lark/exceptions.py
  5. +42
    -14
      lark/lark.py
  6. +6
    -2
      lark/load_grammar.py
  7. +29
    -11
      lark/tree.py

+ 0
- 65
lark-stubs/exceptions.pyi 查看文件

@@ -1,65 +0,0 @@
# -*- coding: utf-8 -*-

from typing import Dict, Iterable, Callable, Union, TypeVar, Tuple, Any, List, Set
from .tree import Tree
from .lexer import Token
from .parsers.lalr_interactive_parser import InteractiveParser

class LarkError(Exception):
pass


class ConfigurationError(LarkError, ValueError):
pass


class GrammarError(LarkError):
pass


class ParseError(LarkError):
pass


class LexError(LarkError):
pass


T = TypeVar('T')

class UnexpectedEOF(ParseError):
expected: List[Token]

class UnexpectedInput(LarkError):
line: int
column: int
pos_in_stream: int
state: Any

def get_context(self, text: str, span: int = ...) -> str:
...

def match_examples(
self,
parse_fn: Callable[[str], Tree],
examples: Union[Dict[T, Iterable[str]], Iterable[Tuple[T, Iterable[str]]]],
token_type_match_fallback: bool = False,
use_accepts: bool = False,
) -> T:
...


class UnexpectedToken(ParseError, UnexpectedInput):
expected: Set[str]
considered_rules: Set[str]
interactive_parser: InteractiveParser
accepts: Set[str]

class UnexpectedCharacters(LexError, UnexpectedInput):
allowed: Set[str]
considered_tokens: Set[Any]


class VisitError(LarkError):
obj: Union[Tree, Token]
orig_exc: Exception

+ 3
- 59
lark-stubs/lark.pyi 查看文件

@@ -1,19 +1,13 @@
# -*- coding: utf-8 -*-

from typing import (
TypeVar, Type, List, Dict, IO, Iterator, Callable, Union, Optional,
Type, List, Dict, IO, Iterator, Callable, Union, Optional,
Literal, Protocol, Tuple, Iterable,
)

from .parsers.lalr_interactive_parser import InteractiveParser
from .visitors import Transformer
from .lexer import Token, Lexer, TerminalDef
from .tree import Tree
from .exceptions import UnexpectedInput
from .load_grammar import Grammar

_T = TypeVar('_T')

from .load_grammar import Grammar, PackageResource

class PostLex(Protocol):

@@ -22,39 +16,8 @@ class PostLex(Protocol):

always_accept: Iterable[str]


class LarkOptions:
start: List[str]
parser: str
lexer: str
transformer: Optional[Transformer]
postlex: Optional[PostLex]
ambiguity: str
regex: bool
debug: bool
keep_all_tokens: bool
propagate_positions: Union[bool, str]
maybe_placeholders: bool
lexer_callbacks: Dict[str, Callable[[Token], Token]]
cache: Union[bool, str]
g_regex_flags: int
use_bytes: bool
import_paths: List[Union[str, Callable[[Union[None, str, PackageResource], str], Tuple[str, str]]]]
source_path: Optional[str]


class PackageResource(object):
pkg_name: str
path: str

def __init__(self, pkg_name: str, path: str): ...


class FromPackageLoader:
def __init__(self, pkg_name: str, search_paths: Tuple[str, ...] = ...): ...

def __call__(self, base_path: Union[None, str, PackageResource], grammar_path: str) -> Tuple[PackageResource, str]: ...

...

class Lark:
source_path: str
@@ -88,22 +51,3 @@ class Lark:
):
...

def parse(self, text: str, start: Optional[str] = None, on_error: Callable[[UnexpectedInput], bool] = None) -> Tree:
...

def parse_interactive(self, text: str = None, start: Optional[str] = None) -> InteractiveParser:
...

@classmethod
def open(cls: Type[_T], grammar_filename: str, rel_to: Optional[str] = None, **options) -> _T:
...

@classmethod
def open_from_package(cls: Type[_T], package: str, grammar_path: str, search_paths: Tuple[str, ...] = ..., **options) -> _T:
...

def lex(self, text: str, dont_ignore: bool = False) -> Iterator[Token]:
...

def get_terminal(self, name: str) -> TerminalDef:
...

+ 2
- 60
lark-stubs/tree.pyi 查看文件

@@ -1,67 +1,9 @@
# -*- coding: utf-8 -*-

from typing import List, Callable, Iterator, Union, Optional, Literal, Any
from .lexer import TerminalDef

class Meta:
empty: bool
line: int
column: int
start_pos: int
end_line: int
end_column: int
end_pos: int
orig_expansion: List[TerminalDef]
match_tree: bool

from typing import Literal

class Tree:
data: str
children: List[Union[str, Tree]]
meta: Meta

def __init__(
self,
data: str,
children: List[Union[str, Tree]],
meta: Optional[Meta] = None
) -> None:
...

def pretty(self, indent_str: str = ...) -> str:
...

def find_pred(self, pred: Callable[[Tree], bool]) -> Iterator[Tree]:
...

def find_data(self, data: str) -> Iterator[Tree]:
...

def expand_kids_by_index(self, *indices: int) -> None:
...

def scan_values(self, pred: Callable[[Union[str, Tree]], bool]) -> Iterator[str]:
...

def iter_subtrees(self) -> Iterator[Tree]:
...

def iter_subtrees_topdown(self) -> Iterator[Tree]:
...

def copy(self) -> Tree:
...

def set(self, data: str, children: List[Union[str, Tree]]) -> None:
...

def __hash__(self) -> int:
...


class SlottedTree(Tree):
pass

...

def pydot__tree_to_png(
tree: Tree,


+ 27
- 3
lark/exceptions.py 查看文件

@@ -3,6 +3,12 @@ from .utils import logger, NO_VALUE

###{standalone

from typing import Dict, Iterable, Callable, Union, TypeVar, Tuple, Any, List, Set, TYPE_CHECKING

if TYPE_CHECKING:
from .lexer import Token
from .parsers.lalr_interactive_parser import InteractiveParser
from .tree import Tree

class LarkError(Exception):
pass
@@ -28,6 +34,7 @@ class ParseError(LarkError):
class LexError(LarkError):
pass

T = TypeVar('T')

class UnexpectedInput(LarkError):
"""UnexpectedInput Error.
@@ -39,10 +46,13 @@ class UnexpectedInput(LarkError):

After catching one of these exceptions, you may call the following helper methods to create a nicer error message.
"""
line: int
column: int
pos_in_stream = None
state: Any
_terminals_by_name = None

def get_context(self, text, span=40):
def get_context(self, text: str, span: int=40) -> str:
"""Returns a pretty string pinpointing the error in the text,
with span amount of context characters around it.

@@ -63,7 +73,7 @@ class UnexpectedInput(LarkError):
after = text[pos:end].split(b'\n', 1)[0]
return (before + after + b'\n' + b' ' * len(before.expandtabs()) + b'^\n').decode("ascii", "backslashreplace")

def match_examples(self, parse_fn, examples, token_type_match_fallback=False, use_accepts=False):
def match_examples(self, parse_fn: 'Callable[[str], Tree]', examples: Union[Dict[T, Iterable[str]], Iterable[Tuple[T, Iterable[str]]]], token_type_match_fallback: bool=False, use_accepts: bool=False) -> T:
"""Allows you to detect what's wrong in the input text by matching
against example errors.

@@ -126,6 +136,9 @@ class UnexpectedInput(LarkError):


class UnexpectedEOF(ParseError, UnexpectedInput):

expected: 'List[Token]'

def __init__(self, expected, state=None, terminals_by_name=None):
self.expected = expected
self.state = state
@@ -145,6 +158,10 @@ class UnexpectedEOF(ParseError, UnexpectedInput):


class UnexpectedCharacters(LexError, UnexpectedInput):

allowed: Set[str]
considered_tokens: Set[Any]

def __init__(self, seq, lex_pos, line, column, allowed=None, considered_tokens=None, state=None, token_history=None,
terminals_by_name=None, considered_rules=None):
# TODO considered_tokens and allowed can be figured out using state
@@ -187,6 +204,10 @@ class UnexpectedToken(ParseError, UnexpectedInput):
see: ``InteractiveParser``.
"""

expected: Set[str]
considered_rules: Set[str]
interactive_parser: 'InteractiveParser'

def __init__(self, token, expected, considered_rules=None, state=None, interactive_parser=None, terminals_by_name=None, token_history=None):
# TODO considered_rules and expected can be figured out using state
self.line = getattr(token, 'line', '?')
@@ -205,7 +226,7 @@ class UnexpectedToken(ParseError, UnexpectedInput):
super(UnexpectedToken, self).__init__()

@property
def accepts(self):
def accepts(self) -> Set[str]:
if self._accepts is NO_VALUE:
self._accepts = self.interactive_parser and self.interactive_parser.accepts()
return self._accepts
@@ -228,6 +249,9 @@ class VisitError(LarkError):
- orig_exc: the exception that cause it to fail
"""

obj: 'Union[Tree, Token]'
orig_exc: Exception

def __init__(self, rule, obj, orig_exc):
self.obj = obj
self.orig_exc = orig_exc


+ 42
- 14
lark/lark.py 查看文件

@@ -1,6 +1,10 @@
from abc import ABC, abstractmethod
import sys, os, pickle, hashlib
import tempfile
from typing import (
TypeVar, Type, List, Dict, Iterator, Callable, Union, Optional,
Tuple, Iterable, TYPE_CHECKING
)

from .exceptions import ConfigurationError, assert_config
from .utils import Serialize, SerializeMemoizer, FS, isascii, logger
@@ -8,7 +12,7 @@ from .load_grammar import load_grammar, FromPackageLoader, Grammar, verify_used_
from .tree import Tree
from .common import LexerConf, ParserConf

from .lexer import Lexer, TraditionalLexer, TerminalDef, LexerThread
from .lexer import Lexer, TraditionalLexer, TerminalDef, LexerThread, Token
from .parse_tree_builder import ParseTreeBuilder
from .parser_frontends import get_frontend, _get_lexer_callbacks
from .grammar import Rule
@@ -19,14 +23,44 @@ try:
except ImportError:
regex = None

if TYPE_CHECKING:
from .load_grammar import PackageResource
from .exceptions import UnexpectedInput
from .parsers.lalr_interactive_parser import InteractiveParser
from .visitors import Transformer

###{standalone

class PostLex(ABC):
@abstractmethod
def process(self, stream):
return stream

always_accept = ()

class LarkOptions(Serialize):
"""Specifies the options for Lark

"""

start: List[str]
parser: str
lexer: str
transformer: 'Optional[Transformer]'
postlex: Optional[PostLex]
ambiguity: str
regex: bool
debug: bool
keep_all_tokens: bool
propagate_positions: Union[bool, str]
maybe_placeholders: bool
lexer_callbacks: Dict[str, Callable[[Token], Token]]
cache: Union[bool, str]
g_regex_flags: int
use_bytes: bool
import_paths: 'List[Union[str, Callable[[Union[None, str, PackageResource], str], Tuple[str, str]]]]'
source_path: Optional[str]

OPTIONS_DOC = """
**=== General Options ===**

@@ -189,13 +223,7 @@ _VALID_PRIORITY_OPTIONS = ('auto', 'normal', 'invert', None)
_VALID_AMBIGUITY_OPTIONS = ('auto', 'resolve', 'explicit', 'forest')


class PostLex(ABC):
@abstractmethod
def process(self, stream):
return stream

always_accept = ()

_T = TypeVar('_T')

class Lark(Serialize):
"""Main interface for the library.
@@ -476,7 +504,7 @@ class Lark(Serialize):
return inst._load({'data': data, 'memo': memo}, **kwargs)

@classmethod
def open(cls, grammar_filename, rel_to=None, **options):
def open(cls: Type[_T], grammar_filename: str, rel_to: Optional[str]=None, **options) -> _T:
"""Create an instance of Lark with the grammar given by its filename

If ``rel_to`` is provided, the function will find the grammar filename in relation to it.
@@ -494,7 +522,7 @@ class Lark(Serialize):
return cls(f, **options)

@classmethod
def open_from_package(cls, package, grammar_path, search_paths=("",), **options):
def open_from_package(cls: Type[_T], package: str, grammar_path: str, search_paths: Tuple[str, ...]=("",), **options) -> _T:
"""Create an instance of Lark with the grammar loaded from within the package `package`.
This allows grammar loading from zipapps.

@@ -515,7 +543,7 @@ class Lark(Serialize):
return 'Lark(open(%r), parser=%r, lexer=%r, ...)' % (self.source_path, self.options.parser, self.options.lexer)


def lex(self, text, dont_ignore=False):
def lex(self, text: str, dont_ignore: bool=False) -> Iterator[Token]:
"""Only lex (and postlex) the text, without parsing it. Only relevant when lexer='standard'

When dont_ignore=True, the lexer will return all tokens, even those marked for %ignore.
@@ -530,11 +558,11 @@ class Lark(Serialize):
return self.options.postlex.process(stream)
return stream

def get_terminal(self, name):
def get_terminal(self, name: str) -> TerminalDef:
"""Get information about a terminal"""
return self._terminals_dict[name]
def parse_interactive(self, text=None, start=None):
def parse_interactive(self, text: str=None, start: Optional[str]=None) -> 'InteractiveParser':
"""Start an interactive parsing session.

Parameters:
@@ -548,7 +576,7 @@ class Lark(Serialize):
"""
return self.parser.parse_interactive(text, start=start)

def parse(self, text, start=None, on_error=None):
def parse(self, text: str, start: Optional[str]=None, on_error: 'Callable[[UnexpectedInput], bool]'=None) -> Tree:
"""Parse the given text, according to the options provided.

Parameters:


+ 6
- 2
lark/load_grammar.py 查看文件

@@ -691,14 +691,18 @@ class FromPackageLoader(object):
pkg_name: The name of the package. You can probably provide `__name__` most of the time
search_paths: All the path that will be search on absolute imports.
"""
def __init__(self, pkg_name, search_paths=("", )):

pkg_name: str
search_paths: Tuple[str, ...]

def __init__(self, pkg_name: str, search_paths: Tuple[str, ...]=("", )) -> None:
self.pkg_name = pkg_name
self.search_paths = search_paths

def __repr__(self):
return "%s(%r, %r)" % (type(self).__name__, self.pkg_name, self.search_paths)

def __call__(self, base_path, grammar_path):
def __call__(self, base_path: Union[None, str, PackageResource], grammar_path: str) -> Tuple[PackageResource, str]:
if base_path is None:
to_try = self.search_paths
else:


+ 29
- 11
lark/tree.py 查看文件

@@ -8,9 +8,23 @@ from copy import deepcopy

###{standalone
from collections import OrderedDict
from typing import List, Callable, Iterator, Union, Optional, Any, TYPE_CHECKING

if TYPE_CHECKING:
from .lexer import TerminalDef

class Meta:

empty: bool
line: int
column: int
start_pos: int
end_line: int
end_column: int
end_pos: int
orig_expansion: 'List[TerminalDef]'
match_tree: bool

def __init__(self):
self.empty = True

@@ -27,13 +41,17 @@ class Tree(object):
meta: Line & Column numbers (if ``propagate_positions`` is enabled).
meta attributes: line, column, start_pos, end_line, end_column, end_pos
"""
def __init__(self, data, children, meta=None):

data: str
children: 'List[Union[str, Tree]]'

def __init__(self, data: str, children: 'List[Union[str, Tree]]', meta: Meta=None) -> None:
self.data = data
self.children = children
self._meta = meta

@property
def meta(self):
def meta(self) -> Meta:
if self._meta is None:
self._meta = Meta()
return self._meta
@@ -57,7 +75,7 @@ class Tree(object):

return l

def pretty(self, indent_str=' '):
def pretty(self, indent_str: str=' ') -> str:
"""Returns an indented string representation of the tree.

Great for debugging.
@@ -73,10 +91,10 @@ class Tree(object):
def __ne__(self, other):
return not (self == other)

def __hash__(self):
def __hash__(self) -> int:
return hash((self.data, tuple(self.children)))

def iter_subtrees(self):
def iter_subtrees(self) -> 'Iterator[Tree]':
"""Depth-first iteration.

Iterates over all the subtrees, never returning to the same node twice (Lark's parse-tree is actually a DAG).
@@ -91,23 +109,23 @@ class Tree(object):
del queue
return reversed(list(subtrees.values()))

def find_pred(self, pred):
def find_pred(self, pred: 'Callable[[Tree], bool]') -> 'Iterator[Tree]':
"""Returns all nodes of the tree that evaluate pred(node) as true."""
return filter(pred, self.iter_subtrees())

def find_data(self, data):
def find_data(self, data: str) -> 'Iterator[Tree]':
"""Returns all nodes of the tree whose data equals the given data."""
return self.find_pred(lambda t: t.data == data)

###}

def expand_kids_by_index(self, *indices):
def expand_kids_by_index(self, *indices: int) -> None:
"""Expand (inline) children at the given indices"""
for i in sorted(indices, reverse=True): # reverse so that changing tail won't affect indices
kid = self.children[i]
self.children[i:i+1] = kid.children

def scan_values(self, pred):
def scan_values(self, pred: 'Callable[[Union[str, Tree]], bool]') -> Iterator[str]:
"""Return all values in the tree that evaluate pred(value) as true.

This can be used to find all the tokens in the tree.
@@ -140,10 +158,10 @@ class Tree(object):
def __deepcopy__(self, memo):
return type(self)(self.data, deepcopy(self.children, memo), meta=self._meta)

def copy(self):
def copy(self) -> 'Tree':
return type(self)(self.data, self.children)

def set(self, data, children):
def set(self, data: str, children: 'List[Union[str, Tree]]') -> None:
self.data = data
self.children = children



Loading…
取消
儲存