
Corrections for PR and some mypy errors

gm/2021-09-23T00Z/github.com--lark-parser-lark/1.0b
Chanic Panic, 3 years ago
commit 4bc9445238
8 changed files with 56 additions and 46 deletions:
  1. lark/common.py            +5  -4
  2. lark/exceptions.py        +2  -2
  3. lark/indenter.py          +4  -4
  4. lark/lark.py              +5  -4
  5. lark/lexer.py             +34 -27
  6. lark/parser_frontends.py  +1  -1
  7. lark/tree.py              +4  -3
  8. lark/utils.py             +1  -1
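Most of the diff below is one recurring mypy fix: a parameter or attribute annotated as some type T but defaulting to None must be annotated Optional[T], since mypy rejects the old implicit-Optional shorthand. A minimal sketch of the error and the fix, using a hypothetical function rather than lark's code:

    from typing import Optional

    # Before: mypy reports
    #   error: Incompatible default for argument "name"
    #   (default has type "None", argument has type "str")
    # def greet(name: str = None) -> str: ...

    # After: the possibility of None is spelled out in the annotation,
    # then narrowed away before use (as the commit does with asserts).
    def greet(name: Optional[str] = None) -> str:
        if name is None:
            name = "world"
        return "hello " + name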

lark/common.py  (+5 -4)

@@ -1,8 +1,9 @@
-from types import ModuleType
 
 from .utils import Serialize
 from .lexer import TerminalDef, Token
 
 ###{standalone
+from types import ModuleType
+
 from typing import Any, Callable, Collection, Dict, Optional, TYPE_CHECKING
 
 if TYPE_CHECKING:
@@ -17,13 +18,13 @@ class LexerConf(Serialize):
     terminals: Collection[TerminalDef]
     re_module: ModuleType
     ignore: Collection[str] = ()
-    postlex: 'PostLex' = None
-    callbacks: Optional[Dict[str, _Callback]] = None
+    postlex: 'Optional[PostLex]' = None
+    callbacks: Dict[str, _Callback] = {}
     g_regex_flags: int = 0
     skip_validation: bool = False
     use_bytes: bool = False
 
-    def __init__(self, terminals: Collection[TerminalDef], re_module: ModuleType, ignore: Collection[str]=(), postlex: 'PostLex'=None, callbacks: Optional[Dict[str, _Callback]]=None, g_regex_flags: int=0, skip_validation: bool=False, use_bytes: bool=False):
+    def __init__(self, terminals: Collection[TerminalDef], re_module: ModuleType, ignore: Collection[str]=(), postlex: 'Optional[PostLex]'=None, callbacks: Optional[Dict[str, _Callback]]=None, g_regex_flags: int=0, skip_validation: bool=False, use_bytes: bool=False):
         self.terminals = terminals
         self.terminals_by_name = {t.name: t for t in self.terminals}
         assert len(self.terminals) == len(self.terminals_by_name)


lark/exceptions.py  (+2 -2)

@@ -3,7 +3,7 @@ from .utils import logger, NO_VALUE
 
 ###{standalone
 
-from typing import Dict, Iterable, Callable, Union, TypeVar, Tuple, Any, List, Set, TYPE_CHECKING
+from typing import Dict, Iterable, Callable, Union, TypeVar, Tuple, Any, List, Set, Optional, TYPE_CHECKING
 
 if TYPE_CHECKING:
     from .lexer import Token
@@ -73,7 +73,7 @@ class UnexpectedInput(LarkError):
         after = text[pos:end].split(b'\n', 1)[0]
         return (before + after + b'\n' + b' ' * len(before.expandtabs()) + b'^\n').decode("ascii", "backslashreplace")
 
-    def match_examples(self, parse_fn: 'Callable[[str], Tree]', examples: Union[Dict[T, Iterable[str]], Iterable[Tuple[T, Iterable[str]]]], token_type_match_fallback: bool=False, use_accepts: bool=False) -> T:
+    def match_examples(self, parse_fn: 'Callable[[str], Tree]', examples: Union[Dict[T, Iterable[str]], Iterable[Tuple[T, Iterable[str]]]], token_type_match_fallback: bool=False, use_accepts: bool=False) -> Optional[T]:
         """Allows you to detect what's wrong in the input text by matching
         against example errors.
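The match_examples change is the return-type side of the same issue: the method can return None when no example matches, so its declared return type must admit None. A hedged illustration with a hypothetical lookup, not lark's implementation:

    from typing import Dict, Iterable, Optional, TypeVar

    T = TypeVar('T')

    def first_match(needle: str, examples: Dict[T, Iterable[str]]) -> Optional[T]:
        # The loop may fall through without returning a label,
        # so the return type must be Optional[T], not T.
        for label, candidates in examples.items():
            if needle in candidates:
                return label
        return None

    assert first_match("b", {1: ["a"], 2: ["b"]}) == 2
    assert first_match("z", {1: ["a"]}) is None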



lark/indenter.py  (+4 -4)

@@ -14,12 +14,12 @@ class DedentError(LarkError):
 
 class Indenter(PostLex, ABC):
 
-    paren_level: Optional[int]
-    indent_level: Optional[List[int]]
+    paren_level: int
+    indent_level: List[int]
 
     def __init__(self) -> None:
-        self.paren_level = None
-        self.indent_level = None
+        self.paren_level = 0
+        self.indent_level = [0]
         assert self.tab_len > 0
 
     def handle_NL(self, token: Token) -> Iterator[Token]:
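Indenter takes the opposite route: instead of widening the annotations to Optional, the attributes are initialized to usable values (0 and [0]), so later arithmetic on them needs no None checks. A generic sketch of that trade-off:

    from typing import List

    class Tracker:
        depth: int          # stays int, not Optional[int]
        levels: List[int]   # stays List[int], not Optional[List[int]]

        def __init__(self) -> None:
            # A None sentinel here would force Optional annotations and a
            # None check at every use site; concrete defaults avoid both.
            self.depth = 0
            self.levels = [0]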


lark/lark.py  (+5 -4)

@@ -15,7 +15,7 @@ from .grammar import Rule
 
 import re
 try:
-    import regex
+    import regex  # type: ignore
 except ImportError:
     regex = None
 
@@ -149,7 +149,7 @@ class LarkOptions(Serialize):
     # - As an attribute of `LarkOptions` above
     # - Potentially in `_LOAD_ALLOWED_OPTIONS` below this class, when the option doesn't change how the grammar is loaded
     # - Potentially in `lark.tools.__init__`, if it makes sense, and it can easily be passed as a cmd argument
-    _defaults = {
+    _defaults: Dict[str, Any] = {
         'debug': False,
         'keep_all_tokens': False,
         'tree_class': None,
@@ -414,6 +414,7 @@ class Lark(Serialize):
         if cache_fn:
             logger.debug('Saving grammar to cache: %s', cache_fn)
             with FS.open(cache_fn, 'wb') as f:
+                assert cache_md5 is not None
                 f.write(cache_md5.encode('utf8') + b'\n')
                 pickle.dump(used_files, f)
                 self.save(f)
@@ -574,7 +575,7 @@ class Lark(Serialize):
         """Get information about a terminal"""
         return self._terminals_dict[name]
 
-    def parse_interactive(self, text: str=None, start: Optional[str]=None) -> 'InteractiveParser':
+    def parse_interactive(self, text: Optional[str]=None, start: Optional[str]=None) -> 'InteractiveParser':
         """Start an interactive parsing session.
 
         Parameters:
@@ -588,7 +589,7 @@ class Lark(Serialize):
         """
         return self.parser.parse_interactive(text, start=start)
 
-    def parse(self, text: str, start: Optional[str]=None, on_error: 'Callable[[UnexpectedInput], bool]'=None) -> Tree:
+    def parse(self, text: str, start: Optional[str]=None, on_error: 'Optional[Callable[[UnexpectedInput], bool]]'=None) -> Tree:
         """Parse the given text, according to the options provided.
 
         Parameters:
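The one non-annotation change in lark.py is the assert added before cache_md5.encode(...): mypy narrows Optional[str] to str after `assert x is not None`, which is why the assert silences the error rather than papering over it. A minimal sketch, assuming a value mypy sees as Optional:

    from typing import Optional

    def checksum_line(md5: Optional[str]) -> bytes:
        # Without the assert, mypy reports:
        #   error: Item "None" of "Optional[str]" has no attribute "encode"
        assert md5 is not None  # narrows Optional[str] -> str
        return md5.encode('utf8') + b'\n'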


lark/lexer.py  (+34 -27)

@@ -23,10 +23,10 @@ class Pattern(Serialize, ABC):
     value: str
     flags: Collection[str]
-    raw: str = None
-    type: str = None
+    raw: Optional[str] = None
+    type: Optional[str] = None
 
-    def __init__(self, value: str, flags: Collection[str]=(), raw: str=None) -> None:
+    def __init__(self, value: str, flags: Collection[str]=(), raw: Optional[str]=None) -> None:
         self.value = value
         self.flags = frozenset(flags)
         self.raw = raw
@@ -81,7 +81,10 @@ class PatternStr(Pattern):
     @property
     def min_width(self) -> int:
         return len(self.value)
-    max_width = min_width
+
+    @property
+    def max_width(self) -> int:
+        return len(self.value)
 
 
 class PatternRE(Pattern):
@@ -320,15 +323,36 @@ def _regexp_has_newline(r):
     """
     return '\n' in r or '\\n' in r or '\\s' in r or '[^' in r or ('(?s' in r and '.' in r)
 
+
+class LexerState(object):
+    __slots__ = 'text', 'line_ctr', 'last_token'
+
+    def __init__(self, text, line_ctr, last_token=None):
+        self.text = text
+        self.line_ctr = line_ctr
+        self.last_token = last_token
+
+    def __eq__(self, other):
+        if not isinstance(other, LexerState):
+            return NotImplemented
+
+        return self.text is other.text and self.line_ctr == other.line_ctr and self.last_token == other.last_token
+
+    def __copy__(self):
+        return type(self)(self.text, copy(self.line_ctr), self.last_token)
+
+
 _Callback = Callable[[Token], Token]
 
 class Lexer(ABC):
     """Lexer interface
 
     Method Signatures:
-        lex(self, text) -> Iterator[Token]
+        lex(self, lexer_state, parser_state) -> Iterator[Token]
     """
-    lex: Callable[..., Iterator[Token]] = NotImplemented
+    @abstractmethod
+    def lex(self, lexer_state: LexerState, parser_state: Any) -> Iterator[Token]:
+        ...
 
     def make_lexer_state(self, text):
         line_ctr = LineCounter(b'\n' if isinstance(text, bytes) else '\n')
@@ -394,6 +418,7 @@ class TraditionalLexer(Lexer):
     def mres(self) -> List[Tuple[REPattern, Dict[int, str]]]:
         if self._mres is None:
             self._build()
+        assert self._mres is not None
         return self._mres
 
     def match(self, text: str, pos: int) -> Optional[Tuple[str, str]]:
@@ -402,12 +427,12 @@ class TraditionalLexer(Lexer):
         if m:
             return m.group(0), type_from_index[m.lastindex]
 
-    def lex(self, state: Any, parser_state: Any) -> Iterator[Token]:
+    def lex(self, state: LexerState, parser_state: Any) -> Iterator[Token]:
         with suppress(EOFError):
             while True:
                 yield self.next_token(state, parser_state)
 
-    def next_token(self, lex_state: Any, parser_state: Any=None) -> Token:
+    def next_token(self, lex_state: LexerState, parser_state: Any=None) -> Token:
         line_ctr = lex_state.line_ctr
         while line_ctr.char_pos < len(lex_state.text):
             res = self.match(lex_state.text, line_ctr.char_pos)
@@ -443,24 +468,6 @@ class TraditionalLexer(Lexer):
         raise EOFError(self)
 
 
-class LexerState(object):
-    __slots__ = 'text', 'line_ctr', 'last_token'
-
-    def __init__(self, text, line_ctr, last_token=None):
-        self.text = text
-        self.line_ctr = line_ctr
-        self.last_token = last_token
-
-    def __eq__(self, other):
-        if not isinstance(other, LexerState):
-            return NotImplemented
-
-        return self.text is other.text and self.line_ctr == other.line_ctr and self.last_token == other.last_token
-
-    def __copy__(self):
-        return type(self)(self.text, copy(self.line_ctr), self.last_token)
-
-
 class ContextualLexer(Lexer):
 
     lexers: Dict[str, TraditionalLexer]
@@ -494,7 +501,7 @@ class ContextualLexer(Lexer):
     def make_lexer_state(self, text):
         return self.root_lexer.make_lexer_state(text)
 
-    def lex(self, lexer_state: Any, parser_state: Any) -> Iterator[Token]:
+    def lex(self, lexer_state: LexerState, parser_state: Any) -> Iterator[Token]:
         try:
             while True:
                 lexer = self.lexers[parser_state.position]
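Two structural moves dominate lexer.py: LexerState is relocated above Lexer so the new lex() signature can annotate it, and the placeholder attribute `lex: Callable[..., Iterator[Token]] = NotImplemented` becomes a real @abstractmethod, which mypy can check and which ABC enforces at instantiation. (The max_width fix is related: `max_width = min_width` aliased a property object, so it is rewritten as a property of its own.) A stripped-down sketch of the abstract-method pattern, with illustrative names rather than lark's:

    from abc import ABC, abstractmethod
    from typing import Any, Iterator

    class State:  # defined before the classes whose annotations use it
        def __init__(self, text: str) -> None:
            self.text = text

    class BaseLexer(ABC):
        @abstractmethod  # replaces `lex: Callable[...] = NotImplemented`
        def lex(self, state: State, parser_state: Any) -> Iterator[str]:
            ...

    class WordLexer(BaseLexer):
        def lex(self, state: State, parser_state: Any) -> Iterator[str]:
            yield from state.text.split()

    # BaseLexer() raises TypeError; the concrete subclass works:
    assert list(WordLexer().lex(State("a b"), None)) == ["a", "b"]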


lark/parser_frontends.py  (+1 -1)

@@ -7,7 +7,7 @@ from .parsers.lalr_parser import LALR_Parser
 from .tree import Tree
 from .common import LexerConf, ParserConf
 try:
-    import regex
+    import regex  # type: ignore
 except ImportError:
     regex = None
 import re
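The `# type: ignore` on `import regex` here (and in lark.py, tree.py, and utils.py) suppresses the missing-stubs error mypy typically emits for third-party packages that ship no type information; the runtime behavior is unchanged:

    try:
        import regex  # type: ignore  # regex provides no type stubs
    except ImportError:
        regex = None  # optional dependency; callers fall back to stdlib re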


lark/tree.py  (+4 -3)

@@ -1,8 +1,9 @@
 try:
-    from future_builtins import filter
+    from future_builtins import filter  # type: ignore
 except ImportError:
     pass
 
+import sys
 from copy import deepcopy
 
 
@@ -49,7 +50,7 @@ class Tree(object):
     data: str
     children: 'List[Union[str, Tree]]'
 
-    def __init__(self, data: str, children: 'List[Union[str, Tree]]', meta: Meta=None) -> None:
+    def __init__(self, data: str, children: 'List[Union[str, Tree]]', meta: Optional[Meta]=None) -> None:
         self.data = data
         self.children = children
         self._meta = meta
@@ -196,7 +197,7 @@ def pydot__tree_to_graph(tree, rankdir="LR", **kwargs):
     possible attributes, see https://www.graphviz.org/doc/info/attrs.html.
     """
 
-    import pydot
+    import pydot  # type: ignore
     graph = pydot.Dot(graph_type='digraph', rankdir=rankdir, **kwargs)
 
     i = [0]


lark/utils.py  (+1 -1)

@@ -134,7 +134,7 @@ def smart_decorator(f, create_decorator):
 
 
 try:
-    import regex
+    import regex  # type: ignore
 except ImportError:
     regex = None


