Ver código fonte

Merge branch 'get_puppet' of https://github.com/MegaIng/lark into MegaIng-get_puppet

tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.11.3
Erez Sh 3 anos atrás
pai
commit
baba79fb22
7 arquivos alterados com 115 adições e 13 exclusões
  1. +8
    -1
      lark-stubs/lark.pyi
  2. +21
    -0
      lark-stubs/parsers/lalr_puppet.pyi
  3. +3
    -0
      lark/lark.py
  4. +14
    -3
      lark/parser_frontends.py
  5. +10
    -0
      lark/parsers/lalr_parser.py
  6. +23
    -1
      lark/parsers/lalr_puppet.py
  7. +36
    -8
      tests/test_parser.py

+ 8
- 1
lark-stubs/lark.pyi Ver arquivo

@@ -2,8 +2,10 @@

from typing import (
TypeVar, Type, List, Dict, IO, Iterator, Callable, Union, Optional,
Literal, Protocol, Tuple, Iterable,
Literal, Protocol, Tuple, Iterable,
)

from .parsers.lalr_puppet import ParserPuppet
from .visitors import Transformer
from .lexer import Token, Lexer, TerminalDef
from .tree import Tree
@@ -12,6 +14,7 @@ from .load_grammar import Grammar

_T = TypeVar('_T')


class PostLex(Protocol):

def process(self, stream: Iterator[Token]) -> Iterator[Token]:
@@ -46,6 +49,7 @@ class PackageResource(object):

def __init__(self, pkg_name: str, path: str): ...


class FromPackageLoader:
def __init__(self, pkg_name: str, search_paths: Tuple[str, ...] = ...): ...

@@ -87,6 +91,9 @@ class Lark:
def parse(self, text: str, start: Optional[str] = None, on_error: Optional[Callable[[UnexpectedInput], bool]] = None) -> Tree:
    # Stub for Lark.parse. Fix: `on_error: Callable[...] = None` relied on
    # implicit Optional, which PEP 484 disallows (rejected by strict mypy);
    # `Optional` is already imported from typing in this stub.
    ...

def get_puppet(self, text: Optional[str] = None, start: Optional[str] = None) -> ParserPuppet:
    # Stub for Lark.get_puppet. Fix: `text: str = None` relied on implicit
    # Optional, disallowed by PEP 484; `Optional` is already imported from
    # typing in this stub.
    ...

# Alternate constructor: build a Lark instance from a grammar file path.
@classmethod
def open(cls: Type[_T], grammar_filename: str, rel_to: Optional[str] = None, **options) -> _T:
    ...


+ 21
- 0
lark-stubs/parsers/lalr_puppet.pyi Ver arquivo

@@ -9,9 +9,20 @@ class ParserPuppet(object):

Accessible via `UnexpectedToken.puppet` (raised by the parser on token error)
"""
# Mutable interactive-parsing state: the owning parser plus snapshots of the
# parser and lexer state.
parser: Any
parser_state: Any
lexer_state: Any

def feed_token(self, token: Token) -> Any: ...

# Feeds all remaining lexer tokens in place; does not feed '$END'.
def exhaust_lexer(self) -> None: ...

# NOTE(review): `last_token: Token = None` relies on implicit Optional;
# tighten to Optional[Token] if this stub's typing imports allow — confirm.
def feed_eof(self, last_token: Token = None) -> Any: ...

def copy(self) -> ParserPuppet: ...
def as_immutable(self) -> ImmutableParserPuppet: ...

def pretty(self) -> str: ...
@@ -20,3 +31,13 @@ class ParserPuppet(object):
def accepts(self) -> Set[str]: ...

def resume_parse(self) -> Tree: ...


class ImmutableParserPuppet(ParserPuppet):
    """Functional variant of ParserPuppet: feed operations return a new puppet
    (per the return annotations below) instead of mutating in place."""
    # Outcome of the most recent feed operation; None before anything is fed.
    result: Any = None

    def feed_token(self, token: Token) -> ImmutableParserPuppet: ...

    def exhaust_lexer(self) -> ImmutableParserPuppet: ...

    # NOTE(review): `last_token: Token = None` is implicit-Optional; confirm
    # whether Optional is imported in this stub before tightening.
    def feed_eof(self, last_token: Token = None) -> ImmutableParserPuppet: ...

+ 3
- 0
lark/lark.py Ver arquivo

@@ -531,6 +531,9 @@ class Lark(Serialize):
def get_terminal(self, name):
    """Look up the definition of the terminal called *name*."""
    terminals = self._terminals_dict
    return terminals[name]
def get_puppet(self, text=None, start=None):
    """Create a puppet for step-by-step parsing, delegating to the frontend."""
    frontend = self.parser
    return frontend.get_puppet(text, start=start)

def parse(self, text, start=None, on_error=None):
"""Parse the given text, according to the options provided.


+ 14
- 3
lark/parser_frontends.py Ver arquivo

@@ -89,18 +89,29 @@ class ParsingFrontend(Serialize):

if lexer_conf.postlex:
self.lexer = PostLexConnector(self.lexer, lexer_conf.postlex)


def parse(self, text, start=None, on_error=None):
def _verify_start(self, start=None):
if start is None:
start = self.parser_conf.start
if len(start) > 1:
raise ConfigurationError("Lark initialized with more than 1 possible start rule. Must specify which start rule to parse", start)
start ,= start
elif start not in self.parser_conf.start:
raise ConfigurationError("Unknown start rule %s. Must be one of %r" % (start, self.parser_conf.start))
return start

def parse(self, text, start=None, on_error=None):
    """Parse *text* from the resolved start symbol.

    *on_error* is forwarded to the parser only when given, so parsers
    without that parameter keep working.
    """
    resolved = self._verify_start(start)
    if self.skip_lexer:
        stream = text
    else:
        stream = LexerThread(self.lexer, text)
    extra_kw = {'on_error': on_error} if on_error is not None else {}
    return self.parser.parse(stream, resolved, **extra_kw)
def get_puppet(self, text=None, start=None):
    """Create a ParserPuppet over *text* for interactive (step-by-step) parsing."""
    resolved = self._verify_start(start)
    # Start-symbol validation runs first, matching parse(); only LALR
    # supports puppets.
    if self.parser_conf.parser_type != 'lalr':
        raise ConfigurationError("Can only create a Puppet for parser='lalr' at the moment.")
    if self.skip_lexer:
        stream = text
    else:
        stream = LexerThread(self.lexer, text)
    return self.parser.get_puppet(stream, resolved)


def get_frontend(parser, lexer):


+ 10
- 0
lark/parsers/lalr_parser.py Ver arquivo

@@ -32,6 +32,10 @@ class LALR_Parser(Serialize):

def serialize(self, memo):
    """Serialize the parse table — the persistent part of the parser."""
    table = self._parse_table
    return table.serialize(memo)
def get_puppet(self, lexer, start):
    """Build a puppet from the inner parser, seeded with *lexer* and *start*."""
    inner = self.parser
    return inner.get_puppet(lexer, start)

def parse(self, lexer, start, on_error=None):
try:
@@ -158,10 +162,16 @@ class _Parser(object):
self.callbacks = callbacks
self.debug = debug

def get_puppet(self, lexer, start, value_stack=None, state_stack=None):
    """Create a ParserPuppet around a fresh ParserState.

    The state may be seeded with existing value/state stacks; by default
    parsing starts from scratch.
    """
    parse_conf = ParseConf(self.parse_table, self.callbacks, start)
    parser_state = ParserState(parse_conf, lexer, state_stack, value_stack)
    # NOTE(review): the puppet's lexer is read back off the ParserState
    # (`parser_state.lexer`) — presumably the same `lexer` passed in; confirm.
    return ParserPuppet(self, parser_state, parser_state.lexer)
def parse(self, lexer, start, value_stack=None, state_stack=None):
    """Run a full parse over *lexer*, from a freshly built ParserState.

    Mirrors get_puppet()'s state construction, then drives the main loop
    via parse_from_state.
    """
    parse_conf = ParseConf(self.parse_table, self.callbacks, start)
    parser_state = ParserState(parse_conf, lexer, state_stack, value_stack)
    return self.parse_from_state(parser_state)

def parse_from_state(self, state):
# Main LALR-parser loop


+ 23
- 1
lark/parsers/lalr_puppet.py Ver arquivo

@@ -23,6 +23,19 @@ class ParserPuppet(object):
Note that ``token`` has to be an instance of ``Token``.
"""
return self.parser_state.feed_token(token, token.type == '$END')
def exhaust_lexer(self):
    """Feed every remaining token from the lexer state into the puppet.

    Modifies the puppet in place; no '$END' Token is fed.
    """
    remaining = self.lexer_state.lex(self.parser_state)
    for tok in remaining:
        self.parser_state.feed_token(tok)
def feed_eof(self, last_token=None):
    """Feed a '$END' Token. Borrows position info from 'last_token' if given."""
    if last_token is None:
        eof = Token('$END', '', 0, 1, 1)
    else:
        eof = Token.new_borrow_pos('$END', '', last_token)
    return self.feed_token(eof)


def __copy__(self):
"""Create a new puppet with a separate state.
@@ -93,4 +106,13 @@ class ImmutableParserPuppet(ParserPuppet):
def feed_token(self, token):
    """Feed one token and return a NEW puppet; its `result` holds the feed outcome."""
    c = copy(self)
    c.result = ParserPuppet.feed_token(c, token)
    return c
    # NOTE(review): the duplicated, unreachable `return c` below looks like a
    # diff-rendering artifact (old line + new line shown together) — confirm
    # against the actual file before removing.
    return c

def exhaust_lexer(self):
    """Try to feed the rest of the lexer state into the puppet.

    Note that this returns a new ImmutableParserPuppet and does not feed an '$END' Token.
    """
    # Bug fix: the previous version rebound `res` to the raw return value of
    # parser_state.feed_token() (None until '$END'), so the loop crashed with
    # AttributeError on the second token. Feed the copy's state in place
    # instead, leaving `self` untouched, and return the advanced copy.
    res = copy(self)
    for token in res.lexer_state.lex(res.parser_state):
        res.parser_state.feed_token(token)
    return res

+ 36
- 8
tests/test_parser.py Ver arquivo

@@ -2395,6 +2395,42 @@ def _make_parser_test(LEXER, PARSER):
""", regex=True)
self.assertEqual(g.parse('வணக்கம்'), 'வணக்கம்')

@unittest.skipIf(PARSER!='lalr', "Puppet is only implemented for LALR at the moment")
def test_parser_puppet(self):
    """Exercise ParserPuppet: feeding tokens and EOF, lexer exhaustion, copying."""

    g = _Lark(r'''
start: A+ B*
A: "a"
B: "b"
''')
    puppet = g.get_puppet()

    # With no input text: EOF is premature, and there is no lexer state to exhaust.
    self.assertRaises(UnexpectedToken, puppet.feed_eof)
    self.assertRaises(TypeError, puppet.exhaust_lexer)
    puppet.feed_token(Token('A', 'a'))
    res = puppet.feed_eof()
    self.assertEqual(res, Tree('start', ['a']))

    puppet = g.get_puppet("ab")

    puppet.exhaust_lexer()

    # A copy must compare equal to the original but share no mutable state.
    puppet_copy = puppet.copy()
    self.assertEqual(puppet_copy.parser_state, puppet.parser_state)
    self.assertEqual(puppet_copy.lexer_state.state, puppet.lexer_state.state)
    self.assertIsNot(puppet_copy.parser_state, puppet.parser_state)
    self.assertIsNot(puppet_copy.lexer_state.state, puppet.lexer_state.state)
    self.assertIsNot(puppet_copy.lexer_state.state.line_ctr, puppet.lexer_state.state.line_ctr)

    # EOF borrows position info from the last lexed token.
    res = puppet.feed_eof(puppet.lexer_state.state.last_token)
    self.assertEqual(res, Tree('start', ['a', 'b']))
    self.assertRaises(UnexpectedToken, puppet.feed_eof)
    # The copy is independent: it can still accept more 'B's, but not 'A'.
    self.assertRaises(UnexpectedToken, puppet_copy.feed_token, Token('A', 'a'))
    puppet_copy.feed_token(Token('B', 'b'))
    res = puppet_copy.feed_eof()
    self.assertEqual(res, Tree('start', ['a', 'b', 'b']))

@unittest.skipIf(PARSER!='lalr', "Puppet error handling only works with LALR for now")
def test_error_with_puppet(self):
@@ -2408,14 +2444,6 @@ def _make_parser_test(LEXER, PARSER):
# Skip comma
return True
elif e.token.type == 'SIGNED_NUMBER':
# Make a copy and ensure it is properly made
puppet_copy = e.puppet.copy()
assert puppet_copy.parser_state == e.puppet.parser_state
assert puppet_copy.lexer_state.state == e.puppet.lexer_state.state
assert puppet_copy.parser_state is not e.puppet.parser_state
assert puppet_copy.lexer_state.state is not e.puppet.lexer_state.state
assert puppet_copy.lexer_state.state.line_ctr is not e.puppet.lexer_state.state.line_ctr

# Try to feed a comma and retry the number
e.puppet.feed_token(Token('COMMA', ','))
e.puppet.feed_token(e.token)


Carregando…
Cancelar
Salvar