Browse Source

Merge branch 'get_puppet' of https://github.com/MegaIng/lark into MegaIng-get_puppet

tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.11.3
Erez Sh 3 years ago
parent
commit
baba79fb22
7 changed files with 115 additions and 13 deletions
  1. +8
    -1
      lark-stubs/lark.pyi
  2. +21
    -0
      lark-stubs/parsers/lalr_puppet.pyi
  3. +3
    -0
      lark/lark.py
  4. +14
    -3
      lark/parser_frontends.py
  5. +10
    -0
      lark/parsers/lalr_parser.py
  6. +23
    -1
      lark/parsers/lalr_puppet.py
  7. +36
    -8
      tests/test_parser.py

+ 8
- 1
lark-stubs/lark.pyi View File

@@ -2,8 +2,10 @@


from typing import ( from typing import (
TypeVar, Type, List, Dict, IO, Iterator, Callable, Union, Optional, TypeVar, Type, List, Dict, IO, Iterator, Callable, Union, Optional,
Literal, Protocol, Tuple, Iterable,
Literal, Protocol, Tuple, Iterable,
) )

from .parsers.lalr_puppet import ParserPuppet
from .visitors import Transformer from .visitors import Transformer
from .lexer import Token, Lexer, TerminalDef from .lexer import Token, Lexer, TerminalDef
from .tree import Tree from .tree import Tree
@@ -12,6 +14,7 @@ from .load_grammar import Grammar


_T = TypeVar('_T') _T = TypeVar('_T')



class PostLex(Protocol): class PostLex(Protocol):


def process(self, stream: Iterator[Token]) -> Iterator[Token]: def process(self, stream: Iterator[Token]) -> Iterator[Token]:
@@ -46,6 +49,7 @@ class PackageResource(object):


def __init__(self, pkg_name: str, path: str): ... def __init__(self, pkg_name: str, path: str): ...



class FromPackageLoader: class FromPackageLoader:
def __init__(self, pkg_name: str, search_paths: Tuple[str, ...] = ...): ... def __init__(self, pkg_name: str, search_paths: Tuple[str, ...] = ...): ...


@@ -87,6 +91,9 @@ class Lark:
def parse(self, text: str, start: Optional[str] = None, on_error: Callable[[UnexpectedInput], bool] = None) -> Tree: def parse(self, text: str, start: Optional[str] = None, on_error: Callable[[UnexpectedInput], bool] = None) -> Tree:
... ...


def get_puppet(self, text: Optional[str] = None, start: Optional[str] = None) -> ParserPuppet:
    # Both parameters default to None, so both are annotated as Optional
    # (an implicit Optional such as `text: str = None` is rejected by
    # strict type checkers).
    ...

@classmethod @classmethod
def open(cls: Type[_T], grammar_filename: str, rel_to: Optional[str] = None, **options) -> _T: def open(cls: Type[_T], grammar_filename: str, rel_to: Optional[str] = None, **options) -> _T:
... ...


+ 21
- 0
lark-stubs/parsers/lalr_puppet.pyi View File

@@ -9,9 +9,20 @@ class ParserPuppet(object):


Accessible via `UnexpectedToken.puppet` (raised by the parser on token error) Accessible via `UnexpectedToken.puppet` (raised by the parser on token error)
""" """
parser: Any
parser_state: Any
lexer_state: Any

def feed_token(self, token: Token) -> Any: ... def feed_token(self, token: Token) -> Any: ...


def exhaust_lexer(self) -> None: ...

def feed_eof(self, last_token: Token = None) -> Any: ...

def copy(self) -> ParserPuppet: ... def copy(self) -> ParserPuppet: ...
def as_immutable(self) -> ImmutableParserPuppet: ...


def pretty(self) -> str: ... def pretty(self) -> str: ...


@@ -20,3 +31,13 @@ class ParserPuppet(object):
def accepts(self) -> Set[str]: ... def accepts(self) -> Set[str]: ...


def resume_parse(self) -> Tree: ... def resume_parse(self) -> Tree: ...


# Type stub for the immutable variant of ParserPuppet: the feeding methods
# below are declared to return an ImmutableParserPuppet rather than the raw
# feed result.
class ImmutableParserPuppet(ParserPuppet):
# Declared with a default of None.
# NOTE(review): presumably holds the value returned by the underlying
# ParserPuppet.feed_token call - confirm against the implementation.
result: Any = None

# Declared to return a puppet (not the parse result).
def feed_token(self, token: Token) -> ImmutableParserPuppet: ...

# Declared to return a puppet, whereas ParserPuppet.exhaust_lexer returns None.
def exhaust_lexer(self) -> ImmutableParserPuppet: ...

# `last_token` may be omitted (defaults to None).
def feed_eof(self, last_token: Token = None) -> ImmutableParserPuppet: ...

+ 3
- 0
lark/lark.py View File

@@ -531,6 +531,9 @@ class Lark(Serialize):
def get_terminal(self, name): def get_terminal(self, name):
"Get information about a terminal" "Get information about a terminal"
return self._terminals_dict[name] return self._terminals_dict[name]
def get_puppet(self, text=None, start=None):
    """Return the puppet produced by the underlying parser frontend.

    Parameters:
        text: Passed through unchanged as the first positional argument.
        start: Passed through unchanged as the ``start`` keyword argument.

    Returns:
        Whatever ``self.parser.get_puppet`` returns.
    """
    frontend = self.parser
    puppet = frontend.get_puppet(text, start=start)
    return puppet


def parse(self, text, start=None, on_error=None): def parse(self, text, start=None, on_error=None):
"""Parse the given text, according to the options provided. """Parse the given text, according to the options provided.


+ 14
- 3
lark/parser_frontends.py View File

@@ -89,18 +89,29 @@ class ParsingFrontend(Serialize):


if lexer_conf.postlex: if lexer_conf.postlex:
self.lexer = PostLexConnector(self.lexer, lexer_conf.postlex) self.lexer = PostLexConnector(self.lexer, lexer_conf.postlex)


def parse(self, text, start=None, on_error=None):
def _verify_start(self, start=None):
if start is None: if start is None:
start = self.parser_conf.start start = self.parser_conf.start
if len(start) > 1: if len(start) > 1:
raise ConfigurationError("Lark initialized with more than 1 possible start rule. Must specify which start rule to parse", start) raise ConfigurationError("Lark initialized with more than 1 possible start rule. Must specify which start rule to parse", start)
start ,= start start ,= start
elif start not in self.parser_conf.start:
raise ConfigurationError("Unknown start rule %s. Must be one of %r" % (start, self.parser_conf.start))
return start


def parse(self, text, start=None, on_error=None):
start = self._verify_start(start)
stream = text if self.skip_lexer else LexerThread(self.lexer, text) stream = text if self.skip_lexer else LexerThread(self.lexer, text)
kw = {} if on_error is None else {'on_error': on_error} kw = {} if on_error is None else {'on_error': on_error}
return self.parser.parse(stream, start, **kw) return self.parser.parse(stream, start, **kw)
def get_puppet(self, text=None, start=None):
    """Validate ``start`` and ask the wrapped parser for a puppet.

    Parameters:
        text: Input handed to the parser. It is passed as-is when
            ``self.skip_lexer`` is set, otherwise wrapped in a ``LexerThread``
            together with ``self.lexer``.
        start: Start rule; resolved through ``self._verify_start``.

    Raises:
        ConfigurationError: if the configured parser type is not ``'lalr'``
            (raised after the start rule has been verified).
    """
    rule = self._verify_start(start)

    # Puppets are only available from the LALR parser.
    if self.parser_conf.parser_type != 'lalr':
        raise ConfigurationError("Can only create a Puppet for parser='lalr' at the moment.")

    if self.skip_lexer:
        source = text
    else:
        source = LexerThread(self.lexer, text)
    return self.parser.get_puppet(source, rule)




def get_frontend(parser, lexer): def get_frontend(parser, lexer):


+ 10
- 0
lark/parsers/lalr_parser.py View File

@@ -32,6 +32,10 @@ class LALR_Parser(Serialize):


def serialize(self, memo): def serialize(self, memo):
return self._parse_table.serialize(memo) return self._parse_table.serialize(memo)
def get_puppet(self, lexer, start):
    """Forward the request to the wrapped parser object.

    Parameters:
        lexer: Passed through unchanged.
        start: Passed through unchanged.

    Returns:
        Whatever ``self.parser.get_puppet(lexer, start)`` returns.
    """
    inner = self.parser
    return inner.get_puppet(lexer, start)


def parse(self, lexer, start, on_error=None): def parse(self, lexer, start, on_error=None):
try: try:
@@ -158,10 +162,16 @@ class _Parser(object):
self.callbacks = callbacks self.callbacks = callbacks
self.debug = debug self.debug = debug


def get_puppet(self, lexer, start, value_stack=None, state_stack=None):
    """Build a fresh parser state and wrap it in a ``ParserPuppet``.

    Parameters:
        lexer: Lexer object handed to the new ``ParserState``.
        start: Start rule used for the ``ParseConf``.
        value_stack: Optional value stack forwarded to ``ParserState``.
        state_stack: Optional state stack forwarded to ``ParserState``.

    Returns:
        A ``ParserPuppet`` bound to this parser, the new state and that
        state's ``lexer`` attribute.
    """
    # ParserState takes state_stack before value_stack.
    state = ParserState(
        ParseConf(self.parse_table, self.callbacks, start),
        lexer,
        state_stack,
        value_stack,
    )
    return ParserPuppet(self, state, state.lexer)
def parse(self, lexer, start, value_stack=None, state_stack=None): def parse(self, lexer, start, value_stack=None, state_stack=None):
parse_conf = ParseConf(self.parse_table, self.callbacks, start) parse_conf = ParseConf(self.parse_table, self.callbacks, start)
parser_state = ParserState(parse_conf, lexer, state_stack, value_stack) parser_state = ParserState(parse_conf, lexer, state_stack, value_stack)
return self.parse_from_state(parser_state) return self.parse_from_state(parser_state)


def parse_from_state(self, state): def parse_from_state(self, state):
# Main LALR-parser loop # Main LALR-parser loop


+ 23
- 1
lark/parsers/lalr_puppet.py View File

@@ -23,6 +23,19 @@ class ParserPuppet(object):
Note that ``token`` has to be an instance of ``Token``. Note that ``token`` has to be an instance of ``Token``.
""" """
return self.parser_state.feed_token(token, token.type == '$END') return self.parser_state.feed_token(token, token.type == '$END')
def exhaust_lexer(self):
    """Feed every token still produced by the lexer into the parser state.

    The puppet is modified in place and nothing is returned. No '$END'
    token is fed.
    """
    remaining = self.lexer_state.lex(self.parser_state)
    for tok in remaining:
        self.parser_state.feed_token(tok)
def feed_eof(self, last_token=None):
    """Feed an '$END' token and return the result of ``feed_token``.

    Parameters:
        last_token: When given, the '$END' token is created with
            ``Token.new_borrow_pos`` from it; otherwise a default
            ``Token('$END', '', 0, 1, 1)`` is used.
    """
    if last_token is None:
        eof = Token('$END', '', 0, 1, 1)
    else:
        eof = Token.new_borrow_pos('$END', '', last_token)
    return self.feed_token(eof)



def __copy__(self): def __copy__(self):
"""Create a new puppet with a separate state. """Create a new puppet with a separate state.
@@ -93,4 +106,13 @@ class ImmutableParserPuppet(ParserPuppet):
def feed_token(self, token): def feed_token(self, token):
c = copy(self) c = copy(self)
c.result = ParserPuppet.feed_token(c, token) c.result = ParserPuppet.feed_token(c, token)
return c
return c

def exhaust_lexer(self):
    """Try to feed the rest of the lexer state into the puppet.

    Note that this returns a new ImmutableParserPuppet and does not feed an '$END' Token.

    Returns:
        A copy of this puppet whose parser state has consumed every token
        the lexer produced; ``self`` is not the object returned.
    """
    res = copy(self)
    for token in res.lexer_state.lex(res.parser_state):
        # Feed on the copy's state without rebinding `res`: feed_token's
        # return value is not a puppet, so assigning it to `res` would break
        # the next iteration and make this method return the wrong object.
        res.parser_state.feed_token(token)
    return res

+ 36
- 8
tests/test_parser.py View File

@@ -2395,6 +2395,42 @@ def _make_parser_test(LEXER, PARSER):
""", regex=True) """, regex=True)
self.assertEqual(g.parse('வணக்கம்'), 'வணக்கம்') self.assertEqual(g.parse('வணக்கம்'), 'வணக்கம்')


# Exercises Lark.get_puppet directly (without going through a parse error):
# manual token feeding, lexer exhaustion, copying, and EOF handling.
@unittest.skipIf(PARSER!='lalr', "Puppet is only implemented for LALR at the moment")
def test_parser_puppet(self):

# Grammar: one or more A tokens followed by zero or more B tokens.
g = _Lark(r'''
start: A+ B*
A: "a"
B: "b"
''')
# Puppet created without any input text.
puppet = g.get_puppet()

# Nothing has been fed yet, so '$END' is rejected (the grammar needs at least one A).
self.assertRaises(UnexpectedToken, puppet.feed_eof)
# NOTE(review): TypeError presumably comes from lexing text=None - confirm.
self.assertRaises(TypeError, puppet.exhaust_lexer)
# Feeding a single A by hand and then EOF yields a complete tree.
puppet.feed_token(Token('A', 'a'))
res = puppet.feed_eof()
self.assertEqual(res, Tree('start', ['a']))

# Second scenario: puppet created over the text "ab".
puppet = g.get_puppet("ab")

# Consume all tokens from the lexer; no '$END' is fed by this call.
puppet.exhaust_lexer()

# A copy must compare equal to the original but share no state objects with it.
puppet_copy = puppet.copy()
self.assertEqual(puppet_copy.parser_state, puppet.parser_state)
self.assertEqual(puppet_copy.lexer_state.state, puppet.lexer_state.state)
self.assertIsNot(puppet_copy.parser_state, puppet.parser_state)
self.assertIsNot(puppet_copy.lexer_state.state, puppet.lexer_state.state)
self.assertIsNot(puppet_copy.lexer_state.state.line_ctr, puppet.lexer_state.state.line_ctr)

# Finish the original puppet; the '$END' token borrows its position from the last lexed token.
res = puppet.feed_eof(puppet.lexer_state.state.last_token)
self.assertEqual(res, Tree('start', ['a', 'b']))
# A second EOF on the already-finished puppet is rejected.
self.assertRaises(UnexpectedToken ,puppet.feed_eof)
# The copy is unaffected by the original finishing: after "ab" it rejects
# another A but still accepts a B and can then be completed on its own.
self.assertRaises(UnexpectedToken, puppet_copy.feed_token, Token('A', 'a'))
puppet_copy.feed_token(Token('B', 'b'))
res = puppet_copy.feed_eof()
self.assertEqual(res, Tree('start', ['a', 'b', 'b']))


@unittest.skipIf(PARSER!='lalr', "Puppet error handling only works with LALR for now") @unittest.skipIf(PARSER!='lalr', "Puppet error handling only works with LALR for now")
def test_error_with_puppet(self): def test_error_with_puppet(self):
@@ -2408,14 +2444,6 @@ def _make_parser_test(LEXER, PARSER):
# Skip comma # Skip comma
return True return True
elif e.token.type == 'SIGNED_NUMBER': elif e.token.type == 'SIGNED_NUMBER':
# Make a copy and ensure it is properly made
puppet_copy = e.puppet.copy()
assert puppet_copy.parser_state == e.puppet.parser_state
assert puppet_copy.lexer_state.state == e.puppet.lexer_state.state
assert puppet_copy.parser_state is not e.puppet.parser_state
assert puppet_copy.lexer_state.state is not e.puppet.lexer_state.state
assert puppet_copy.lexer_state.state.line_ctr is not e.puppet.lexer_state.state.line_ctr

# Try to feed a comma and retry the number # Try to feed a comma and retry the number
e.puppet.feed_token(Token('COMMA', ',')) e.puppet.feed_token(Token('COMMA', ','))
e.puppet.feed_token(e.token) e.puppet.feed_token(e.token)


Loading…
Cancel
Save