@@ -2,8 +2,10 @@ | |||||
from typing import ( | from typing import ( | ||||
TypeVar, Type, List, Dict, IO, Iterator, Callable, Union, Optional, | TypeVar, Type, List, Dict, IO, Iterator, Callable, Union, Optional, | ||||
Literal, Protocol, Tuple, Iterable, | |||||
Literal, Protocol, Tuple, Iterable, | |||||
) | ) | ||||
from .parsers.lalr_puppet import ParserPuppet | |||||
from .visitors import Transformer | from .visitors import Transformer | ||||
from .lexer import Token, Lexer, TerminalDef | from .lexer import Token, Lexer, TerminalDef | ||||
from .tree import Tree | from .tree import Tree | ||||
@@ -12,6 +14,7 @@ from .load_grammar import Grammar | |||||
_T = TypeVar('_T') | _T = TypeVar('_T') | ||||
class PostLex(Protocol): | class PostLex(Protocol): | ||||
def process(self, stream: Iterator[Token]) -> Iterator[Token]: | def process(self, stream: Iterator[Token]) -> Iterator[Token]: | ||||
@@ -46,6 +49,7 @@ class PackageResource(object): | |||||
def __init__(self, pkg_name: str, path: str): ... | def __init__(self, pkg_name: str, path: str): ... | ||||
class FromPackageLoader: | class FromPackageLoader: | ||||
def __init__(self, pkg_name: str, search_paths: Tuple[str, ...] = ...): ... | def __init__(self, pkg_name: str, search_paths: Tuple[str, ...] = ...): ... | ||||
@@ -87,6 +91,9 @@ class Lark: | |||||
def parse(self, text: str, start: Optional[str] = None, on_error: Callable[[UnexpectedInput], bool] = None) -> Tree: | def parse(self, text: str, start: Optional[str] = None, on_error: Callable[[UnexpectedInput], bool] = None) -> Tree: | ||||
... | ... | ||||
def get_puppet(self, text: str = None, start: Optional[str] = None) -> ParserPuppet: | |||||
... | |||||
@classmethod | @classmethod | ||||
def open(cls: Type[_T], grammar_filename: str, rel_to: Optional[str] = None, **options) -> _T: | def open(cls: Type[_T], grammar_filename: str, rel_to: Optional[str] = None, **options) -> _T: | ||||
... | ... | ||||
@@ -9,9 +9,20 @@ class ParserPuppet(object): | |||||
Accessible via `UnexpectedToken.puppet` (raised by the parser on token error) | Accessible via `UnexpectedToken.puppet` (raised by the parser on token error) | ||||
""" | """ | ||||
parser: Any | |||||
parser_state: Any | |||||
lexer_state: Any | |||||
def feed_token(self, token: Token) -> Any: ... | def feed_token(self, token: Token) -> Any: ... | ||||
def exhaust_lexer(self) -> None: ... | |||||
def feed_eof(self, last_token: Token = None) -> Any: ... | |||||
def copy(self) -> ParserPuppet: ... | def copy(self) -> ParserPuppet: ... | ||||
def as_immutable(self) -> ImmutableParserPuppet: ... | |||||
def pretty(self) -> str: ... | def pretty(self) -> str: ... | ||||
@@ -20,3 +31,13 @@ class ParserPuppet(object): | |||||
def accepts(self) -> Set[str]: ... | def accepts(self) -> Set[str]: ... | ||||
def resume_parse(self) -> Tree: ... | def resume_parse(self) -> Tree: ... | ||||
class ImmutableParserPuppet(ParserPuppet):
    # Result of the last feed_token/feed_eof call (None until one succeeds).
    result: Any = None
    # Self-referential return types are quoted forward references (PEP 484);
    # last_token defaults to None, so it must be Optional[Token].
    def feed_token(self, token: Token) -> "ImmutableParserPuppet": ...
    def exhaust_lexer(self) -> "ImmutableParserPuppet": ...
    def feed_eof(self, last_token: Optional[Token] = None) -> "ImmutableParserPuppet": ...
@@ -531,6 +531,9 @@ class Lark(Serialize): | |||||
def get_terminal(self, name): | def get_terminal(self, name): | ||||
"Get information about a terminal" | "Get information about a terminal" | ||||
return self._terminals_dict[name] | return self._terminals_dict[name] | ||||
def get_puppet(self, text=None, start=None): | |||||
return self.parser.get_puppet(text, start=start) | |||||
def parse(self, text, start=None, on_error=None): | def parse(self, text, start=None, on_error=None): | ||||
"""Parse the given text, according to the options provided. | """Parse the given text, according to the options provided. | ||||
@@ -89,18 +89,29 @@ class ParsingFrontend(Serialize): | |||||
if lexer_conf.postlex: | if lexer_conf.postlex: | ||||
self.lexer = PostLexConnector(self.lexer, lexer_conf.postlex) | self.lexer = PostLexConnector(self.lexer, lexer_conf.postlex) | ||||
def parse(self, text, start=None, on_error=None): | |||||
def _verify_start(self, start=None): | |||||
if start is None: | if start is None: | ||||
start = self.parser_conf.start | start = self.parser_conf.start | ||||
if len(start) > 1: | if len(start) > 1: | ||||
raise ConfigurationError("Lark initialized with more than 1 possible start rule. Must specify which start rule to parse", start) | raise ConfigurationError("Lark initialized with more than 1 possible start rule. Must specify which start rule to parse", start) | ||||
start ,= start | start ,= start | ||||
elif start not in self.parser_conf.start: | |||||
raise ConfigurationError("Unknown start rule %s. Must be one of %r" % (start, self.parser_conf.start)) | |||||
return start | |||||
def parse(self, text, start=None, on_error=None): | |||||
start = self._verify_start(start) | |||||
stream = text if self.skip_lexer else LexerThread(self.lexer, text) | stream = text if self.skip_lexer else LexerThread(self.lexer, text) | ||||
kw = {} if on_error is None else {'on_error': on_error} | kw = {} if on_error is None else {'on_error': on_error} | ||||
return self.parser.parse(stream, start, **kw) | return self.parser.parse(stream, start, **kw) | ||||
def get_puppet(self, text=None, start=None): | |||||
start = self._verify_start(start) | |||||
if self.parser_conf.parser_type != 'lalr': | |||||
raise ConfigurationError("Can only create a Puppet for parser='lalr' at the moment.") | |||||
stream = text if self.skip_lexer else LexerThread(self.lexer, text) | |||||
return self.parser.get_puppet(stream, start) | |||||
def get_frontend(parser, lexer): | def get_frontend(parser, lexer): | ||||
@@ -32,6 +32,10 @@ class LALR_Parser(Serialize): | |||||
def serialize(self, memo): | def serialize(self, memo): | ||||
return self._parse_table.serialize(memo) | return self._parse_table.serialize(memo) | ||||
def get_puppet(self, lexer, start): | |||||
return self.parser.get_puppet(lexer, start) | |||||
def parse(self, lexer, start, on_error=None): | def parse(self, lexer, start, on_error=None): | ||||
try: | try: | ||||
@@ -158,10 +162,16 @@ class _Parser(object): | |||||
self.callbacks = callbacks | self.callbacks = callbacks | ||||
self.debug = debug | self.debug = debug | ||||
def get_puppet(self, lexer, start, value_stack=None, state_stack=None): | |||||
parse_conf = ParseConf(self.parse_table, self.callbacks, start) | |||||
parser_state = ParserState(parse_conf, lexer, state_stack, value_stack) | |||||
return ParserPuppet(self, parser_state, parser_state.lexer) | |||||
def parse(self, lexer, start, value_stack=None, state_stack=None): | def parse(self, lexer, start, value_stack=None, state_stack=None): | ||||
parse_conf = ParseConf(self.parse_table, self.callbacks, start) | parse_conf = ParseConf(self.parse_table, self.callbacks, start) | ||||
parser_state = ParserState(parse_conf, lexer, state_stack, value_stack) | parser_state = ParserState(parse_conf, lexer, state_stack, value_stack) | ||||
return self.parse_from_state(parser_state) | return self.parse_from_state(parser_state) | ||||
def parse_from_state(self, state): | def parse_from_state(self, state): | ||||
# Main LALR-parser loop | # Main LALR-parser loop | ||||
@@ -23,6 +23,19 @@ class ParserPuppet(object): | |||||
Note that ``token`` has to be an instance of ``Token``. | Note that ``token`` has to be an instance of ``Token``. | ||||
""" | """ | ||||
return self.parser_state.feed_token(token, token.type == '$END') | return self.parser_state.feed_token(token, token.type == '$END') | ||||
def exhaust_lexer(self): | |||||
"""Try to feed the rest of the lexer state into the puppet. | |||||
Note that this modifies the puppet in place and does not feed an '$END' Token""" | |||||
for token in self.lexer_state.lex(self.parser_state): | |||||
self.parser_state.feed_token(token) | |||||
def feed_eof(self, last_token=None): | |||||
"""Feed a '$END' Token. Borrows from 'last_token' if given.""" | |||||
eof = Token.new_borrow_pos('$END', '', last_token) if last_token is not None else Token('$END', '', 0, 1, 1) | |||||
return self.feed_token(eof) | |||||
def __copy__(self): | def __copy__(self): | ||||
"""Create a new puppet with a separate state. | """Create a new puppet with a separate state. | ||||
@@ -93,4 +106,13 @@ class ImmutableParserPuppet(ParserPuppet): | |||||
def feed_token(self, token): | def feed_token(self, token): | ||||
c = copy(self) | c = copy(self) | ||||
c.result = ParserPuppet.feed_token(c, token) | c.result = ParserPuppet.feed_token(c, token) | ||||
return c | |||||
return c | |||||
def exhaust_lexer(self): | |||||
"""Try to feed the rest of the lexer state into the puppet. | |||||
Note that this returns a new ImmutableParserPuppet and does not feed an '$END' Token""" | |||||
res = copy(self) | |||||
for token in res.lexer_state.lex(res.parser_state): | |||||
res = res.parser_state.feed_token(token) | |||||
return res |
@@ -2395,6 +2395,42 @@ def _make_parser_test(LEXER, PARSER): | |||||
""", regex=True) | """, regex=True) | ||||
self.assertEqual(g.parse('வணக்கம்'), 'வணக்கம்') | self.assertEqual(g.parse('வணக்கம்'), 'வணக்கம்') | ||||
@unittest.skipIf(PARSER!='lalr', "Puppet is only implemented for LALR at the moment") | |||||
def test_parser_puppet(self): | |||||
g = _Lark(r''' | |||||
start: A+ B* | |||||
A: "a" | |||||
B: "b" | |||||
''') | |||||
puppet = g.get_puppet() | |||||
self.assertRaises(UnexpectedToken, puppet.feed_eof) | |||||
self.assertRaises(TypeError, puppet.exhaust_lexer) | |||||
puppet.feed_token(Token('A', 'a')) | |||||
res = puppet.feed_eof() | |||||
self.assertEqual(res, Tree('start', ['a'])) | |||||
puppet = g.get_puppet("ab") | |||||
puppet.exhaust_lexer() | |||||
puppet_copy = puppet.copy() | |||||
self.assertEqual(puppet_copy.parser_state, puppet.parser_state) | |||||
self.assertEqual(puppet_copy.lexer_state.state, puppet.lexer_state.state) | |||||
self.assertIsNot(puppet_copy.parser_state, puppet.parser_state) | |||||
self.assertIsNot(puppet_copy.lexer_state.state, puppet.lexer_state.state) | |||||
self.assertIsNot(puppet_copy.lexer_state.state.line_ctr, puppet.lexer_state.state.line_ctr) | |||||
res = puppet.feed_eof(puppet.lexer_state.state.last_token) | |||||
self.assertEqual(res, Tree('start', ['a', 'b'])) | |||||
self.assertRaises(UnexpectedToken ,puppet.feed_eof) | |||||
self.assertRaises(UnexpectedToken, puppet_copy.feed_token, Token('A', 'a')) | |||||
puppet_copy.feed_token(Token('B', 'b')) | |||||
res = puppet_copy.feed_eof() | |||||
self.assertEqual(res, Tree('start', ['a', 'b', 'b'])) | |||||
@unittest.skipIf(PARSER!='lalr', "Puppet error handling only works with LALR for now") | @unittest.skipIf(PARSER!='lalr', "Puppet error handling only works with LALR for now") | ||||
def test_error_with_puppet(self): | def test_error_with_puppet(self): | ||||
@@ -2408,14 +2444,6 @@ def _make_parser_test(LEXER, PARSER): | |||||
# Skip comma | # Skip comma | ||||
return True | return True | ||||
elif e.token.type == 'SIGNED_NUMBER': | elif e.token.type == 'SIGNED_NUMBER': | ||||
# Make a copy and ensure it is properly made | |||||
puppet_copy = e.puppet.copy() | |||||
assert puppet_copy.parser_state == e.puppet.parser_state | |||||
assert puppet_copy.lexer_state.state == e.puppet.lexer_state.state | |||||
assert puppet_copy.parser_state is not e.puppet.parser_state | |||||
assert puppet_copy.lexer_state.state is not e.puppet.lexer_state.state | |||||
assert puppet_copy.lexer_state.state.line_ctr is not e.puppet.lexer_state.state.line_ctr | |||||
# Try to feed a comma and retry the number | # Try to feed a comma and retry the number | ||||
e.puppet.feed_token(Token('COMMA', ',')) | e.puppet.feed_token(Token('COMMA', ',')) | ||||
e.puppet.feed_token(e.token) | e.puppet.feed_token(e.token) | ||||