@@ -2,6 +2,7 @@ from typing import List, Tuple, Union, Callable, Dict, Optional | |||
from lark import Tree | |||
from lark.grammar import RuleOptions | |||
from lark.exceptions import UnexpectedInput | |||
class Grammar: | |||
@@ -24,3 +25,6 @@ class GrammarBuilder: | |||
def validate(self) -> None: ... | |||
def build(self) -> Grammar: ... | |||
def find_grammar_errors(text: str, start: str='start') -> List[Tuple[UnexpectedInput, str]]: ... |
@@ -1,5 +1,5 @@ | |||
from __future__ import absolute_import | |||
from lark.exceptions import UnexpectedCharacters, UnexpectedInput, UnexpectedToken, ConfigurationError, assert_config | |||
from lark.exceptions import ConfigurationError, assert_config | |||
import sys, os, pickle, hashlib | |||
from io import open | |||
@@ -518,35 +518,7 @@ class Lark(Serialize): | |||
result of the transformation. Otherwise, returns a Tree instance. | |||
""" | |||
try: | |||
return self.parser.parse(text, start=start) | |||
except UnexpectedInput as e: | |||
if on_error is None: | |||
raise | |||
while True: | |||
if isinstance(e, UnexpectedCharacters): | |||
s = e.puppet.lexer_state.state | |||
p = s.line_ctr.char_pos | |||
if not on_error(e): | |||
raise e | |||
if isinstance(e, UnexpectedCharacters): | |||
# If user didn't change the character position, then we should | |||
if p == s.line_ctr.char_pos: | |||
s.line_ctr.feed(s.text[p:p+1]) | |||
try: | |||
return e.puppet.resume_parse() | |||
except UnexpectedToken as e2: | |||
if isinstance(e, UnexpectedToken) and e.token.type == e2.token.type == '$END' and e.puppet == e2.puppet: | |||
# Prevent infinite loop | |||
raise e2 | |||
e = e2 | |||
except UnexpectedCharacters as e2: | |||
e = e2 | |||
return self.parser.parse(text, start=start, on_error=on_error) | |||
@property | |||
def source(self): | |||
@@ -8,7 +8,7 @@ import pkgutil | |||
from ast import literal_eval | |||
from numbers import Integral | |||
from .utils import bfs, Py36, logger, classify_bool, is_id_continue, is_id_start | |||
from .utils import bfs, Py36, logger, classify_bool, is_id_continue, is_id_start, bfs_all_unique | |||
from .lexer import Token, TerminalDef, PatternStr, PatternRE | |||
from .parse_tree_builder import ParseTreeBuilder | |||
@@ -16,7 +16,7 @@ from .parser_frontends import ParsingFrontend | |||
from .common import LexerConf, ParserConf | |||
from .grammar import RuleOptions, Rule, Terminal, NonTerminal, Symbol | |||
from .utils import classify, suppress, dedup_list, Str | |||
from .exceptions import GrammarError, UnexpectedCharacters, UnexpectedToken | |||
from .exceptions import GrammarError, UnexpectedCharacters, UnexpectedToken, ParseError | |||
from .tree import Tree, SlottedTree as ST | |||
from .visitors import Transformer, Visitor, v_args, Transformer_InPlace, Transformer_NonRecursive | |||
@@ -853,6 +853,54 @@ def _parse_grammar(text, name, start='start'): | |||
return PrepareGrammar().transform(tree) | |||
def _error_repr(error):
    """Return a one-line, human-readable description of a grammar parse error.

    For UnexpectedToken errors, first try to translate the exception by
    re-parsing against the meta-grammar (which can produce a friendlier
    message); fall back to listing the acceptable token types.
    Any other error is simply stringified.
    """
    if not isinstance(error, UnexpectedToken):
        return str(error)
    translated = _translate_parser_exception(_get_parser().parse, error)
    if translated:
        return translated
    options = ', '.join(error.accepts or error.expected)
    return "Unexpected token %r. Expected one of: {%s}" % (str(error.token), options)
def _search_puppet(puppet, predicate):
    """Breadth-first search over parser-puppet states.

    Starting from `puppet`, tries feeding every legal token type and returns
    the first `(token_path, puppet_state)` whose state satisfies `predicate`.
    Returns None implicitly if no reachable state matches.
    """
    def successors(node):
        # Expand a (path, state) node by feeding each currently-legal token.
        path, state = node
        for token_type in state.choices():
            probe = Token(token_type, '')
            try:
                next_state = state.feed_token(probe)
            except ParseError:
                continue  # this token type turned out to be illegal here
            yield path + (token_type,), next_state

    for path, state in bfs_all_unique([((), puppet)], successors):
        if predicate(state):
            return path, state
def find_grammar_errors(text, start='start'):
    """Parse a lark grammar in error-collecting mode.

    Returns a list of (exception, message) pairs, at most one per source line,
    in line order. Puppet references are stripped from the collected
    exceptions before returning so they can't be misused afterwards.
    """
    collected = []

    def on_error(e):
        collected.append((e, _error_repr(e)))

        # Recover by synthesizing tokens until a newline becomes legal,
        # then feed the newline — i.e. skip to the next line.
        token_path, _ = _search_puppet(e.puppet.as_immutable(), lambda p: '_NL' in p.choices())
        for token_type in token_path:
            e.puppet.feed_token(Token(token_type, ''))
        e.puppet.feed_token(Token('_NL', '\n'))
        return True

    _get_parser().parse(text + '\n', start, on_error=on_error)

    by_line = classify(collected, lambda item: item[0].line)
    result = [entries[0] for entries in by_line.values()]  # already sorted

    for err, _msg in result:
        err.puppet = None  # drop parser state; not safe to expose to callers
    return result
def _get_mangle(prefix, aliases, base_mangle=None): | |||
def mangle(s): | |||
if s in aliases: | |||
@@ -101,18 +101,16 @@ class ParsingFrontend(Serialize): | |||
self.lexer = PostLexConnector(self.lexer, lexer_conf.postlex) | |||
def parse(self, text, start=None, on_error=None):
    """Lex (unless skip_lexer) and parse `text`.

    Parameters:
        text: the input to parse.
        start: start-rule name; defaults to the single configured start rule.
               Raises ConfigurationError if several are configured and none
               is chosen.
        on_error: optional error-recovery callback, forwarded to the parser
               only when given (keeps backward compatibility with parsers
               whose parse() does not accept the keyword).
    """
    # NOTE(review): the stripped diff left a duplicate `def parse` and the
    # superseded pre-refactor body interleaved here; this is the single
    # coherent post-refactor method.
    if start is None:
        start = self.parser_conf.start
        if len(start) > 1:
            raise ConfigurationError("Lark initialized with more than 1 possible start rule. Must specify which start rule to parse", start)
        start ,= start

    # The parser consumes either the raw text or a lexer thread over it.
    stream = text if self.skip_lexer else LexerThread(self.lexer, text)
    kw = {} if on_error is None else {'on_error': on_error}
    return self.parser.parse(stream, start, **kw)
def get_frontend(parser, lexer): | |||
@@ -9,6 +9,7 @@ from ..utils import Serialize | |||
from .lalr_analysis import LALR_Analyzer, Shift, Reduce, IntParseTable | |||
from .lalr_puppet import ParserPuppet | |||
from lark.exceptions import UnexpectedCharacters, UnexpectedInput, UnexpectedToken | |||
###{standalone | |||
@@ -32,8 +33,35 @@ class LALR_Parser(Serialize): | |||
def serialize(self, memo): | |||
return self._parse_table.serialize(memo) | |||
def parse(self, lexer, start, on_error=None):
    """Parse the stream produced by `lexer` from rule `start`.

    Without `on_error`, any UnexpectedInput propagates unchanged. With
    `on_error`, each error is passed to the callback: a falsy return
    re-raises; a truthy return resumes parsing from the error's puppet
    state, looping until the parse completes or recovery stops making
    progress.

    (Review note: a dead pre-refactor `def parse(self, *args)` that this
    definition immediately shadowed has been removed; the recovery loop
    below is intentionally kept logic-identical.)
    """
    try:
        return self.parser.parse(lexer, start)
    except UnexpectedInput as e:
        if on_error is None:
            raise
        while True:
            if isinstance(e, UnexpectedCharacters):
                # Remember the lexer position so we can detect a no-op callback.
                s = e.puppet.lexer_state.state
                p = s.line_ctr.char_pos
            if not on_error(e):
                raise e
            if isinstance(e, UnexpectedCharacters):
                # If user didn't change the character position, skip one
                # character ourselves so recovery can make progress.
                if p == s.line_ctr.char_pos:
                    s.line_ctr.feed(s.text[p:p+1])
            try:
                return e.puppet.resume_parse()
            except UnexpectedToken as e2:
                if isinstance(e, UnexpectedToken) and e.token.type == e2.token.type == '$END' and e.puppet == e2.puppet:
                    # Same $END error from the same state: recovery is stuck.
                    # Prevent infinite loop
                    raise e2
                e = e2
            except UnexpectedCharacters as e2:
                e = e2
class ParseConf(object): | |||
@@ -318,6 +318,14 @@ def bfs(initial, expand): | |||
visited.add(next_node) | |||
open_q.append(next_node) | |||
def bfs_all_unique(initial, expand):
    "bfs, but doesn't keep track of visited (aka seen), because there can be no repetitions"
    queue = deque(initial)
    while queue:
        current = queue.popleft()
        yield current
        queue.extend(expand(current))
def _serialize(value, memo): | |||
if isinstance(value, Serialize): | |||
@@ -4,7 +4,7 @@ import sys | |||
from unittest import TestCase, main | |||
from lark import Lark, Token, Tree | |||
from lark.load_grammar import GrammarError, GRAMMAR_ERRORS | |||
from lark.load_grammar import GrammarError, GRAMMAR_ERRORS, find_grammar_errors | |||
from lark.load_grammar import FromPackageLoader | |||
@@ -160,6 +160,41 @@ class TestGrammar(TestCase): | |||
x = p.parse('12 capybaras') | |||
self.assertEqual(x.children, ['12', 'capybaras']) | |||
def test_find_grammar_errors(self):
    """find_grammar_errors reports one error per broken line, in line order."""
    # Single malformed rule definition (line 3) and a bad terminal (line 5).
    text = """
a: rule
b rule
c: rule
B.: "hello" f
D: "okay"
"""
    assert [e.line for e, _s in find_grammar_errors(text)] == [3, 5]

    # A stray alternative after the broken rule adds its own error (line 4).
    text = """
a: rule
b rule
| ok
c: rule
B.: "hello" f
D: "okay"
"""
    assert [e.line for e, _s in find_grammar_errors(text)] == [3, 4, 6]

    # Garbage inside otherwise well-formed definitions.
    text = """
a: rule @#$#@$@&&
b: rule
| ok
c: rule
B: "hello" f @
D: "okay"
"""
    # Fixed: the original assigned `x` and then re-parsed the same text a
    # second time, discarding the first result.
    x = find_grammar_errors(text)
    assert [e.line for e, _s in x] == [2, 6]
# Run this grammar test module directly (outside a test runner).
if __name__ == '__main__':
    main()