Browse Source

made error message contain actual source code

made _all_terminals a dict
added raw attribute to Pattern
renamed nice_print -> user_repr
tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.11.2
MegaIng1 3 years ago
parent
commit
bc9ed5376d
4 changed files with 18 additions and 18 deletions
  1. +7
    -5
      lark/exceptions.py
  2. +5
    -4
      lark/lexer.py
  3. +4
    -7
      lark/load_grammar.py
  4. +2
    -2
      lark/parser_frontends.py

+ 7
- 5
lark/exceptions.py View File

@@ -113,17 +113,19 @@ class UnexpectedInput(LarkError):
def _format_terminals(self, names):
    """Build the "Expected one of" section of the error message.

    Each terminal name in ``names`` is replaced by the ``user_repr`` of the
    matching entry in ``self._all_terminals`` when one is available;
    otherwise the bare name is used.

    ``self._all_terminals`` may be a dict mapping terminal name to terminal,
    or any iterable of terminals exposing ``.name`` (e.g. a list); an
    iterable is converted to a dict once and cached back on the instance.

    Returns the formatted string, one bullet per expected terminal.
    """
    terminals = self._all_terminals
    if terminals:
        if not isinstance(terminals, dict):
            # Accept a list (or any other iterable of terminals) and
            # normalise it to a name -> terminal mapping for direct lookup.
            terminals = {t.name: t for t in terminals}
            self._all_terminals = terminals
        # If we don't find the corresponding Terminal (which *should* never
        # happen), don't error: fall back to the plain name.
        # Broken __str__ for Exception are some of the worst bugs
        ts = [terminals[name].user_repr if name in terminals else name
              for name in names]
    else:
        ts = names
    return "Expected one of: \n\t* %s\n" % '\n\t* '.join(ts)








+ 5
- 4
lark/lexer.py View File

@@ -10,9 +10,10 @@ from copy import copy


class Pattern(Serialize): class Pattern(Serialize):


def __init__(self, value, flags=()):
def __init__(self, value, flags=(), raw=None):
self.value = value self.value = value
self.flags = frozenset(flags) self.flags = frozenset(flags)
self.raw = raw


def __repr__(self): def __repr__(self):
return repr(self.to_regexp()) return repr(self.to_regexp())
@@ -76,15 +77,15 @@ class PatternRE(Pattern):




class TerminalDef(Serialize): class TerminalDef(Serialize):
__serialize_fields__ = 'name', 'pattern', 'priority', 'nice_print'
__serialize_fields__ = 'name', 'pattern', 'priority', 'user_repr'
__serialize_namespace__ = PatternStr, PatternRE __serialize_namespace__ = PatternStr, PatternRE


def __init__(self, name, pattern, priority=1, nice_print=None):
def __init__(self, name, pattern, priority=1, user_repr=None):
assert isinstance(pattern, Pattern), pattern assert isinstance(pattern, Pattern), pattern
self.name = name self.name = name
self.pattern = pattern self.pattern = pattern
self.priority = priority self.priority = priority
self.nice_print = nice_print or name
self.user_repr = user_repr or name


def __repr__(self): def __repr__(self):
return '%s(%r, %r)' % (type(self).__name__, self.name, self.pattern) return '%s(%r, %r)' % (type(self).__name__, self.name, self.pattern)


+ 4
- 7
lark/load_grammar.py View File

@@ -317,11 +317,9 @@ class PrepareAnonTerminals(Transformer_InPlace):
raise GrammarError(u'Conflicting flags for the same terminal: %s' % p) raise GrammarError(u'Conflicting flags for the same terminal: %s' % p)


term_name = None term_name = None
nice_print = None
user_repr = p.raw # This will always be ok, independent of what term_name we end up using


if isinstance(p, PatternStr): if isinstance(p, PatternStr):
nice_print = repr(value) # This will always be ok, independent of what term_name we end up using
# TODO: potentially try to get the actual source code, and not the repr
try: try:
# If already defined, use the user-defined terminal name # If already defined, use the user-defined terminal name
term_name = self.term_reverse[p].name term_name = self.term_reverse[p].name
@@ -337,7 +335,6 @@ class PrepareAnonTerminals(Transformer_InPlace):
term_name = None term_name = None


elif isinstance(p, PatternRE): elif isinstance(p, PatternRE):
#TODO: generate nice_print
if p in self.term_reverse: # Kind of a weird placement.name if p in self.term_reverse: # Kind of a weird placement.name
term_name = self.term_reverse[p].name term_name = self.term_reverse[p].name
else: else:
@@ -350,7 +347,7 @@ class PrepareAnonTerminals(Transformer_InPlace):
if term_name not in self.term_set: if term_name not in self.term_set:
assert p not in self.term_reverse assert p not in self.term_reverse
self.term_set.add(term_name) self.term_set.add(term_name)
termdef = TerminalDef(term_name, p, nice_print=nice_print)
termdef = TerminalDef(term_name, p, user_repr=user_repr)
self.term_reverse[p] = termdef self.term_reverse[p] = termdef
self.terminals.append(termdef) self.terminals.append(termdef)


@@ -426,9 +423,9 @@ def _literal_to_pattern(literal):


if literal.type == 'STRING': if literal.type == 'STRING':
s = s.replace('\\\\', '\\') s = s.replace('\\\\', '\\')
return PatternStr(s, flags)
return PatternStr(s, flags, raw=literal.value)
elif literal.type == 'REGEXP': elif literal.type == 'REGEXP':
return PatternRE(s, flags)
return PatternRE(s, flags, raw=literal.value)
else: else:
assert False, 'Invariant failed: literal.type not in ["STRING", "REGEXP"]' assert False, 'Invariant failed: literal.type not in ["STRING", "REGEXP"]'




+ 2
- 2
lark/parser_frontends.py View File

@@ -196,7 +196,7 @@ class Earley(WithLexer):


class XEarley(_ParserFrontend): class XEarley(_ParserFrontend):
def __init__(self, lexer_conf, parser_conf, options=None, **kw): def __init__(self, lexer_conf, parser_conf, options=None, **kw):
self.token_by_name = {t.name:t for t in lexer_conf.terminals}
self.terminals_by_name = {t.name:t for t in lexer_conf.terminals}
self.start = parser_conf.start self.start = parser_conf.start


self._prepare_match(lexer_conf) self._prepare_match(lexer_conf)
@@ -238,7 +238,7 @@ class XEarley(_ParserFrontend):
return self._parse(start, text) return self._parse(start, text)
except UnexpectedInput as e: except UnexpectedInput as e:
if e._all_terminals is None: if e._all_terminals is None:
e._all_terminals = self.token_by_name.values()
e._all_terminals = self.terminals_by_name
raise e raise e


class XEarley_CompleteLex(XEarley): class XEarley_CompleteLex(XEarley):


Loading…
Cancel
Save