Correction for PR

- `user_repr` is now a method
- Fix for Python 2.7
- excepts -> expected
4 years ago · 8ec6d0f2ab
--- a/lark-stubs/lexer.pyi
+++ b/lark-stubs/lexer.pyi
@@ -12,6 +12,7 @@ _T = TypeVar('_T')
 class Pattern(ABC):
    value: str
    flags: Collection[str]
    raw: str

    def __init__(self, value: str, flags: Collection[str] = ...):
        ...
@@ -73,6 +74,8 @@ class TerminalDef:

    def __init__(self, name: str, pattern: Pattern, priority: int = ...):
        ...
    
    def user_repr(self) -> str: ...


 class Token(str):
--- a/lark/common.py
+++ b/lark/common.py
@@ -12,6 +12,8 @@ class LexerConf(Serialize):

    def __init__(self, terminals, re_module, ignore=(), postlex=None, callbacks=None, g_regex_flags=0, skip_validation=False, use_bytes=False):
        self.terminals = terminals
        self.terminals_by_names = {t.name: t for t in self.terminals}
        assert len(self.terminals) == len(self.terminals_by_names)
        self.ignore = ignore
        self.postlex = postlex
        self.callbacks = callbacks or {}
@@ -25,6 +27,9 @@ class LexerConf(Serialize):
    def tokens(self):
        warn("LexerConf.tokens is deprecated. Use LexerConf.terminals instead", DeprecationWarning)
        return self.terminals
    
    def _deserialize(self):
        self.terminals_by_names = {t.name: t for t in self.terminals}



--- a/lark/exceptions.py
+++ b/lark/exceptions.py
@@ -1,5 +1,6 @@
 from .utils import STRING_TYPE, logger


 ###{standalone


@@ -39,7 +40,7 @@ class UnexpectedInput(LarkError):
    After catching one of these exceptions, you may call the following helper methods to create a nicer error message.
    """
    pos_in_stream = None
    _all_terminals = None
    _terminals_by_name = None

    def get_context(self, text, span=40):
        """Returns a pretty string pinpointing the error in the text,
@@ -96,7 +97,7 @@ class UnexpectedInput(LarkError):
                    if ut.state == self.state:
                        if use_accepts and hasattr(self, 'accepts') and ut.accepts != self.accepts:
                            logger.debug("Different accepts with same state[%d]: %s != %s at example [%s][%s]" %
                                        (self.state, self.accepts, ut.accepts, i, j))
                                         (self.state, self.accepts, ut.accepts, i, j))
                            continue
                        try:
                            if ut.token == self.token:  # Try exact match first
@@ -116,71 +117,69 @@ class UnexpectedInput(LarkError):
                            candidate = label, False

        return candidate[0]
    
    def _format_terminals(self, names):
        if self._all_terminals:
            if isinstance(self._all_terminals, list):
                self._all_terminals = {t.name: t for t in self._all_terminals}

    def _format_expected(self, expected):
        if self._terminals_by_name:
            ts = []
            for name in names:
                try:
                    ts.append(self._all_terminals[name].user_repr)
                except StopIteration:
                    # If we don't find the corresponding Terminal (which *should* never happen), don't error.
                    # Broken __str__ for Exception are some of the worst bugs
                    ts.append(name)
            for ter in expected:
                ts.append(self._terminals_by_name[ter].user_repr())
        else:
            ts = names
            ts = expected
        return "Expected one of: \n\t* %s\n" % '\n\t* '.join(ts)


 class UnexpectedEOF(ParseError, UnexpectedInput):
    def __init__(self, expected, state=None):
    def __init__(self, expected, state=None, terminals_by_name=None):
        self.expected = expected
        self.state = state
        from .lexer import Token
        self.token = Token("<EOF>", "") #, line=-1, column=-1, pos_in_stream=-1)
        self.token = Token("<EOF>", "")  # , line=-1, column=-1, pos_in_stream=-1)
        self.pos_in_stream = -1
        self.line = -1
        self.column = -1
        self._terminals_by_name = terminals_by_name

        message = ("Unexpected end-of-input. Expected one of: \n\t* %s\n" % '\n\t* '.join(x.name for x in self.expected))
        super(UnexpectedEOF, self).__init__(message)
        super(UnexpectedEOF, self).__init__()

    def __str__(self):
        message = "Unexpected end-of-input. "
        message += self._format_expected(self.expected)
        return message


 class UnexpectedCharacters(LexError, UnexpectedInput):
    def __init__(self, seq, lex_pos, line, column, allowed=None, considered_tokens=None, state=None, token_history=None, _all_terminals=None):
    def __init__(self, seq, lex_pos, line, column, allowed=None, considered_tokens=None, state=None, token_history=None,
                 terminals_by_name=None):
        # TODO considered_tokens and allowed can be figured out using state
        self.line = line
        self.column = column
        self.pos_in_stream = lex_pos
        self.state = state
        self._all_terminals = _all_terminals
        self._terminals_by_name = terminals_by_name

        self.allowed = allowed
        self.considered_tokens = considered_tokens
        self.token_history = token_history

        if isinstance(seq, bytes):
            self._s = seq[lex_pos:lex_pos+1].decode("ascii", "backslashreplace")
            self.char = seq[lex_pos:lex_pos + 1].decode("ascii", "backslashreplace")
        else:
            self._s = seq[lex_pos]
            self.char = seq[lex_pos]
        self._context = self.get_context(seq)
        

        super(UnexpectedCharacters, self).__init__()

    def __str__(self):
        # Be aware: Broken __str__ for Exceptions are terrible to debug. Make sure there is as little room as possible for errors
        # You will get just `UnexpectedCharacters: <str() failed>` or something like that
        # If you run into this, add an `except Exception as e: print(e); raise e` or similar.
        message = "No terminal defined for '%s' at line %d col %d" % (self._s, self.line, self.column)
        message = "No terminal defined for '%s' at line %d col %d" % (self.char, self.line, self.column)
        message += '\n\n' + self._context
        if self.allowed:
            message += self._format_terminals(self.allowed)
            message += self._format_expected(self.allowed)
        if self.token_history:
            message += '\nPrevious tokens: %s\n' % ', '.join(repr(t) for t in self.token_history)
        return message

 _not_set_marker = object()

 class UnexpectedToken(ParseError, UnexpectedInput):
    """When the parser throws UnexpectedToken, it instantiates a puppet
    with its internal state. Users can then interactively set the puppet to
@@ -188,7 +187,8 @@ class UnexpectedToken(ParseError, UnexpectedInput):

    see: :ref:`ParserPuppet`.
    """
    def __init__(self, token, expected, considered_rules=None, state=None, puppet=None, all_terminals=None, token_history=None):

    def __init__(self, token, expected, considered_rules=None, state=None, puppet=None, terminals_by_name=None, token_history=None):
        # TODO considered_rules and expected can be figured out using state
        self.line = getattr(token, 'line', '?')
        self.column = getattr(token, 'column', '?')
@@ -196,23 +196,24 @@ class UnexpectedToken(ParseError, UnexpectedInput):
        self.state = state

        self.token = token
        self.expected = expected     # XXX deprecate? `accepts` is better
        self.expected = expected  # XXX deprecate? `accepts` is better
        self._accepts = _not_set_marker
        self.considered_rules = considered_rules
        self.puppet = puppet
        self._all_terminals = all_terminals
        self._terminals_by_name = terminals_by_name
        self.token_history = token_history


        super(UnexpectedToken, self).__init__()
    

    @property
    def accepts(self):
        return self.puppet and self.puppet.accepts()
    
        if self._accepts is _not_set_marker:
            self._accepts =  self.puppet and self.puppet.accepts()
        return self._accepts

    def __str__(self):
        # Be aware: Broken __str__ for Exceptions are terrible to debug. Make sure there is as little room as possible for errors
        message = ("Unexpected token %r at line %s, column %s.\n%s"
                   % (self.token, self.line, self.column, self._format_terminals(self.accepts or self.expected)))
                   % (self.token, self.line, self.column, self._format_expected(self.accepts or self.expected)))
        if self.token_history:
            message += "Previous tokens: %r\n" % self.token_history

@@ -226,6 +227,7 @@ class VisitError(LarkError):
    - obj: the tree node or token it was processing when the exception was raised
    - orig_exc: the exception that cause it to fail
    """

    def __init__(self, rule, obj, orig_exc):
        self.obj = obj
        self.orig_exc = orig_exc
@@ -233,5 +235,4 @@ class VisitError(LarkError):
        message = 'Error trying to process rule "%s":\n\n%s' % (rule, orig_exc)
        super(VisitError, self).__init__(message)


 ###}
--- a/lark/lexer.py
+++ b/lark/lexer.py
@@ -81,18 +81,23 @@ class PatternRE(Pattern):


 class TerminalDef(Serialize):
    __serialize_fields__ = 'name', 'pattern', 'priority', 'user_repr'
    __serialize_fields__ = 'name', 'pattern', 'priority'
    __serialize_namespace__ = PatternStr, PatternRE

    def __init__(self, name, pattern, priority=1, user_repr=None):
    def __init__(self, name, pattern, priority=1):
        assert isinstance(pattern, Pattern), pattern
        self.name = name
        self.pattern = pattern
        self.priority = priority
        self.user_repr = user_repr or name

    def __repr__(self):
        return '%s(%r, %r)' % (type(self).__name__, self.name, self.pattern)
    
    def user_repr(self):
        if self.name.startswith('__'): # We represent a generated terminal
            return self.pattern.raw or self.name
        else:
            return self.name


 class Token(Str):
@@ -312,6 +317,7 @@ class TraditionalLexer(Lexer):
        self.user_callbacks = conf.callbacks
        self.g_regex_flags = conf.g_regex_flags
        self.use_bytes = conf.use_bytes
        self.terminals_by_names = conf.terminals_by_names

        self._mres = None

@@ -355,7 +361,7 @@ class TraditionalLexer(Lexer):
                    allowed = {"<END-OF-FILE>"}
                raise UnexpectedCharacters(lex_state.text, line_ctr.char_pos, line_ctr.line, line_ctr.column,
                                           allowed=allowed, token_history=lex_state.last_token and [lex_state.last_token],
                                           state=parser_state, _all_terminals=self.terminals)
                                           state=parser_state, terminals_by_name=self.terminals_by_names)

            value, type_ = res

@@ -397,10 +403,7 @@ class ContextualLexer(Lexer):

    def __init__(self, conf, states, always_accept=()):
        terminals = list(conf.terminals)
        tokens_by_name = {}
        for t in terminals:
            assert t.name not in tokens_by_name, t
            tokens_by_name[t.name] = t
        tokens_by_name = conf.terminals_by_names

        trad_conf = copy(conf)
        trad_conf.terminals = terminals
@@ -437,8 +440,13 @@ class ContextualLexer(Lexer):
        except UnexpectedCharacters as e:
            # In the contextual lexer, UnexpectedCharacters can mean that the terminal is defined, but not in the current context.
            # This tests the input against the global context, to provide a nicer error.
            token = self.root_lexer.next_token(lexer_state)
            raise UnexpectedToken(token, e.allowed, state=parser_state.position, token_history=[lexer_state.last_token], all_terminals=self.root_lexer.terminals)
            last_token = lexer_state.last_token # self.root_lexer.next_token will change this to the wrong token
            try:
                token = self.root_lexer.next_token(lexer_state, parser_state)
            except UnexpectedCharacters:
                raise e# Don't raise the exception that the root lexer raise. It has the wrong expected set.
            else:
                raise UnexpectedToken(token, e.allowed, state=parser_state, token_history=[last_token], terminals_by_name=self.root_lexer.terminals_by_names)

 class LexerThread:
    """A thread that ties a lexer instance and a lexer state, to be used by the parser"""
--- a/lark/load_grammar.py
+++ b/lark/load_grammar.py
@@ -322,7 +322,6 @@ class PrepareAnonTerminals(Transformer_InPlace):
            raise GrammarError(u'Conflicting flags for the same terminal: %s' % p)

        term_name = None
        user_repr = p.raw # This will always be ok, independent of what term_name we end up using

        if isinstance(p, PatternStr):
            try:
@@ -354,7 +353,7 @@ class PrepareAnonTerminals(Transformer_InPlace):
        if term_name not in self.term_set:
            assert p not in self.term_reverse
            self.term_set.add(term_name)
            termdef = TerminalDef(term_name, p, user_repr=user_repr)
            termdef = TerminalDef(term_name, p)
            self.term_reverse[p] = termdef
            self.terminals.append(termdef)

--- a/lark/parsers/earley.py
+++ b/lark/parsers/earley.py
@@ -302,7 +302,7 @@ class Parser:
        # this column. Find the item for the start_symbol, which is the root of the SPPF tree.
        solutions = [n.node for n in columns[-1] if n.is_complete and n.node is not None and n.s == start_symbol and n.start == 0]
        if not solutions:
            expected_terminals = [t.expect for t in to_scan]
            expected_terminals = [t.expect.name for t in to_scan]
            raise UnexpectedEOF(expected_terminals, state=frozenset(i.s for i in to_scan))

        if self.debug: