From a4ddb1e84f58e892282788a94197cd76bf7d99f9 Mon Sep 17 00:00:00 2001
From: MegaIng
Date: Wed, 18 Nov 2020 12:03:13 +0100
Subject: [PATCH 01/13] Add missing test case to `__main__`

---
 tests/__main__.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/__main__.py b/tests/__main__.py
index 5ec89e3..1814564 100644
--- a/tests/__main__.py
+++ b/tests/__main__.py
@@ -9,6 +9,7 @@ from .test_tools import TestStandalone
 from .test_cache import TestCache
 from .test_grammar import TestGrammar
 from .test_reconstructor import TestReconstructor
+from .test_tree_forest_transformer import TestTreeForestTransformer
 
 try:
     from .test_nearley.test_nearley import TestNearley

From 11a0052eb5b0fef5fcba836f55394c668a90ac3a Mon Sep 17 00:00:00 2001
From: Greg Ward
Date: Wed, 18 Nov 2020 14:15:37 -0500
Subject: [PATCH 02/13] Fix incorrect type hint for 'lexer' argument to Lark
 constructor

The code is crystal clear:

    assert lexer in ('standard', ...) or issubclass(lexer, Lexer)

But the type hint said that lexer must be an _instance_ of Lexer, not a
subclass. This change fixes it to require a subclass of Lexer.
---
 lark-stubs/lark.pyi | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lark-stubs/lark.pyi b/lark-stubs/lark.pyi
index 8363a5d..7dc8626 100644
--- a/lark-stubs/lark.pyi
+++ b/lark-stubs/lark.pyi
@@ -63,7 +63,7 @@ class Lark:
         *,
         start: Union[None, str, List[str]] = "start",
         parser: Literal["earley", "lalr", "cyk"] = "auto",
-        lexer: Union[Literal["auto", "standard", "contextual", "dynamic", "dynamic_complete"], Lexer] = "auto",
+        lexer: Union[Literal["auto", "standard", "contextual", "dynamic", "dynamic_complete"], Type[Lexer]] = "auto",
         transformer: Optional[Transformer] = None,
         postlex: Optional[PostLex] = None,
         ambiguity: Literal["explicit", "resolve"] = "resolve",

From 68e5e86b5ba84edd3e92f9ba16aca1bb8174e72c Mon Sep 17 00:00:00 2001
From: Greg Ward
Date: Wed, 18 Nov 2020 14:34:01 -0500
Subject: [PATCH 03/13] Add missing type hint for Token constructor

mypy thinks that Token's __init__ is inherited from str. That's not
wrong -- it's just irrelevant, because Token also implements __new__().
Token's _effective_ constructor signature is determined by its
__new__() method, so that's what I have used in the type hint.

Not clear if 'value' is supposed to be Any, but that's what I need in
my application. And it works just fine!
---
 lark-stubs/lexer.pyi | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/lark-stubs/lexer.pyi b/lark-stubs/lexer.pyi
index 12d3dfe..a654b0f 100644
--- a/lark-stubs/lexer.pyi
+++ b/lark-stubs/lexer.pyi
@@ -85,6 +85,9 @@ class Token(str):
     end_column: int
     end_pos: int
 
+    def __init__(self, type_: str, value: Any, pos_in_stream: int = None, line: int = None, column: int = None, end_line: int = None, end_column: int = None, end_pos: int = None):
+        ...
+
     def update(self, type_: Optional[str] = None, value: Optional[str] = None) -> Token:
         ...
 

From 1fc08100860de65709c9e8f533f8a81087206e65 Mon Sep 17 00:00:00 2001
From: ThatXliner
Date: Fri, 20 Nov 2020 13:24:55 -0800
Subject: [PATCH 04/13] Fix broken link in README

See https://gitter.im/lark-parser/Lobby?at=5fb83369771c185e0eb8c0e2
---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index d9afc7f..425d4bb 100644
--- a/README.md
+++ b/README.md
@@ -106,7 +106,7 @@ Lark is great at handling ambiguity. Here is the result of parsing the phrase "f
 - MyPy support using type stubs
 - And much more!
 
-See the full list of [features here](https://lark-parser.readthedocs.io/en/latest/features/) +See the full list of [features here](https://lark-parser.readthedocs.io/en/latest/features.html) ### Comparison to other libraries From 2e06d4c000d108505944b3863fe4581b40f1e066 Mon Sep 17 00:00:00 2001 From: ThatXliner Date: Fri, 20 Nov 2020 13:28:47 -0800 Subject: [PATCH 05/13] Update README.md Fixed pyparsing link to point to https://github.com/pyparsing/pyparsing --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 425d4bb..2d69420 100644 --- a/README.md +++ b/README.md @@ -132,7 +132,7 @@ Check out the [JSON tutorial](/docs/json_tutorial.md#conclusion) for more detail |:--------|:----------|:----|:--------|:------------|:------------|:----------|:---------- | **Lark** | Earley/LALR(1) | EBNF | Yes! | Yes! | Yes! | Yes! | Yes! (LALR only) | | [PLY](http://www.dabeaz.com/ply/) | LALR(1) | BNF | No | No | No | No | No | -| [PyParsing](http://pyparsing.wikispaces.com/) | PEG | Combinators | No | No | No\* | No | No | +| [PyParsing](https://github.com/pyparsing/pyparsing) | PEG | Combinators | No | No | No\* | No | No | | [Parsley](https://pypi.python.org/pypi/Parsley) | PEG | EBNF | No | No | No\* | No | No | | [Parsimonious](https://github.com/erikrose/parsimonious) | PEG | EBNF | Yes | No | No\* | No | No | | [ANTLR](https://github.com/antlr/antlr4) | LL(*) | EBNF | Yes | No | Yes? | Yes | No | From 4e442bc0b8379fb764b26b46f99d2bf32eb580c4 Mon Sep 17 00:00:00 2001 From: MegaIng1 Date: Sat, 21 Nov 2020 20:56:14 +0100 Subject: [PATCH 06/13] regression-fix for #760 --- lark-stubs/lexer.pyi | 2 +- lark/lexer.py | 13 +++++++------ 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/lark-stubs/lexer.pyi b/lark-stubs/lexer.pyi index a654b0f..3f246fb 100644 --- a/lark-stubs/lexer.pyi +++ b/lark-stubs/lexer.pyi @@ -139,7 +139,7 @@ class TraditionalLexer(Lexer): def lex(self, stream: str) -> Iterator[Token]: ... - def next_token(self, lex_state: Any) -> Token: + def next_token(self, lex_state: Any, parser_state: Any = None) -> Token: ... 
class ContextualLexer(Lexer): diff --git a/lark/lexer.py b/lark/lexer.py index 4c420e7..6d69ec9 100644 --- a/lark/lexer.py +++ b/lark/lexer.py @@ -338,12 +338,12 @@ class TraditionalLexer(Lexer): if m: return m.group(0), type_from_index[m.lastindex] - def lex(self, state, _parser_state): + def lex(self, state, parser_state): with suppress(EOFError): while True: - yield self.next_token(state) + yield self.next_token(state, parser_state) - def next_token(self, lex_state): + def next_token(self, lex_state, parser_state=None): line_ctr = lex_state.line_ctr while line_ctr.char_pos < len(lex_state.text): res = self.match(lex_state.text, line_ctr.char_pos) @@ -352,7 +352,8 @@ class TraditionalLexer(Lexer): if not allowed: allowed = {""} raise UnexpectedCharacters(lex_state.text, line_ctr.char_pos, line_ctr.line, line_ctr.column, - allowed=allowed, token_history=lex_state.last_token and [lex_state.last_token]) + allowed=allowed, token_history=lex_state.last_token and [lex_state.last_token], + state=(parser_state and parser_state.position)) value, type_ = res @@ -428,13 +429,13 @@ class ContextualLexer(Lexer): try: while True: lexer = self.lexers[parser_state.position] - yield lexer.next_token(lexer_state) + yield lexer.next_token(lexer_state, parser_state) except EOFError: pass except UnexpectedCharacters as e: # In the contextual lexer, UnexpectedCharacters can mean that the terminal is defined, but not in the current context. # This tests the input against the global context, to provide a nicer error. - token = self.root_lexer.next_token(lexer_state) + token = self.root_lexer.next_token(lexer_state, parser_state) raise UnexpectedToken(token, e.allowed, state=parser_state.position) From b3eb2a31201b3aba6626502ee771885a556438a6 Mon Sep 17 00:00:00 2001 From: Erez Sh Date: Sat, 21 Nov 2020 22:51:22 +0200 Subject: [PATCH 07/13] Add token_history to UnexpectedToken --- lark/exceptions.py | 6 +++++- lark/lexer.py | 2 +- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/lark/exceptions.py b/lark/exceptions.py index 8444a65..ed7b9c7 100644 --- a/lark/exceptions.py +++ b/lark/exceptions.py @@ -147,7 +147,7 @@ class UnexpectedToken(ParseError, UnexpectedInput): see: :ref:`ParserPuppet`. """ - def __init__(self, token, expected, considered_rules=None, state=None, puppet=None): + def __init__(self, token, expected, considered_rules=None, state=None, puppet=None, token_history=None): self.line = getattr(token, 'line', '?') self.column = getattr(token, 'column', '?') self.pos_in_stream = getattr(token, 'pos_in_stream', None) @@ -157,6 +157,7 @@ class UnexpectedToken(ParseError, UnexpectedInput): self.expected = expected # XXX deprecate? `accepts` is better self.considered_rules = considered_rules self.puppet = puppet + self.token_history = token_history # TODO Only calculate `accepts()` when we need to display it to the user # This will improve performance when doing automatic error handling @@ -166,6 +167,9 @@ class UnexpectedToken(ParseError, UnexpectedInput): "Expected one of: \n\t* %s\n" % (token, self.line, self.column, '\n\t* '.join(self.accepts or self.expected))) + if self.token_history: + message += "Previous tokens: %r\n" % token_history + super(UnexpectedToken, self).__init__(message) diff --git a/lark/lexer.py b/lark/lexer.py index 6d69ec9..8be8acd 100644 --- a/lark/lexer.py +++ b/lark/lexer.py @@ -436,7 +436,7 @@ class ContextualLexer(Lexer): # In the contextual lexer, UnexpectedCharacters can mean that the terminal is defined, but not in the current context. 
# This tests the input against the global context, to provide a nicer error. token = self.root_lexer.next_token(lexer_state, parser_state) - raise UnexpectedToken(token, e.allowed, state=parser_state.position) + raise UnexpectedToken(token, e.allowed, state=parser_state.position, token_history=[lexer_state.last_token]) class LexerThread: From f285cda4f25ae9f459bc772a682f2f384bacddd2 Mon Sep 17 00:00:00 2001 From: Erez Sh Date: Mon, 16 Nov 2020 17:19:26 +0200 Subject: [PATCH 08/13] Earley error reporting - initial (Issue #760) --- examples/advanced/error_reporting_earley.py | 79 +++++++++++++++++++++ examples/advanced/error_reporting_lalr.py | 2 +- lark/__init__.py | 2 +- lark/exceptions.py | 24 ++++--- lark/parsers/earley.py | 2 +- lark/parsers/xearley.py | 2 +- 6 files changed, 98 insertions(+), 13 deletions(-) create mode 100644 examples/advanced/error_reporting_earley.py diff --git a/examples/advanced/error_reporting_earley.py b/examples/advanced/error_reporting_earley.py new file mode 100644 index 0000000..f0bcc20 --- /dev/null +++ b/examples/advanced/error_reporting_earley.py @@ -0,0 +1,79 @@ +""" +Example-Driven Error Reporting +============================== + +A demonstration of example-driven error reporting with the Earley parser +(See also: error_reporting_lalr.py) +""" +from lark import Lark, UnexpectedInput + +from _json_parser import json_grammar # Using the grammar from the json_parser example + +json_parser = Lark(json_grammar) + +class JsonSyntaxError(SyntaxError): + def __str__(self): + context, line, column = self.args + return '%s at line %s, column %s.\n\n%s' % (self.label, line, column, context) + +class JsonMissingValue(JsonSyntaxError): + label = 'Missing Value' + +class JsonMissingOpening(JsonSyntaxError): + label = 'Missing Opening' + +class JsonMissingClosing(JsonSyntaxError): + label = 'Missing Closing' + +class JsonMissingComma(JsonSyntaxError): + label = 'Missing Comma' + +class JsonTrailingComma(JsonSyntaxError): + label = 'Trailing Comma' + + +def parse(json_text): + try: + j = json_parser.parse(json_text) + except UnexpectedInput as u: + exc_class = u.match_examples(json_parser.parse, { + JsonMissingOpening: ['{"foo": ]}', + '{"foor": }}', + '{"foo": }'], + JsonMissingClosing: ['{"foo": [}', + '{', + '{"a": 1', + '[1'], + JsonMissingComma: ['[1 2]', + '[false 1]', + '["b" 1]', + '{"a":true 1:4}', + '{"a":1 1:4}', + '{"a":"b" 1:4}'], + JsonTrailingComma: ['[,]', + '[1,]', + '[1,2,]', + '{"foo":1,}', + '{"foo":false,"bar":true,}'] + }, use_accepts=True) + if not exc_class: + raise + raise exc_class(u.get_context(json_text), u.line, u.column) + + +def test(): + try: + parse('{"example1": "value"') + except JsonMissingClosing as e: + print(e) + + try: + parse('{"example2": ] ') + except JsonMissingOpening as e: + print(e) + + +if __name__ == '__main__': + test() + + diff --git a/examples/advanced/error_reporting_lalr.py b/examples/advanced/error_reporting_lalr.py index 102f7b1..c2cb239 100644 --- a/examples/advanced/error_reporting_lalr.py +++ b/examples/advanced/error_reporting_lalr.py @@ -3,7 +3,7 @@ Example-Driven Error Reporting ============================== A demonstration of example-driven error reporting with the LALR parser - +(See also: error_reporting_earley.py) """ from lark import Lark, UnexpectedInput diff --git a/lark/__init__.py b/lark/__init__.py index 814fe66..168a969 100644 --- a/lark/__init__.py +++ b/lark/__init__.py @@ -3,7 +3,7 @@ from .tree import Tree from .visitors import Transformer, Visitor, v_args, Discard, Transformer_NonRecursive 
from .visitors import InlineTransformer, inline_args # XXX Deprecated from .exceptions import (ParseError, LexError, GrammarError, UnexpectedToken, - UnexpectedInput, UnexpectedCharacters, LarkError) + UnexpectedInput, UnexpectedCharacters, UnexpectedEOF, LarkError) from .lexer import Token from .lark import Lark diff --git a/lark/exceptions.py b/lark/exceptions.py index ed7b9c7..ab4b139 100644 --- a/lark/exceptions.py +++ b/lark/exceptions.py @@ -19,14 +19,6 @@ class LexError(LarkError): pass -class UnexpectedEOF(ParseError): - def __init__(self, expected): - self.expected = expected - - message = ("Unexpected end-of-input. Expected one of: \n\t* %s\n" % '\n\t* '.join(x.name for x in self.expected)) - super(UnexpectedEOF, self).__init__(message) - - class UnexpectedInput(LarkError): """UnexpectedInput Error. @@ -47,6 +39,7 @@ class UnexpectedInput(LarkError): The parser doesn't hold a copy of the text it has to parse, so you have to provide it again """ + assert self.pos_in_stream is not None, self pos = self.pos_in_stream start = max(pos - span, 0) end = pos + span @@ -91,7 +84,7 @@ class UnexpectedInput(LarkError): parse_fn(malformed) except UnexpectedInput as ut: if ut.state == self.state: - if use_accepts and ut.accepts != self.accepts: + if use_accepts and hasattr(self, 'accepts') and ut.accepts != self.accepts: logger.debug("Different accepts with same state[%d]: %s != %s at example [%s][%s]" % (self.state, self.accepts, ut.accepts, i, j)) continue @@ -114,6 +107,19 @@ class UnexpectedInput(LarkError): return candidate[0] +class UnexpectedEOF(ParseError, UnexpectedInput): + def __init__(self, expected, state=None): + self.expected = expected + self.state = state + from .lexer import Token + self.token = Token("", "") #, line=-1, column=-1, pos_in_stream=-1) + self.pos_in_stream = -1 + self.line = -1 + self.column = -1 + + message = ("Unexpected end-of-input. 
Expected one of: \n\t* %s\n" % '\n\t* '.join(x.name for x in self.expected)) + super(UnexpectedEOF, self).__init__(message) + class UnexpectedCharacters(LexError, UnexpectedInput): def __init__(self, seq, lex_pos, line, column, allowed=None, considered_tokens=None, state=None, token_history=None): diff --git a/lark/parsers/earley.py b/lark/parsers/earley.py index f0bb7f5..aa18371 100644 --- a/lark/parsers/earley.py +++ b/lark/parsers/earley.py @@ -299,7 +299,7 @@ class Parser: solutions = [n.node for n in columns[-1] if n.is_complete and n.node is not None and n.s == start_symbol and n.start == 0] if not solutions: expected_terminals = [t.expect for t in to_scan] - raise UnexpectedEOF(expected_terminals) + raise UnexpectedEOF(expected_terminals, state={i.s for i in to_scan}) if self.debug: from .earley_forest import ForestToPyDotVisitor diff --git a/lark/parsers/xearley.py b/lark/parsers/xearley.py index 256fc2c..ae98f0f 100644 --- a/lark/parsers/xearley.py +++ b/lark/parsers/xearley.py @@ -113,7 +113,7 @@ class Parser(BaseParser): del delayed_matches[i+1] # No longer needed, so unburden memory if not next_set and not delayed_matches and not next_to_scan: - raise UnexpectedCharacters(stream, i, text_line, text_column, {item.expect.name for item in to_scan}, set(to_scan)) + raise UnexpectedCharacters(stream, i, text_line, text_column, {item.expect.name for item in to_scan}, set(to_scan), state={i.s for i in next_to_scan}) return next_to_scan From 1aff84391a416cd28cd086dd78ff4b08058b9884 Mon Sep 17 00:00:00 2001 From: Erez Sh Date: Sat, 21 Nov 2020 23:08:58 +0200 Subject: [PATCH 09/13] Added test for match_examples --- lark/exceptions.py | 2 +- tests/test_parser.py | 28 ++++++++++++++++++++++++---- 2 files changed, 25 insertions(+), 5 deletions(-) diff --git a/lark/exceptions.py b/lark/exceptions.py index ab4b139..92ac019 100644 --- a/lark/exceptions.py +++ b/lark/exceptions.py @@ -101,7 +101,7 @@ class UnexpectedInput(LarkError): except AttributeError: pass - if not candidate[0]: + if candidate[0] is None: logger.debug("Same State match at example [%s][%s]" % (i, j)) candidate = label, False diff --git a/tests/test_parser.py b/tests/test_parser.py index 39bd00c..edb4b26 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -323,7 +323,7 @@ class TestParsers(unittest.TestCase): def test_alias(self): Lark("""start: ["a"] "b" ["c"] "e" ["f"] ["g"] ["h"] "x" -> d """) - + def test_backwards_custom_lexer(self): class OldCustomLexer(Lexer): def __init__(self, lexer_conf): @@ -331,12 +331,12 @@ class TestParsers(unittest.TestCase): def lex(self, text): yield Token('A', 'A') - + p = Lark(""" start: A %declare A """, parser='lalr', lexer=OldCustomLexer) - + r = p.parse('') self.assertEqual(r, Tree('start', [Token('A', 'A')])) @@ -866,7 +866,7 @@ class CustomLexer(Lexer): self.lexer = TraditionalLexer(copy(lexer_conf)) def lex(self, *args, **kwargs): return self.lexer.lex(*args, **kwargs) - + __future_interface__ = True def _tree_structure_check(a, b): @@ -2342,6 +2342,26 @@ def _make_parser_test(LEXER, PARSER): self.assertEqual(a.line, 1) self.assertEqual(b.line, 2) + @unittest.skipIf(LEXER=='standard' and PARSER!='lalr', "Puppet error handling only works with LALR for now") + def test_match_examples(self): + p = _Lark(r""" + start: "a" "b" "c" + """) + + def match_error(s): + try: + _ = p.parse(s) + except UnexpectedInput as u: + return u.match_examples(p.parse, { + 0: ['abe'], + 1: ['ab'], + }) + assert False + + assert match_error("abe") == 0 + assert match_error("ab") == 1 + + 
@unittest.skipIf(not regex or sys.version_info[0] == 2, 'Unicode and Python 2 do not place nicely together.') def test_unicode_class(self): "Tests that character classes from the `regex` module work correctly." From 7fa993320eebf8a27c28657f4c6d15b1bb210cd6 Mon Sep 17 00:00:00 2001 From: Erez Sh Date: Sun, 22 Nov 2020 10:04:48 +0200 Subject: [PATCH 10/13] match_examples() now works for Earley+Standard Note: This refactor opens the door for implementing a ContextualLexer for Earley. But unlike the existing one for LALR, it will have to be computed at runtime, rather than ahead of time. --- lark/exceptions.py | 2 ++ lark/lexer.py | 4 ++-- lark/parser_frontends.py | 3 --- lark/parsers/earley.py | 16 ++++++++++------ lark/parsers/lalr_parser.py | 10 ++++++++-- lark/parsers/xearley.py | 3 ++- lark/tree_matcher.py | 10 +++++++++- tests/test_parser.py | 6 +++++- 8 files changed, 38 insertions(+), 16 deletions(-) diff --git a/lark/exceptions.py b/lark/exceptions.py index 92ac019..1d63561 100644 --- a/lark/exceptions.py +++ b/lark/exceptions.py @@ -123,6 +123,7 @@ class UnexpectedEOF(ParseError, UnexpectedInput): class UnexpectedCharacters(LexError, UnexpectedInput): def __init__(self, seq, lex_pos, line, column, allowed=None, considered_tokens=None, state=None, token_history=None): + # TODO considered_tokens and allowed can be figured out using state self.line = line self.column = column self.pos_in_stream = lex_pos @@ -154,6 +155,7 @@ class UnexpectedToken(ParseError, UnexpectedInput): see: :ref:`ParserPuppet`. """ def __init__(self, token, expected, considered_rules=None, state=None, puppet=None, token_history=None): + # TODO considered_tokens and allowed can be figured out using state self.line = getattr(token, 'line', '?') self.column = getattr(token, 'column', '?') self.pos_in_stream = getattr(token, 'pos_in_stream', None) diff --git a/lark/lexer.py b/lark/lexer.py index 8be8acd..bda8497 100644 --- a/lark/lexer.py +++ b/lark/lexer.py @@ -353,7 +353,7 @@ class TraditionalLexer(Lexer): allowed = {""} raise UnexpectedCharacters(lex_state.text, line_ctr.char_pos, line_ctr.line, line_ctr.column, allowed=allowed, token_history=lex_state.last_token and [lex_state.last_token], - state=(parser_state and parser_state.position)) + state=parser_state) value, type_ = res @@ -436,7 +436,7 @@ class ContextualLexer(Lexer): # In the contextual lexer, UnexpectedCharacters can mean that the terminal is defined, but not in the current context. # This tests the input against the global context, to provide a nicer error. 
token = self.root_lexer.next_token(lexer_state, parser_state) - raise UnexpectedToken(token, e.allowed, state=parser_state.position, token_history=[lexer_state.last_token]) + raise UnexpectedToken(token, e.allowed, state=parser_state, token_history=[lexer_state.last_token]) class LexerThread: diff --git a/lark/parser_frontends.py b/lark/parser_frontends.py index 337ddeb..abc0fba 100644 --- a/lark/parser_frontends.py +++ b/lark/parser_frontends.py @@ -173,9 +173,6 @@ class Earley(WithLexer): tree_class = options.tree_class or Tree if options.ambiguity != 'forest' else None self.parser = earley.Parser(parser_conf, self.match, resolve_ambiguity=resolve_ambiguity, debug=debug, tree_class=tree_class) - def make_lexer(self, text): - return WithLexer.make_lexer(self, text).lex(None) - def match(self, term, token): return term.name == token.type diff --git a/lark/parsers/earley.py b/lark/parsers/earley.py index aa18371..e4a220a 100644 --- a/lark/parsers/earley.py +++ b/lark/parsers/earley.py @@ -146,7 +146,7 @@ class Parser: column.add(new_item) items.append(new_item) - def _parse(self, stream, columns, to_scan, start_symbol=None): + def _parse(self, lexer, columns, to_scan, start_symbol=None): def is_quasi_complete(item): if item.is_complete: return True @@ -245,7 +245,7 @@ class Parser: if not next_set and not next_to_scan: expect = {i.expect.name for i in to_scan} - raise UnexpectedToken(token, expect, considered_rules = set(to_scan)) + raise UnexpectedToken(token, expect, considered_rules=set(to_scan), state=frozenset(i.expect for i in to_scan)) return next_to_scan @@ -261,20 +261,24 @@ class Parser: # Completions will be added to the SPPF tree, and predictions will be recursively # processed down to terminals/empty nodes to be added to the scanner for the next # step. + expects = {i.expect for i in to_scan} i = 0 - for token in stream: + for token in lexer.lex(expects): self.predict_and_complete(i, to_scan, columns, transitives) to_scan = scan(i, token, to_scan) i += 1 + expects.clear() + expects |= {i.expect for i in to_scan} + self.predict_and_complete(i, to_scan, columns, transitives) ## Column is now the final column in the parse. 
assert i == len(columns)-1 return to_scan - def parse(self, stream, start): + def parse(self, lexer, start): assert start, start start_symbol = NonTerminal(start) @@ -291,7 +295,7 @@ class Parser: else: columns[0].add(item) - to_scan = self._parse(stream, columns, to_scan, start_symbol) + to_scan = self._parse(lexer, columns, to_scan, start_symbol) # If the parse was successful, the start # symbol should have been completed in the last step of the Earley cycle, and will be in @@ -299,7 +303,7 @@ class Parser: solutions = [n.node for n in columns[-1] if n.is_complete and n.node is not None and n.s == start_symbol and n.start == 0] if not solutions: expected_terminals = [t.expect for t in to_scan] - raise UnexpectedEOF(expected_terminals, state={i.s for i in to_scan}) + raise UnexpectedEOF(expected_terminals, state=frozenset(i.expect for i in to_scan)) if self.debug: from .earley_forest import ForestToPyDotVisitor diff --git a/lark/parsers/lalr_parser.py b/lark/parsers/lalr_parser.py index e8c4432..3d006e7 100644 --- a/lark/parsers/lalr_parser.py +++ b/lark/parsers/lalr_parser.py @@ -3,7 +3,7 @@ # Author: Erez Shinan (2017) # Email : erezshin@gmail.com from copy import deepcopy, copy -from ..exceptions import UnexpectedCharacters, UnexpectedInput, UnexpectedToken +from ..exceptions import UnexpectedInput, UnexpectedToken from ..lexer import Token from .lalr_analysis import LALR_Analyzer, Shift, Reduce, IntParseTable @@ -62,6 +62,12 @@ class ParserState: def position(self): return self.state_stack[-1] + # Necessary for match_examples() to work + def __eq__(self, other): + if not isinstance(other, ParserState): + return False + return self.position == other.position + def __copy__(self): return type(self)( self.parse_conf, @@ -86,7 +92,7 @@ class ParserState: action, arg = states[state][token.type] except KeyError: expected = {s for s in states[state].keys() if s.isupper()} - raise UnexpectedToken(token, expected, state=state, puppet=None) + raise UnexpectedToken(token, expected, state=self, puppet=None) assert arg != end_state diff --git a/lark/parsers/xearley.py b/lark/parsers/xearley.py index ae98f0f..cf9b6ec 100644 --- a/lark/parsers/xearley.py +++ b/lark/parsers/xearley.py @@ -113,7 +113,8 @@ class Parser(BaseParser): del delayed_matches[i+1] # No longer needed, so unburden memory if not next_set and not delayed_matches and not next_to_scan: - raise UnexpectedCharacters(stream, i, text_line, text_column, {item.expect.name for item in to_scan}, set(to_scan), state={i.s for i in next_to_scan}) + raise UnexpectedCharacters(stream, i, text_line, text_column, {item.expect.name for item in to_scan}, + set(to_scan), state=frozenset(i.expect for i in to_scan)) return next_to_scan diff --git a/lark/tree_matcher.py b/lark/tree_matcher.py index 8c1f17a..c9d9fde 100644 --- a/lark/tree_matcher.py +++ b/lark/tree_matcher.py @@ -69,6 +69,14 @@ def parse_rulename(s): return name, args + +class ChildrenLexer: + def __init__(self, children): + self.children = children + + def lex(self, parser_state): + return self.children + class TreeMatcher: """Match the elements of a tree node, based on an ontology provided by a Lark grammar. 
@@ -173,6 +181,6 @@ class TreeMatcher: self._parser_cache[rulename] = parser # find a full derivation - unreduced_tree = parser.parse(tree.children, rulename) + unreduced_tree = parser.parse(ChildrenLexer(tree.children), rulename) assert unreduced_tree.data == rulename return unreduced_tree diff --git a/tests/test_parser.py b/tests/test_parser.py index edb4b26..863bf5d 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -2342,7 +2342,7 @@ def _make_parser_test(LEXER, PARSER): self.assertEqual(a.line, 1) self.assertEqual(b.line, 2) - @unittest.skipIf(LEXER=='standard' and PARSER!='lalr', "Puppet error handling only works with LALR for now") + @unittest.skipIf(PARSER=='cyk', "match_examples() not supported for CYK") def test_match_examples(self): p = _Lark(r""" start: "a" "b" "c" @@ -2355,11 +2355,15 @@ def _make_parser_test(LEXER, PARSER): return u.match_examples(p.parse, { 0: ['abe'], 1: ['ab'], + 2: ['cbc'], }) assert False assert match_error("abe") == 0 assert match_error("ab") == 1 + assert match_error("bbc") == 2 + assert match_error("cbc") == 2 + self.assertEqual( match_error("dbc"), 2 ) @unittest.skipIf(not regex or sys.version_info[0] == 2, 'Unicode and Python 2 do not place nicely together.') From e6bbfd16c0e50a7f20a51ff8edb3bf0797a68594 Mon Sep 17 00:00:00 2001 From: Erez Sh Date: Mon, 23 Nov 2020 10:24:44 +0200 Subject: [PATCH 11/13] Fixed comment --- lark/exceptions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lark/exceptions.py b/lark/exceptions.py index 1d63561..44f8cbb 100644 --- a/lark/exceptions.py +++ b/lark/exceptions.py @@ -155,7 +155,7 @@ class UnexpectedToken(ParseError, UnexpectedInput): see: :ref:`ParserPuppet`. """ def __init__(self, token, expected, considered_rules=None, state=None, puppet=None, token_history=None): - # TODO considered_tokens and allowed can be figured out using state + # TODO considered_rules and expected can be figured out using state self.line = getattr(token, 'line', '?') self.column = getattr(token, 'column', '?') self.pos_in_stream = getattr(token, 'pos_in_stream', None) From 70c233e3010ffbcb9eaeeec65944006558afcd43 Mon Sep 17 00:00:00 2001 From: Erez Sh Date: Mon, 23 Nov 2020 10:48:18 +0200 Subject: [PATCH 12/13] Update links in pypi (Issue #714) --- setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 382943e..b3897c5 100644 --- a/setup.py +++ b/setup.py @@ -29,8 +29,8 @@ setup( description = "a modern parsing library", license = "MIT", keywords = "Earley LALR parser parsing ast", - url = "https://github.com/erezsh/lark", - download_url = "https://github.com/erezsh/lark/tarball/master", + url = "https://github.com/lark-parser/lark", + download_url = "https://github.com/lark-parser/lark/tarball/master", long_description=''' Lark is a modern general-purpose parsing library for Python. 
From e6dcc434786a1f3a4e53581673e05c6663fcef16 Mon Sep 17 00:00:00 2001 From: Erez Sh Date: Mon, 23 Nov 2020 22:44:49 +0200 Subject: [PATCH 13/13] Improve match_examples() for Earley (Issue #760) --- lark/parsers/earley.py | 4 ++-- lark/parsers/xearley.py | 2 +- tests/test_parser.py | 3 ++- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/lark/parsers/earley.py b/lark/parsers/earley.py index e4a220a..320b59a 100644 --- a/lark/parsers/earley.py +++ b/lark/parsers/earley.py @@ -245,7 +245,7 @@ class Parser: if not next_set and not next_to_scan: expect = {i.expect.name for i in to_scan} - raise UnexpectedToken(token, expect, considered_rules=set(to_scan), state=frozenset(i.expect for i in to_scan)) + raise UnexpectedToken(token, expect, considered_rules=set(to_scan), state=frozenset(i.s for i in to_scan)) return next_to_scan @@ -303,7 +303,7 @@ class Parser: solutions = [n.node for n in columns[-1] if n.is_complete and n.node is not None and n.s == start_symbol and n.start == 0] if not solutions: expected_terminals = [t.expect for t in to_scan] - raise UnexpectedEOF(expected_terminals, state=frozenset(i.expect for i in to_scan)) + raise UnexpectedEOF(expected_terminals, state=frozenset(i.s for i in to_scan)) if self.debug: from .earley_forest import ForestToPyDotVisitor diff --git a/lark/parsers/xearley.py b/lark/parsers/xearley.py index cf9b6ec..d965421 100644 --- a/lark/parsers/xearley.py +++ b/lark/parsers/xearley.py @@ -114,7 +114,7 @@ class Parser(BaseParser): if not next_set and not delayed_matches and not next_to_scan: raise UnexpectedCharacters(stream, i, text_line, text_column, {item.expect.name for item in to_scan}, - set(to_scan), state=frozenset(i.expect for i in to_scan)) + set(to_scan), state=frozenset(i.s for i in to_scan)) return next_to_scan diff --git a/tests/test_parser.py b/tests/test_parser.py index 863bf5d..bb807d4 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -2355,7 +2355,7 @@ def _make_parser_test(LEXER, PARSER): return u.match_examples(p.parse, { 0: ['abe'], 1: ['ab'], - 2: ['cbc'], + 2: ['cbc', 'dbc'], }) assert False @@ -2364,6 +2364,7 @@ def _make_parser_test(LEXER, PARSER): assert match_error("bbc") == 2 assert match_error("cbc") == 2 self.assertEqual( match_error("dbc"), 2 ) + self.assertEqual( match_error("ebc"), 2 ) @unittest.skipIf(not regex or sys.version_info[0] == 2, 'Unicode and Python 2 do not place nicely together.')
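A minimal sketch of what the corrected `Type[Lexer]` hint in PATCH 02 permits, modeled on the `test_backwards_custom_lexer` case above (the grammar, class name, and token type are illustrative, not part of the patches):

    from lark import Lark
    from lark.lexer import Lexer, Token

    class MyLexer(Lexer):
        # Toy lexer: turns every input character into an 'A' token.
        def __init__(self, lexer_conf):
            pass

        def lex(self, text):
            for ch in text:
                yield Token('A', ch)

    # Per the fixed hint, 'lexer' takes the Lexer *subclass* itself, not an instance.
    parser = Lark("""
    start: A+
    %declare A
    """, parser='lalr', lexer=MyLexer)

    print(parser.parse('ab'))   # -> Tree('start', [Token('A', 'a'), Token('A', 'b')])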
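The `token_history` attached in PATCH 07 is populated by the contextual lexer's error path (the `raise UnexpectedToken(..., token_history=[lexer_state.last_token])` above). A rough sketch of observing it; the anonymous terminal names shown in the comments are auto-generated by lark and approximate:

    from lark import Lark
    from lark.exceptions import UnexpectedToken

    parser = Lark('start: "a" "b" "c"', parser='lalr')  # contextual lexer by default

    try:
        # 'c' is a valid terminal globally, but not expected right after 'a',
        # so the contextual lexer re-lexes it and raises UnexpectedToken.
        parser.parse('acb')
    except UnexpectedToken as e:
        print(e.token)           # the offending token, e.g. Token('C', 'c')
        print(e.token_history)   # the preceding token, e.g. [Token('A', 'a')]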
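PATCH 08 re-parents `UnexpectedEOF` under `UnexpectedInput` and exports it from the `lark` package. A quick sketch of catching it with the default Earley parser; the exact contents of `expected` depend on the grammar:

    from lark import Lark, UnexpectedEOF

    parser = Lark('start: "a" "b"')   # Earley is the default parser

    try:
        parser.parse('a')             # input ends before "b" is seen
    except UnexpectedEOF as e:
        # Terminals that could have continued the parse, e.g. ['B']
        print([t.name for t in e.expected])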