Browse Source

Merge branch 'puppet'

Erez Sh 4 years ago
6 changed files with 129 additions and 11 deletions
  1. +13
  2. +1
  3. +34
  4. +2
  5. +19
  6. +60

+ 13
- 2
docs/ View File

@@ -25,12 +25,21 @@ Example:

#### parse(self, text)
#### parse(self, text, start=None, on_error=None)

Return a complete parse tree for the text (of type Tree)
Parse the given text, according to the options provided.

Returns a complete parse tree for the text (of type Tree)

If a transformer is supplied to `__init__`, returns whatever is the result of the transformation.


* start: str - required if Lark was given multiple possible start symbols (using the start option).

* on_error: function - if provided, it is called with the exception whenever an UnexpectedToken error occurs. Return True to resume parsing, or False to re-raise the error. LALR only.

(See `examples/` for an example of how to use `on_error`.)

#### save(self, f) / load(cls, f)

@@ -160,6 +169,8 @@ See the [visitors page](

## UnexpectedToken

TODO: Explain puppet mechanism (related to on_error)

## UnexpectedException

- `UnexpectedInput`

+ 1
- 0
docs/ View File

@@ -6,6 +6,7 @@
- EBNF-inspired grammar, with extra features (See: [Grammar Reference](
- Builds a parse-tree (AST) automagically based on the grammar
- Stand-alone parser generator - create a small independent parser to embed in your project.
- Flexible error handling by using a "puppet parser" mechanism (LALR only)
- Automatic line & column tracking (for both tokens and matched rules)
- Automatic terminal collision resolution
- Standard library of terminals (strings, numbers, names, etc.)

+ 34
- 0
examples/ View File

@@ -0,0 +1,34 @@
# This example demonstrates error handling using a parsing puppet in LALR
# When the parser encounters an UnexpectedToken exception, it creates a
# parsing puppet with the current parse-state, and lets you control how
# to proceed step-by-step. When you've achieved the correct parse-state,
# you can resume the run by returning True.

from lark import UnexpectedToken, Token

from .json_parser import json_parser

def ignore_errors(e):
    """Error callback for ``parse(..., on_error=...)`` that tolerates comma mistakes.

    ``e`` is the UnexpectedToken error raised by the parser; ``e.token`` is the
    offending token and ``e.puppet`` is the parsing puppet holding the current
    parse-state.

    Returns True to resume the parse, or False to stop and let the exception
    be raised.
    """
    if e.token.type == 'COMMA':
        # Skip comma
        return True
    elif e.token.type == 'SIGNED_NUMBER':
        # Try to feed a comma and retry the number
        e.puppet.feed_token(Token('COMMA', ','))
        return True

    # Unhandled error. Will stop parse and raise exception
    return False

def main():
    """Parse a list with missing and surplus commas, recovering via ``ignore_errors``."""
    s = "[0 1, 2,, 3,,, 4, 5 6 ]"
    # Every UnexpectedToken raised during the parse is handed to ignore_errors.
    res = json_parser.parse(s, on_error=ignore_errors)
    print(res)      # prints [1.0, 2.0, 3.0, 4.0, 5.0, 6.0]


+ 2
- 1
lark/ View File

@@ -81,7 +81,7 @@ class UnexpectedCharacters(LexError, UnexpectedInput):

class UnexpectedToken(ParseError, UnexpectedInput):
def __init__(self, token, expected, considered_rules=None, state=None):
def __init__(self, token, expected, considered_rules=None, state=None, puppet=None):
self.token = token
self.expected = expected # XXX str shouldn't be necessary
self.line = getattr(token, 'line', '?')
@@ -89,6 +89,7 @@ class UnexpectedToken(ParseError, UnexpectedInput):
self.considered_rules = considered_rules
self.state = state
self.pos_in_stream = getattr(token, 'pos_in_stream', None)
self.puppet = puppet

message = ("Unexpected token %r at line %s, column %s.\n"
"Expected one of: \n\t* %s\n"

+ 19
- 4
lark/ View File

@@ -9,7 +9,7 @@ from .load_grammar import load_grammar
from .tree import Tree
from .common import LexerConf, ParserConf

from .lexer import Lexer, TraditionalLexer, TerminalDef
from .lexer import Lexer, TraditionalLexer, TerminalDef, UnexpectedToken
from .parse_tree_builder import ParseTreeBuilder
from .parser_frontends import get_frontend
from .grammar import Rule
@@ -359,13 +359,28 @@ class Lark(Serialize):
"Get information about a terminal"
return self._terminals_dict[name]

def parse(self, text, start=None):
def parse(self, text, start=None, on_error=None):
"""Parse the given text, according to the options provided.

The 'start' parameter is required if Lark was given multiple possible start symbols (using the start option).
start: str - required if Lark was given multiple possible start symbols (using the start option).
on_error: function - if provided, will be called on UnexpectedToken error. Return true to resume parsing. LALR only.

Returns a tree, unless specified otherwise.
return self.parser.parse(text, start=start)
return self.parser.parse(text, start=start)
except UnexpectedToken as e:
if on_error is None:

while True:
if not on_error(e):
raise e
return e.puppet.resume_parse()
except UnexpectedToken as e2:
e = e2


+ 60
- 4
lark/parsers/ View File

@@ -41,15 +41,15 @@ class _Parser:
self.callbacks = callbacks
self.debug = debug

def parse(self, seq, start, set_state=None):
def parse(self, seq, start, set_state=None, value_stack=None, state_stack=None):
token = None
stream = iter(seq)
states = self.parse_table.states
start_state = self.parse_table.start_states[start]
end_state = self.parse_table.end_states[start]

state_stack = [start_state]
value_stack = []
state_stack = state_stack or [start_state]
value_stack = value_stack or []

if set_state: set_state(start_state)

@@ -59,7 +59,7 @@ class _Parser:
return states[state][token.type]
except KeyError:
expected = [s for s in states[state].keys() if s.isupper()]
raise UnexpectedToken(token, expected, state=state)
raise UnexpectedToken(token, expected, state=state, puppet=_ParserPuppet(self, state_stack, value_stack, start, stream, set_state))

def reduce(rule):
size = len(rule.expansion)
@@ -111,3 +111,59 @@ class _Parser:
return value_stack[-1]


class _ParserPuppet:
    """Handle on a suspended LALR parse that lets the caller drive it by hand.

    It shares (does not copy) the parser's state stack and value stack, so
    tokens fed through the puppet change the state that ``resume_parse``
    continues from.
    """

    def __init__(self, parser, state_stack, value_stack, start, stream, set_state):
        self.parser = parser
        self._state_stack = state_stack
        self._value_stack = value_stack
        self._start = start
        self._stream = stream
        self._set_state = set_state

    def feed_token(self, token):
        """Advance the parse by one token, as if it had come from the stream.

        Performs every reduction the token triggers, then shifts the token.

        Returns the top of the value stack if a reduction reaches the end
        state for the start symbol; otherwise returns None.

        Raises KeyError if the current state has no action for ``token.type``.
        """
        states = self.parser.parse_table.states
        end_state = self.parser.parse_table.end_states[self._start]
        state_stack = self._state_stack
        value_stack = self._value_stack

        state = state_stack[-1]
        action, arg = states[state][token.type]
        assert arg != end_state

        while action is Reduce:
            rule = arg
            size = len(rule.expansion)
            if size:
                # Pop one state and one value per symbol in the rule's expansion.
                s = value_stack[-size:]
                del state_stack[-size:]
                del value_stack[-size:]
            else:
                # Empty rule: nothing to pop, no children.
                s = []

            value = self.parser.callbacks[rule](s)

            # Goto: from the uncovered state, move on the reduced rule's origin.
            # NOTE(review): the lookup key was lost from this line in SOURCE;
            # rule.origin.name is a reconstruction - confirm against the
            # reduce() helper in _Parser.parse.
            _action, new_state = states[state_stack[-1]][rule.origin.name]
            assert _action is Shift
            # NOTE(review): these two pushes are also reconstructed; without
            # them the reduction would leave no trace on the stacks.
            state_stack.append(new_state)
            value_stack.append(value)

            if state_stack[-1] == end_state:
                return value_stack[-1]

            # Look the same token up again from the new state.
            state = state_stack[-1]
            action, arg = states[state][token.type]
            assert arg != end_state

        assert action is Shift
        # NOTE(review): shift step reconstructed - push the target state and the token.
        state_stack.append(arg)
        value_stack.append(token)

    def choices(self):
        """Return the parse-table row for the current state.

        The row is indexed by token type and yields ``(action, arg)`` pairs.
        """
        return self.parser.parse_table.states[self._state_stack[-1]]

    def resume_parse(self):
        """Continue the parse on the remaining stream from the puppet's stacks.

        Returns whatever ``parser.parse`` returns.
        """
        return self.parser.parse(self._stream, self._start, self._set_state, self._value_stack, self._state_stack)
