From d44658fa3f6d15f5d9ef4e21966373d3700e39b0 Mon Sep 17 00:00:00 2001 From: Erez Shinan Date: Sun, 5 Feb 2017 17:46:32 +0200 Subject: [PATCH] Now supports empty rules. Added some tests. All tests passing. --- lark/grammar_analysis.py | 8 ++++---- lark/lark.py | 3 ++- lark/load_grammar.py | 8 +++++++- lark/parser.py | 7 +++++-- lark/tree.py | 11 +++++++++-- 5 files changed, 27 insertions(+), 10 deletions(-) diff --git a/lark/grammar_analysis.py b/lark/grammar_analysis.py index cc051ca..fb1ddb8 100644 --- a/lark/grammar_analysis.py +++ b/lark/grammar_analysis.py @@ -15,7 +15,6 @@ class Rule(object): expansion : a list of symbols """ def __init__(self, origin, expansion, alias=None): - assert expansion, "No support for empty rules" self.origin = origin self.expansion = expansion self.alias = alias @@ -91,9 +90,10 @@ class GrammarAnalyzer(object): init_ptr = RulePtr(r, 0) init_ptrs.add(init_ptr) - new_r = init_ptr.next - if not is_terminal(new_r): - yield new_r + if r.expansion: # if not empty rule + new_r = init_ptr.next + if not is_terminal(new_r): + yield new_r _ = list(bfs([rule], _expand_rule)) diff --git a/lark/lark.py b/lark/lark.py index 87d19bd..19996fa 100644 --- a/lark/lark.py +++ b/lark/lark.py @@ -32,7 +32,6 @@ class LarkOptions(object): self.debug = bool(o.pop('debug', False)) self.only_lex = bool(o.pop('only_lex', False)) self.keep_all_tokens = bool(o.pop('keep_all_tokens', False)) - self.keep_empty_trees = bool(o.pop('keep_empty_trees', True)) self.tree_class = o.pop('tree_class', Tree) self.cache_grammar = o.pop('cache_grammar', False) self.ignore_postproc = bool(o.pop('ignore_postproc', False)) @@ -42,6 +41,8 @@ class LarkOptions(object): assert self.parser in ENGINE_DICT if self.parser == 'earley' and self.transformer: raise ValueError('Cannot specify an auto-transformer when using the Earley algorithm. Please use your transformer on the resulting parse tree, or use a different algorithm (i.e. lalr)') + if self.keep_all_tokens: + raise NotImplementedError("Not implemented yet!") if o: raise ValueError("Unknown options: %s" % o.keys()) diff --git a/lark/load_grammar.py b/lark/load_grammar.py index 01a4aa5..5b4e037 100644 --- a/lark/load_grammar.py +++ b/lark/load_grammar.py @@ -74,7 +74,7 @@ RULES = [ ('expansion', ['_expansion']), ('expansion', ['_expansion', 'TO', 'RULE']), - ('_expansion', ['expr']), + ('_expansion', []), ('_expansion', ['_expansion', 'expr']), ('expr', ['atom']), @@ -149,6 +149,12 @@ class SaveDefinitions(object): def tokenvalue(self, tokenvalue): value = tokenvalue.value[1:-1] + import codecs + decoder = codecs.getdecoder('unicode_escape') + if '\u' in value: + # XXX for now, you can't mix unicode escaping and unicode characters at the same token + value = decoder(value)[0] + if tokenvalue.type == 'STRING': value = re.escape(value) return tokenvalue, value diff --git a/lark/parser.py b/lark/parser.py index 2695009..5d66164 100644 --- a/lark/parser.py +++ b/lark/parser.py @@ -26,8 +26,11 @@ class Parser(object): raise ParseError("Unexpected input %r.\nExpected: %s\nContext: %s" % (key, expected, context)) def reduce(rule): - s = stack[-len(rule.expansion):] - del stack[-len(rule.expansion):] + if rule.expansion: + s = stack[-len(rule.expansion):] + del stack[-len(rule.expansion):] + else: + s = [] res = self.callbacks[rule]([x[0] for x in s]) diff --git a/lark/tree.py b/lark/tree.py index a056430..5f91605 100644 --- a/lark/tree.py +++ b/lark/tree.py @@ -1,3 +1,4 @@ +from copy import deepcopy from utils import inline_args class Tree(object): @@ -29,6 +30,8 @@ class Tree(object): kid = self.children[i] self.children[i:i+1] = kid.children + def __eq__(self, other): + return self.data == other.data and self.children == other.children # def find_path(self, pred): # if pred(self): @@ -49,8 +52,12 @@ class Tree(object): # x = self.follow_path(path[:-1]) # x.children[path[-1]] = value - def clone(self): - return Tree(self.data, [c.clone() if isinstance(c, Tree) else c for c in self.children]) + # def clone(self): + # return Tree(self.data, [c.clone() if isinstance(c, Tree) else c for c in self.children]) + + def __deepcopy__(self, memo): + return type(self)(self.data, deepcopy(self.children, memo)) + class Transformer(object):