From 818917270139651ba82c20f216fee30266bbcc52 Mon Sep 17 00:00:00 2001 From: Erez Shinan Date: Sun, 19 Feb 2017 14:49:23 +0200 Subject: [PATCH 1/9] recognizer working --- lark/parsers/earley2.py | 107 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 107 insertions(+) create mode 100644 lark/parsers/earley2.py diff --git a/lark/parsers/earley2.py b/lark/parsers/earley2.py new file mode 100644 index 0000000..80144f9 --- /dev/null +++ b/lark/parsers/earley2.py @@ -0,0 +1,107 @@ +from ..utils import classify, classify_bool, bfs, fzset +from ..common import GrammarError, is_terminal +from lalr_analysis import Rule, RulePtr, GrammarAnalyzer + +class Item: + def __init__(self, rule_ptr, start): + self.rule_ptr = rule_ptr + self.start = start + + @property + def expect(self): + return self.rule_ptr.next + + @property + def is_complete(self): + return self.rule_ptr.is_satisfied + + def advance(self): + return Item(self.rule_ptr.advance(self.expect), self.start) + + def __eq__(self, other): + return self.rule_ptr == other.rule_ptr and self.start == other.start + def __hash__(self): + return hash((self.rule_ptr, self.start)) + + def __repr__(self): + return '%s (%s)' % (self.rule_ptr, self.start) + +class Parser: + def __init__(self, rules, start): + self.analyzer = GrammarAnalyzer(rules, start) + self.start = start + + + def parse(self, stream): + # Define parser functions + + def predict(symbol, i): + assert not is_terminal(symbol), symbol + return {Item(rp, i) for rp in self.analyzer.expand_rule(symbol)} + + def scan(item, inp): + if item.expect == inp: # TODO Do a smarter match, i.e. regexp + return {item.advance()} + else: + return set() + + def complete(item, table): + print "Complete:", item + name = item.rule_ptr.rule.origin + return {old_item.advance() for old_item in table[item.start] + if old_item.expect == name} + + def process_column(i, char): + cur_set = table[-1] + next_set = set() + table.append(next_set) + + to_process = cur_set + while to_process: + new_items = set() + for item in to_process: + if item.is_complete: + new_items |= complete(item, table) + else: + if is_terminal(item.expect): + next_set |= scan(item, char) + else: + new_items |= predict(item.expect, i) + + to_process = new_items - cur_set + cur_set |= to_process + + # Main loop starts + + table = [predict(self.start, 0)] + + for i, char in enumerate(stream): + process_column(i, char) + + process_column(len(stream), None) + + + + + +# rules = [ +# ('a', ['a', 'A']), +# ('a', ['A']), +# ] + +# p = Parser(rules, 'a') +# p.parse('AAA') + +rules = [ + ('sum', ['sum', "A", 'product']), + ('sum', ['product']), + ('product', ['product', "M", 'factor']), + ('product', ['factor']), + ('factor', ['L', 'sum', 'R']), + ('factor', ['number']), + ('number', ['N', 'number']), + ('number', ['N']), +] + +p = Parser(rules, 'sum') +p.parse('NALNMNANR') From 07df4b80eb5719e1ad9f7c8109d7fb51949601a8 Mon Sep 17 00:00:00 2001 From: Erez Shinan Date: Mon, 20 Feb 2017 00:11:09 +0200 Subject: [PATCH 2/9] My Earley parser is now working. 
Not yet plugged in --- lark/parsers/earley2.py | 80 ++++++++++++++++++++++++++--------------- 1 file changed, 52 insertions(+), 28 deletions(-) diff --git a/lark/parsers/earley2.py b/lark/parsers/earley2.py index 80144f9..c41dfa5 100644 --- a/lark/parsers/earley2.py +++ b/lark/parsers/earley2.py @@ -1,11 +1,13 @@ -from ..utils import classify, classify_bool, bfs, fzset -from ..common import GrammarError, is_terminal -from lalr_analysis import Rule, RulePtr, GrammarAnalyzer +from ..common import ParseError, UnexpectedToken, is_terminal +from lalr_analysis import GrammarAnalyzer + +from ..tree import Tree class Item: - def __init__(self, rule_ptr, start): + def __init__(self, rule_ptr, start, data): self.rule_ptr = rule_ptr self.start = start + self.data = data @property def expect(self): @@ -15,8 +17,12 @@ class Item: def is_complete(self): return self.rule_ptr.is_satisfied - def advance(self): - return Item(self.rule_ptr.advance(self.expect), self.start) + @property + def name(self): + return self.rule_ptr.rule.origin + + def advance(self, data): + return Item(self.rule_ptr.advance(self.expect), self.start, self.data + [data]) def __eq__(self, other): return self.rule_ptr == other.rule_ptr and self.start == other.start @@ -26,6 +32,7 @@ class Item: def __repr__(self): return '%s (%s)' % (self.rule_ptr, self.start) + class Parser: def __init__(self, rules, start): self.analyzer = GrammarAnalyzer(rules, start) @@ -37,19 +44,19 @@ class Parser: def predict(symbol, i): assert not is_terminal(symbol), symbol - return {Item(rp, i) for rp in self.analyzer.expand_rule(symbol)} + return {Item(rp, i, []) for rp in self.analyzer.expand_rule(symbol)} def scan(item, inp): if item.expect == inp: # TODO Do a smarter match, i.e. regexp - return {item.advance()} + return {item.advance(inp)} else: return set() def complete(item, table): - print "Complete:", item - name = item.rule_ptr.rule.origin - return {old_item.advance() for old_item in table[item.start] - if old_item.expect == name} + name = item.name + item.data = Tree(name, item.data) + return {old_item.advance(item.data) for old_item in table[item.start] + if not old_item.is_complete and old_item.expect == name} def process_column(i, char): cur_set = table[-1] @@ -71,6 +78,10 @@ class Parser: to_process = new_items - cur_set cur_set |= to_process + if not next_set and char != '$end': + expect = filter(is_terminal, [i.expect for i in cur_set if not i.is_complete]) + raise UnexpectedToken(char, expect, stream, i) + # Main loop starts table = [predict(self.start, 0)] @@ -78,7 +89,16 @@ class Parser: for i, char in enumerate(stream): process_column(i, char) - process_column(len(stream), None) + process_column(len(stream), '$end') + + # Parse ended. 
Now build a parse tree + solutions = [n.data for n in table[len(stream)] + if n.is_complete and n.name==self.start and n.start==0] + + if not solutions: + raise ParseError('Incomplete parse: Could not find a solution to input') + + return solutions @@ -86,22 +106,26 @@ class Parser: # rules = [ # ('a', ['a', 'A']), +# ('a', ['a', 'A', 'a']), +# ('a', ['a', 'A', 'A', 'a']), # ('a', ['A']), # ] # p = Parser(rules, 'a') -# p.parse('AAA') - -rules = [ - ('sum', ['sum', "A", 'product']), - ('sum', ['product']), - ('product', ['product', "M", 'factor']), - ('product', ['factor']), - ('factor', ['L', 'sum', 'R']), - ('factor', ['number']), - ('number', ['N', 'number']), - ('number', ['N']), -] - -p = Parser(rules, 'sum') -p.parse('NALNMNANR') +# for x in p.parse('AAAA'): +# print '->' +# print x.pretty() + +# rules = [ +# ('sum', ['sum', "A", 'product']), +# ('sum', ['product']), +# ('product', ['product', "M", 'factor']), +# ('product', ['factor']), +# ('factor', ['L', 'sum', 'R']), +# ('factor', ['number']), +# ('number', ['N', 'number']), +# ('number', ['N']), +# ] + +# p = Parser(rules, 'sum') +# print p.parse('NALNMNANR') From 972034fd2d584b3571ef923fd6700a41f9bd8d92 Mon Sep 17 00:00:00 2001 From: Erez Shinan Date: Mon, 20 Feb 2017 00:11:56 +0200 Subject: [PATCH 3/9] Began refactoring of lalr_analysis -> grammar_analysis --- lark/parsers/lalr_analysis.py | 106 +++++++++++++++++----------------- 1 file changed, 54 insertions(+), 52 deletions(-) diff --git a/lark/parsers/lalr_analysis.py b/lark/parsers/lalr_analysis.py index e50de18..bdd6c73 100644 --- a/lark/parsers/lalr_analysis.py +++ b/lark/parsers/lalr_analysis.py @@ -57,6 +57,58 @@ def update_set(set1, set2): set1 |= set2 return set1 != copy +def calculate_sets(rules): + """Calculate FOLLOW sets. + + Adapted from: http://lara.epfl.ch/w/cc09:algorithm_for_first_and_follow_sets""" + symbols = {sym for rule in rules for sym in rule.expansion} | {rule.origin for rule in rules} + symbols.add('$root') # what about other unused rules? + + # foreach grammar rule X ::= Y(1) ... 
Y(k) + # if k=0 or {Y(1),...,Y(k)} subset of NULLABLE then + # NULLABLE = NULLABLE union {X} + # for i = 1 to k + # if i=1 or {Y(1),...,Y(i-1)} subset of NULLABLE then + # FIRST(X) = FIRST(X) union FIRST(Y(i)) + # for j = i+1 to k + # if i=k or {Y(i+1),...Y(k)} subset of NULLABLE then + # FOLLOW(Y(i)) = FOLLOW(Y(i)) union FOLLOW(X) + # if i+1=j or {Y(i+1),...,Y(j-1)} subset of NULLABLE then + # FOLLOW(Y(i)) = FOLLOW(Y(i)) union FIRST(Y(j)) + # until none of NULLABLE,FIRST,FOLLOW changed in last iteration + + NULLABLE = set() + FIRST = {} + FOLLOW = {} + for sym in symbols: + FIRST[sym]={sym} if is_terminal(sym) else set() + FOLLOW[sym]=set() + + changed = True + while changed: + changed = False + + for rule in rules: + if set(rule.expansion) <= NULLABLE: + if update_set(NULLABLE, {rule.origin}): + changed = True + + for i, sym in enumerate(rule.expansion): + if set(rule.expansion[:i]) <= NULLABLE: + if update_set(FIRST[rule.origin], FIRST[sym]): + changed = True + if i==len(rule.expansion)-1 or set(rule.expansion[i:]) <= NULLABLE: + if update_set(FOLLOW[sym], FOLLOW[rule.origin]): + changed = True + + for j in range(i+1, len(rule.expansion)): + if set(rule.expansion[i+1:j]) <= NULLABLE: + if update_set(FOLLOW[sym], FIRST[rule.expansion[j]]): + changed = True + + return FIRST, FOLLOW, NULLABLE + + class GrammarAnalyzer(object): def __init__(self, rule_tuples, start_symbol, debug=False): self.start_symbol = start_symbol @@ -79,6 +131,8 @@ class GrammarAnalyzer(object): self.init_state = self.expand_rule(start_symbol) + self.FIRST, self.FOLLOW, self.NULLABLE = calculate_sets(self.rules) + def expand_rule(self, rule): "Returns all init_ptrs accessible by rule (recursive)" init_ptrs = set() @@ -104,59 +158,7 @@ class GrammarAnalyzer(object): else: return {rp.next for rp in self.expand_rule(r) if is_terminal(rp.next)} - def _calc(self): - """Calculate FOLLOW sets. - - Adapted from: http://lara.epfl.ch/w/cc09:algorithm_for_first_and_follow_sets""" - symbols = {sym for rule in self.rules for sym in rule.expansion} | {rule.origin for rule in self.rules} - symbols.add('$root') # what about other unused rules? - - # foreach grammar rule X ::= Y(1) ... 
Y(k) - # if k=0 or {Y(1),...,Y(k)} subset of NULLABLE then - # NULLABLE = NULLABLE union {X} - # for i = 1 to k - # if i=1 or {Y(1),...,Y(i-1)} subset of NULLABLE then - # FIRST(X) = FIRST(X) union FIRST(Y(i)) - # for j = i+1 to k - # if i=k or {Y(i+1),...Y(k)} subset of NULLABLE then - # FOLLOW(Y(i)) = FOLLOW(Y(i)) union FOLLOW(X) - # if i+1=j or {Y(i+1),...,Y(j-1)} subset of NULLABLE then - # FOLLOW(Y(i)) = FOLLOW(Y(i)) union FIRST(Y(j)) - # until none of NULLABLE,FIRST,FOLLOW changed in last iteration - - NULLABLE = set() - FIRST = {} - FOLLOW = {} - for sym in symbols: - FIRST[sym]={sym} if is_terminal(sym) else set() - FOLLOW[sym]=set() - - changed = True - while changed: - changed = False - - for rule in self.rules: - if set(rule.expansion) <= NULLABLE: - if update_set(NULLABLE, {rule.origin}): - changed = True - - for i, sym in enumerate(rule.expansion): - if set(rule.expansion[:i]) <= NULLABLE: - if update_set(FIRST[rule.origin], FIRST[sym]): - changed = True - if i==len(rule.expansion)-1 or set(rule.expansion[i:]) <= NULLABLE: - if update_set(FOLLOW[sym], FOLLOW[rule.origin]): - changed = True - - for j in range(i+1, len(rule.expansion)): - if set(rule.expansion[i+1:j]) <= NULLABLE: - if update_set(FOLLOW[sym], FIRST[rule.expansion[j]]): - changed = True - - self.FOLLOW = FOLLOW - def analyze(self): - self._calc() self.states = {} def step(state): From b95567c4a728ab30416f488a65fc8a0d90608288 Mon Sep 17 00:00:00 2001 From: Erez Shinan Date: Mon, 20 Feb 2017 00:13:20 +0200 Subject: [PATCH 4/9] Another refactoring step --- lark/parsers/earley2.py | 2 +- lark/parsers/grammar_analysis.py | 212 ++++++++++++++++++++++++++++++ lark/parsers/lalr_analysis.py | 213 +------------------------------ 3 files changed, 214 insertions(+), 213 deletions(-) create mode 100644 lark/parsers/grammar_analysis.py diff --git a/lark/parsers/earley2.py b/lark/parsers/earley2.py index c41dfa5..7527248 100644 --- a/lark/parsers/earley2.py +++ b/lark/parsers/earley2.py @@ -1,5 +1,5 @@ from ..common import ParseError, UnexpectedToken, is_terminal -from lalr_analysis import GrammarAnalyzer +from grammar_analysis import GrammarAnalyzer from ..tree import Tree diff --git a/lark/parsers/grammar_analysis.py b/lark/parsers/grammar_analysis.py new file mode 100644 index 0000000..bdd6c73 --- /dev/null +++ b/lark/parsers/grammar_analysis.py @@ -0,0 +1,212 @@ +import logging +from collections import defaultdict, deque + +from ..utils import classify, classify_bool, bfs, fzset +from ..common import GrammarError, is_terminal + +ACTION_SHIFT = 0 + +class Rule(object): + """ + origin : a symbol + expansion : a list of symbols + """ + def __init__(self, origin, expansion, alias=None): + self.origin = origin + self.expansion = expansion + self.alias = alias + + def __repr__(self): + return '<%s : %s>' % (self.origin, ' '.join(self.expansion)) + +class RulePtr(object): + def __init__(self, rule, index): + assert isinstance(rule, Rule) + assert index <= len(rule.expansion) + self.rule = rule + self.index = index + + def __repr__(self): + before = self.rule.expansion[:self.index] + after = self.rule.expansion[self.index:] + return '<%s : %s * %s>' % (self.rule.origin, ' '.join(before), ' '.join(after)) + + @property + def next(self): + return self.rule.expansion[self.index] + + def advance(self, sym): + assert self.next == sym + return RulePtr(self.rule, self.index+1) + + @property + def is_satisfied(self): + return self.index == len(self.rule.expansion) + + def __eq__(self, other): + return self.rule == other.rule and self.index 
== other.index + def __hash__(self): + return hash((self.rule, self.index)) + + +def pairs(lst): + return zip(lst[:-1], lst[1:]) + +def update_set(set1, set2): + copy = set(set1) + set1 |= set2 + return set1 != copy + +def calculate_sets(rules): + """Calculate FOLLOW sets. + + Adapted from: http://lara.epfl.ch/w/cc09:algorithm_for_first_and_follow_sets""" + symbols = {sym for rule in rules for sym in rule.expansion} | {rule.origin for rule in rules} + symbols.add('$root') # what about other unused rules? + + # foreach grammar rule X ::= Y(1) ... Y(k) + # if k=0 or {Y(1),...,Y(k)} subset of NULLABLE then + # NULLABLE = NULLABLE union {X} + # for i = 1 to k + # if i=1 or {Y(1),...,Y(i-1)} subset of NULLABLE then + # FIRST(X) = FIRST(X) union FIRST(Y(i)) + # for j = i+1 to k + # if i=k or {Y(i+1),...Y(k)} subset of NULLABLE then + # FOLLOW(Y(i)) = FOLLOW(Y(i)) union FOLLOW(X) + # if i+1=j or {Y(i+1),...,Y(j-1)} subset of NULLABLE then + # FOLLOW(Y(i)) = FOLLOW(Y(i)) union FIRST(Y(j)) + # until none of NULLABLE,FIRST,FOLLOW changed in last iteration + + NULLABLE = set() + FIRST = {} + FOLLOW = {} + for sym in symbols: + FIRST[sym]={sym} if is_terminal(sym) else set() + FOLLOW[sym]=set() + + changed = True + while changed: + changed = False + + for rule in rules: + if set(rule.expansion) <= NULLABLE: + if update_set(NULLABLE, {rule.origin}): + changed = True + + for i, sym in enumerate(rule.expansion): + if set(rule.expansion[:i]) <= NULLABLE: + if update_set(FIRST[rule.origin], FIRST[sym]): + changed = True + if i==len(rule.expansion)-1 or set(rule.expansion[i:]) <= NULLABLE: + if update_set(FOLLOW[sym], FOLLOW[rule.origin]): + changed = True + + for j in range(i+1, len(rule.expansion)): + if set(rule.expansion[i+1:j]) <= NULLABLE: + if update_set(FOLLOW[sym], FIRST[rule.expansion[j]]): + changed = True + + return FIRST, FOLLOW, NULLABLE + + +class GrammarAnalyzer(object): + def __init__(self, rule_tuples, start_symbol, debug=False): + self.start_symbol = start_symbol + self.debug = debug + rule_tuples = list(rule_tuples) + rule_tuples.append(('$root', [start_symbol, '$end'])) + rule_tuples = [(t[0], t[1], None) if len(t)==2 else t for t in rule_tuples] + + self.rules = set() + self.rules_by_origin = {o: [] for o, _x, _a in rule_tuples} + for origin, exp, alias in rule_tuples: + r = Rule( origin, exp, alias ) + self.rules.add(r) + self.rules_by_origin[origin].append(r) + + for r in self.rules: + for sym in r.expansion: + if not (is_terminal(sym) or sym in self.rules_by_origin): + raise GrammarError("Using an undefined rule: %s" % sym) + + self.init_state = self.expand_rule(start_symbol) + + self.FIRST, self.FOLLOW, self.NULLABLE = calculate_sets(self.rules) + + def expand_rule(self, rule): + "Returns all init_ptrs accessible by rule (recursive)" + init_ptrs = set() + def _expand_rule(rule): + assert not is_terminal(rule) + + for r in self.rules_by_origin[rule]: + init_ptr = RulePtr(r, 0) + init_ptrs.add(init_ptr) + + if r.expansion: # if not empty rule + new_r = init_ptr.next + if not is_terminal(new_r): + yield new_r + + _ = list(bfs([rule], _expand_rule)) + + return fzset(init_ptrs) + + def _first(self, r): + if is_terminal(r): + return {r} + else: + return {rp.next for rp in self.expand_rule(r) if is_terminal(rp.next)} + + def analyze(self): + + self.states = {} + def step(state): + lookahead = defaultdict(list) + sat, unsat = classify_bool(state, lambda rp: rp.is_satisfied) + for rp in sat: + for term in self.FOLLOW.get(rp.rule.origin, ()): + lookahead[term].append(('reduce', rp.rule)) + + 
d = classify(unsat, lambda rp: rp.next) + for sym, rps in d.items(): + rps = {rp.advance(sym) for rp in rps} + + for rp in set(rps): + if not rp.is_satisfied and not is_terminal(rp.next): + rps |= self.expand_rule(rp.next) + + lookahead[sym].append(('shift', fzset(rps))) + yield fzset(rps) + + for k, v in lookahead.items(): + if len(v) > 1: + if self.debug: + logging.warn("Shift/reduce conflict for %s: %s. Resolving as shift.", k, v) + for x in v: + # XXX resolving shift/reduce into shift, like PLY + # Give a proper warning + if x[0] == 'shift': + lookahead[k] = [x] + + for k, v in lookahead.items(): + assert len(v) == 1, ("Collision", k, v) + + self.states[state] = {k:v[0] for k, v in lookahead.items()} + + for _ in bfs([self.init_state], step): + pass + + # -- + self.enum = list(self.states) + self.enum_rev = {s:i for i,s in enumerate(self.enum)} + self.states_idx = {} + + for s, la in self.states.items(): + la = {k:(ACTION_SHIFT, self.enum_rev[v[1]]) if v[0]=='shift' + else (v[0], (v[1], len(v[1].expansion))) # Reduce + for k,v in la.items()} + self.states_idx[ self.enum_rev[s] ] = la + + + self.init_state_idx = self.enum_rev[self.init_state] diff --git a/lark/parsers/lalr_analysis.py b/lark/parsers/lalr_analysis.py index bdd6c73..4384ca8 100644 --- a/lark/parsers/lalr_analysis.py +++ b/lark/parsers/lalr_analysis.py @@ -1,212 +1 @@ -import logging -from collections import defaultdict, deque - -from ..utils import classify, classify_bool, bfs, fzset -from ..common import GrammarError, is_terminal - -ACTION_SHIFT = 0 - -class Rule(object): - """ - origin : a symbol - expansion : a list of symbols - """ - def __init__(self, origin, expansion, alias=None): - self.origin = origin - self.expansion = expansion - self.alias = alias - - def __repr__(self): - return '<%s : %s>' % (self.origin, ' '.join(self.expansion)) - -class RulePtr(object): - def __init__(self, rule, index): - assert isinstance(rule, Rule) - assert index <= len(rule.expansion) - self.rule = rule - self.index = index - - def __repr__(self): - before = self.rule.expansion[:self.index] - after = self.rule.expansion[self.index:] - return '<%s : %s * %s>' % (self.rule.origin, ' '.join(before), ' '.join(after)) - - @property - def next(self): - return self.rule.expansion[self.index] - - def advance(self, sym): - assert self.next == sym - return RulePtr(self.rule, self.index+1) - - @property - def is_satisfied(self): - return self.index == len(self.rule.expansion) - - def __eq__(self, other): - return self.rule == other.rule and self.index == other.index - def __hash__(self): - return hash((self.rule, self.index)) - - -def pairs(lst): - return zip(lst[:-1], lst[1:]) - -def update_set(set1, set2): - copy = set(set1) - set1 |= set2 - return set1 != copy - -def calculate_sets(rules): - """Calculate FOLLOW sets. - - Adapted from: http://lara.epfl.ch/w/cc09:algorithm_for_first_and_follow_sets""" - symbols = {sym for rule in rules for sym in rule.expansion} | {rule.origin for rule in rules} - symbols.add('$root') # what about other unused rules? - - # foreach grammar rule X ::= Y(1) ... 
Y(k) - # if k=0 or {Y(1),...,Y(k)} subset of NULLABLE then - # NULLABLE = NULLABLE union {X} - # for i = 1 to k - # if i=1 or {Y(1),...,Y(i-1)} subset of NULLABLE then - # FIRST(X) = FIRST(X) union FIRST(Y(i)) - # for j = i+1 to k - # if i=k or {Y(i+1),...Y(k)} subset of NULLABLE then - # FOLLOW(Y(i)) = FOLLOW(Y(i)) union FOLLOW(X) - # if i+1=j or {Y(i+1),...,Y(j-1)} subset of NULLABLE then - # FOLLOW(Y(i)) = FOLLOW(Y(i)) union FIRST(Y(j)) - # until none of NULLABLE,FIRST,FOLLOW changed in last iteration - - NULLABLE = set() - FIRST = {} - FOLLOW = {} - for sym in symbols: - FIRST[sym]={sym} if is_terminal(sym) else set() - FOLLOW[sym]=set() - - changed = True - while changed: - changed = False - - for rule in rules: - if set(rule.expansion) <= NULLABLE: - if update_set(NULLABLE, {rule.origin}): - changed = True - - for i, sym in enumerate(rule.expansion): - if set(rule.expansion[:i]) <= NULLABLE: - if update_set(FIRST[rule.origin], FIRST[sym]): - changed = True - if i==len(rule.expansion)-1 or set(rule.expansion[i:]) <= NULLABLE: - if update_set(FOLLOW[sym], FOLLOW[rule.origin]): - changed = True - - for j in range(i+1, len(rule.expansion)): - if set(rule.expansion[i+1:j]) <= NULLABLE: - if update_set(FOLLOW[sym], FIRST[rule.expansion[j]]): - changed = True - - return FIRST, FOLLOW, NULLABLE - - -class GrammarAnalyzer(object): - def __init__(self, rule_tuples, start_symbol, debug=False): - self.start_symbol = start_symbol - self.debug = debug - rule_tuples = list(rule_tuples) - rule_tuples.append(('$root', [start_symbol, '$end'])) - rule_tuples = [(t[0], t[1], None) if len(t)==2 else t for t in rule_tuples] - - self.rules = set() - self.rules_by_origin = {o: [] for o, _x, _a in rule_tuples} - for origin, exp, alias in rule_tuples: - r = Rule( origin, exp, alias ) - self.rules.add(r) - self.rules_by_origin[origin].append(r) - - for r in self.rules: - for sym in r.expansion: - if not (is_terminal(sym) or sym in self.rules_by_origin): - raise GrammarError("Using an undefined rule: %s" % sym) - - self.init_state = self.expand_rule(start_symbol) - - self.FIRST, self.FOLLOW, self.NULLABLE = calculate_sets(self.rules) - - def expand_rule(self, rule): - "Returns all init_ptrs accessible by rule (recursive)" - init_ptrs = set() - def _expand_rule(rule): - assert not is_terminal(rule) - - for r in self.rules_by_origin[rule]: - init_ptr = RulePtr(r, 0) - init_ptrs.add(init_ptr) - - if r.expansion: # if not empty rule - new_r = init_ptr.next - if not is_terminal(new_r): - yield new_r - - _ = list(bfs([rule], _expand_rule)) - - return fzset(init_ptrs) - - def _first(self, r): - if is_terminal(r): - return {r} - else: - return {rp.next for rp in self.expand_rule(r) if is_terminal(rp.next)} - - def analyze(self): - - self.states = {} - def step(state): - lookahead = defaultdict(list) - sat, unsat = classify_bool(state, lambda rp: rp.is_satisfied) - for rp in sat: - for term in self.FOLLOW.get(rp.rule.origin, ()): - lookahead[term].append(('reduce', rp.rule)) - - d = classify(unsat, lambda rp: rp.next) - for sym, rps in d.items(): - rps = {rp.advance(sym) for rp in rps} - - for rp in set(rps): - if not rp.is_satisfied and not is_terminal(rp.next): - rps |= self.expand_rule(rp.next) - - lookahead[sym].append(('shift', fzset(rps))) - yield fzset(rps) - - for k, v in lookahead.items(): - if len(v) > 1: - if self.debug: - logging.warn("Shift/reduce conflict for %s: %s. 
Resolving as shift.", k, v) - for x in v: - # XXX resolving shift/reduce into shift, like PLY - # Give a proper warning - if x[0] == 'shift': - lookahead[k] = [x] - - for k, v in lookahead.items(): - assert len(v) == 1, ("Collision", k, v) - - self.states[state] = {k:v[0] for k, v in lookahead.items()} - - for _ in bfs([self.init_state], step): - pass - - # -- - self.enum = list(self.states) - self.enum_rev = {s:i for i,s in enumerate(self.enum)} - self.states_idx = {} - - for s, la in self.states.items(): - la = {k:(ACTION_SHIFT, self.enum_rev[v[1]]) if v[0]=='shift' - else (v[0], (v[1], len(v[1].expansion))) # Reduce - for k,v in la.items()} - self.states_idx[ self.enum_rev[s] ] = la - - - self.init_state_idx = self.enum_rev[self.init_state] +from grammar_analysis import GrammarAnalyzer, ACTION_SHIFT From d5123812b07abaf5691513b4f54870a26f4a9cc9 Mon Sep 17 00:00:00 2001 From: Erez Shinan Date: Mon, 20 Feb 2017 00:24:35 +0200 Subject: [PATCH 5/9] Another step of refactoring --- lark/parser_frontends.py | 10 ++--- lark/parsers/grammar_analysis.py | 58 +--------------------------- lark/parsers/lalr_analysis.py | 66 +++++++++++++++++++++++++++++++- lark/parsers/lalr_parser.py | 2 +- 4 files changed, 72 insertions(+), 64 deletions(-) diff --git a/lark/parser_frontends.py b/lark/parser_frontends.py index 54b67bb..9e5c248 100644 --- a/lark/parser_frontends.py +++ b/lark/parser_frontends.py @@ -2,7 +2,7 @@ import re import sre_parse from .lexer import Lexer, ContextualLexer -from .parsers.lalr_analysis import GrammarAnalyzer +from .parsers.lalr_analysis import LALR_Analyzer from .common import is_terminal, GrammarError from .parsers import lalr_parser, earley @@ -24,8 +24,8 @@ class LALR(WithLexer): WithLexer.__init__(self, lexer_conf) self.parser_conf = parser_conf - analyzer = GrammarAnalyzer(parser_conf.rules, parser_conf.start) - analyzer.analyze() + analyzer = LALR_Analyzer(parser_conf.rules, parser_conf.start) + analyzer.compute_lookahead() self.parser = lalr_parser.Parser(analyzer, parser_conf.callback) def parse(self, text): @@ -37,8 +37,8 @@ class LALR_ContextualLexer: self.lexer_conf = lexer_conf self.parser_conf = parser_conf - self.analyzer = GrammarAnalyzer(parser_conf.rules, parser_conf.start) - self.analyzer.analyze() + self.analyzer = LALR_Analyzer(parser_conf.rules, parser_conf.start) + self.analyzer.compute_lookahead() d = {idx:t.keys() for idx, t in self.analyzer.states_idx.items()} self.lexer = ContextualLexer(lexer_conf.tokens, d, ignore=lexer_conf.ignore, diff --git a/lark/parsers/grammar_analysis.py b/lark/parsers/grammar_analysis.py index bdd6c73..d51700a 100644 --- a/lark/parsers/grammar_analysis.py +++ b/lark/parsers/grammar_analysis.py @@ -1,11 +1,7 @@ -import logging -from collections import defaultdict, deque -from ..utils import classify, classify_bool, bfs, fzset +from ..utils import bfs, fzset from ..common import GrammarError, is_terminal -ACTION_SHIFT = 0 - class Rule(object): """ origin : a symbol @@ -158,55 +154,3 @@ class GrammarAnalyzer(object): else: return {rp.next for rp in self.expand_rule(r) if is_terminal(rp.next)} - def analyze(self): - - self.states = {} - def step(state): - lookahead = defaultdict(list) - sat, unsat = classify_bool(state, lambda rp: rp.is_satisfied) - for rp in sat: - for term in self.FOLLOW.get(rp.rule.origin, ()): - lookahead[term].append(('reduce', rp.rule)) - - d = classify(unsat, lambda rp: rp.next) - for sym, rps in d.items(): - rps = {rp.advance(sym) for rp in rps} - - for rp in set(rps): - if not rp.is_satisfied and not 
is_terminal(rp.next): - rps |= self.expand_rule(rp.next) - - lookahead[sym].append(('shift', fzset(rps))) - yield fzset(rps) - - for k, v in lookahead.items(): - if len(v) > 1: - if self.debug: - logging.warn("Shift/reduce conflict for %s: %s. Resolving as shift.", k, v) - for x in v: - # XXX resolving shift/reduce into shift, like PLY - # Give a proper warning - if x[0] == 'shift': - lookahead[k] = [x] - - for k, v in lookahead.items(): - assert len(v) == 1, ("Collision", k, v) - - self.states[state] = {k:v[0] for k, v in lookahead.items()} - - for _ in bfs([self.init_state], step): - pass - - # -- - self.enum = list(self.states) - self.enum_rev = {s:i for i,s in enumerate(self.enum)} - self.states_idx = {} - - for s, la in self.states.items(): - la = {k:(ACTION_SHIFT, self.enum_rev[v[1]]) if v[0]=='shift' - else (v[0], (v[1], len(v[1].expansion))) # Reduce - for k,v in la.items()} - self.states_idx[ self.enum_rev[s] ] = la - - - self.init_state_idx = self.enum_rev[self.init_state] diff --git a/lark/parsers/lalr_analysis.py b/lark/parsers/lalr_analysis.py index 4384ca8..8a8365d 100644 --- a/lark/parsers/lalr_analysis.py +++ b/lark/parsers/lalr_analysis.py @@ -1 +1,65 @@ -from grammar_analysis import GrammarAnalyzer, ACTION_SHIFT +import logging +from collections import defaultdict + +from ..utils import classify, classify_bool, bfs, fzset +from ..common import GrammarError, is_terminal + +from grammar_analysis import GrammarAnalyzer + +ACTION_SHIFT = 0 + +class LALR_Analyzer(GrammarAnalyzer): + + def compute_lookahead(self): + + self.states = {} + def step(state): + lookahead = defaultdict(list) + sat, unsat = classify_bool(state, lambda rp: rp.is_satisfied) + for rp in sat: + for term in self.FOLLOW.get(rp.rule.origin, ()): + lookahead[term].append(('reduce', rp.rule)) + + d = classify(unsat, lambda rp: rp.next) + for sym, rps in d.items(): + rps = {rp.advance(sym) for rp in rps} + + for rp in set(rps): + if not rp.is_satisfied and not is_terminal(rp.next): + rps |= self.expand_rule(rp.next) + + lookahead[sym].append(('shift', fzset(rps))) + yield fzset(rps) + + for k, v in lookahead.items(): + if len(v) > 1: + if self.debug: + logging.warn("Shift/reduce conflict for %s: %s. 
Resolving as shift.", k, v) + for x in v: + # XXX resolving shift/reduce into shift, like PLY + # Give a proper warning + if x[0] == 'shift': + lookahead[k] = [x] + + for k, v in lookahead.items(): + if not len(v) == 1: + raise GrammarError("Collision in %s: %s" %(k, v)) + + self.states[state] = {k:v[0] for k, v in lookahead.items()} + + for _ in bfs([self.init_state], step): + pass + + # -- + self.enum = list(self.states) + self.enum_rev = {s:i for i,s in enumerate(self.enum)} + self.states_idx = {} + + for s, la in self.states.items(): + la = {k:(ACTION_SHIFT, self.enum_rev[v[1]]) if v[0]=='shift' + else (v[0], (v[1], len(v[1].expansion))) # Reduce + for k,v in la.items()} + self.states_idx[ self.enum_rev[s] ] = la + + + self.init_state_idx = self.enum_rev[self.init_state] diff --git a/lark/parsers/lalr_parser.py b/lark/parsers/lalr_parser.py index 313d808..3280d01 100644 --- a/lark/parsers/lalr_parser.py +++ b/lark/parsers/lalr_parser.py @@ -1,6 +1,6 @@ -from .lalr_analysis import ACTION_SHIFT from ..common import ParseError, UnexpectedToken +from .lalr_analysis import LALR_Analyzer, ACTION_SHIFT class Parser(object): def __init__(self, analysis, callback): From e4d3e74f6ae4431b43fd338b48b2389bd185ff35 Mon Sep 17 00:00:00 2001 From: Erez Shinan Date: Mon, 20 Feb 2017 00:36:59 +0200 Subject: [PATCH 6/9] Finished refactoring --- lark/lexer.py | 9 +++++++-- lark/parser_frontends.py | 17 ++++++----------- lark/parsers/lalr_parser.py | 16 +++++++++------- 3 files changed, 22 insertions(+), 20 deletions(-) diff --git a/lark/lexer.py b/lark/lexer.py index 301d555..db5dde7 100644 --- a/lark/lexer.py +++ b/lark/lexer.py @@ -197,14 +197,19 @@ class ContextualLexer: self.root_lexer = Lexer(tokens, ignore=ignore) - def lex(self, stream, parser): + self.set_parser_state(None) # Needs to be set on the outside + + def set_parser_state(self, state): + self.parser_state = state + + def lex(self, stream): lex_pos = 0 line = 1 col_start_pos = 0 newline_types = list(self.root_lexer.newline_types) ignore_types = list(self.root_lexer.ignore_types) while True: - lexer = self.lexers[parser.state] + lexer = self.lexers[self.parser_state] for mre, type_from_index in lexer.mres: m = mre.match(stream, lex_pos) if m: diff --git a/lark/parser_frontends.py b/lark/parser_frontends.py index 9e5c248..1c46d35 100644 --- a/lark/parser_frontends.py +++ b/lark/parser_frontends.py @@ -2,7 +2,6 @@ import re import sre_parse from .lexer import Lexer, ContextualLexer -from .parsers.lalr_analysis import LALR_Analyzer from .common import is_terminal, GrammarError from .parsers import lalr_parser, earley @@ -22,11 +21,9 @@ class WithLexer: class LALR(WithLexer): def __init__(self, lexer_conf, parser_conf): WithLexer.__init__(self, lexer_conf) - self.parser_conf = parser_conf - analyzer = LALR_Analyzer(parser_conf.rules, parser_conf.start) - analyzer.compute_lookahead() - self.parser = lalr_parser.Parser(analyzer, parser_conf.callback) + self.parser_conf = parser_conf + self.parser = lalr_parser.Parser(parser_conf) def parse(self, text): tokens = list(self.lex(text)) @@ -37,21 +34,19 @@ class LALR_ContextualLexer: self.lexer_conf = lexer_conf self.parser_conf = parser_conf - self.analyzer = LALR_Analyzer(parser_conf.rules, parser_conf.start) - self.analyzer.compute_lookahead() + self.parser = lalr_parser.Parser(parser_conf) - d = {idx:t.keys() for idx, t in self.analyzer.states_idx.items()} + d = {idx:t.keys() for idx, t in self.parser.analysis.states_idx.items()} self.lexer = ContextualLexer(lexer_conf.tokens, d, 
ignore=lexer_conf.ignore, always_accept=lexer_conf.postlex.always_accept if lexer_conf.postlex else ()) def parse(self, text): - parser = lalr_parser.Parser(self.analyzer, self.parser_conf.callback) - tokens = self.lexer.lex(text, parser) + tokens = self.lexer.lex(text) if self.lexer_conf.postlex: tokens = self.lexer_conf.postlex.process(tokens) - return parser.parse(tokens, True) + return self.parser.parse(tokens, self.lexer.set_parser_state) diff --git a/lark/parsers/lalr_parser.py b/lark/parsers/lalr_parser.py index 3280d01..7394f91 100644 --- a/lark/parsers/lalr_parser.py +++ b/lark/parsers/lalr_parser.py @@ -3,13 +3,13 @@ from ..common import ParseError, UnexpectedToken from .lalr_analysis import LALR_Analyzer, ACTION_SHIFT class Parser(object): - def __init__(self, analysis, callback): - self.analysis = analysis - self.callbacks = {rule: getattr(callback, rule.alias or rule.origin, None) - for rule in analysis.rules} - self.state = self.analysis.init_state_idx + def __init__(self, parser_conf): + self.analysis = LALR_Analyzer(parser_conf.rules, parser_conf.start) + self.analysis.compute_lookahead() + self.callbacks = {rule: getattr(parser_conf.callback, rule.alias or rule.origin, None) + for rule in self.analysis.rules} - def parse(self, seq, set_state=False): + def parse(self, seq, set_state=None): i = 0 stream = iter(seq) states_idx = self.analysis.states_idx @@ -17,6 +17,8 @@ class Parser(object): state_stack = [self.analysis.init_state_idx] value_stack = [] + if set_state: set_state(self.analysis.init_state_idx) + def get_action(key): state = state_stack[-1] try: @@ -54,7 +56,7 @@ class Parser(object): if action == ACTION_SHIFT: state_stack.append(arg) value_stack.append(token) - if set_state: self.state = arg + if set_state: set_state(arg) token = next(stream) i += 1 else: From 538f944602172061ba4396dc726e575c37f7aa72 Mon Sep 17 00:00:00 2001 From: Erez Shinan Date: Mon, 20 Feb 2017 20:00:24 +0200 Subject: [PATCH 7/9] My Earley parser is working --- lark/common.py | 2 +- lark/parser_frontends.py | 60 ++++++++++++++- lark/parsers/earley2.py | 123 +++++++++++++++++-------------- lark/parsers/grammar_analysis.py | 2 +- lark/parsers/lalr_analysis.py | 2 +- 5 files changed, 127 insertions(+), 62 deletions(-) diff --git a/lark/common.py b/lark/common.py index 06220f0..122c7e5 100644 --- a/lark/common.py +++ b/lark/common.py @@ -28,7 +28,7 @@ class UnexpectedToken(ParseError): def is_terminal(sym): - return sym.isupper() or sym[0] == '$' + return isinstance(sym, tuple) or sym.isupper() or sym[0] == '$' class LexerConf: diff --git a/lark/parser_frontends.py b/lark/parser_frontends.py index 1c46d35..e9f117c 100644 --- a/lark/parser_frontends.py +++ b/lark/parser_frontends.py @@ -3,8 +3,9 @@ import sre_parse from .lexer import Lexer, ContextualLexer -from .common import is_terminal, GrammarError -from .parsers import lalr_parser, earley +from .common import is_terminal, GrammarError, ParserConf +from .parsers import lalr_parser, earley, earley2 +from .parsers.grammar_analysis import Rule class WithLexer: def __init__(self, lexer_conf): @@ -50,7 +51,7 @@ class LALR_ContextualLexer: -class Earley(WithLexer): +class Nearley(WithLexer): def __init__(self, lexer_conf, parser_conf): WithLexer.__init__(self, lexer_conf) @@ -74,6 +75,26 @@ class Earley(WithLexer): assert len(res) ==1 , 'Ambiguious Parse! 
Not handled yet' return res[0] + +class MyEarley(WithLexer): + def __init__(self, lexer_conf, parser_conf): + WithLexer.__init__(self, lexer_conf) + + rules = [(n, self._prepare_expansion(x), a) + for n,x,a in parser_conf.rules] + + self.parser = earley2.Parser(ParserConf(rules, parser_conf.callback, parser_conf.start)) + + def _prepare_expansion(self, expansion): + return [(sym,) if is_terminal(sym) else sym for sym in expansion] + + def parse(self, text): + tokens = list(self.lex(text)) + res = self.parser.parse(tokens) + assert len(res) ==1 , 'Ambiguious Parse! Not handled yet' + return res[0] + + class Earley_NoLex: def __init__(self, lexer_conf, parser_conf): self.token_by_name = {t.name:t for t in lexer_conf.tokens} @@ -101,4 +122,35 @@ class Earley_NoLex: assert len(res) ==1 , 'Ambiguious Parse! Not handled yet' return res[0] -ENGINE_DICT = { 'lalr': LALR, 'earley': Earley, 'earley_nolex': Earley_NoLex, 'lalr_contextual_lexer': LALR_ContextualLexer } + +class MyEarley_NoLex: + def __init__(self, lexer_conf, parser_conf): + self.token_by_name = {t.name:t for t in lexer_conf.tokens} + + rules = [(n, list(self._prepare_expansion(x)), a) + for n,x,a in parser_conf.rules] + + self.parser = earley2.Parser(ParserConf(rules, parser_conf.callback, parser_conf.start)) + + def _prepare_expansion(self, expansion): + for sym in expansion: + if is_terminal(sym): + regexp = self.token_by_name[sym].to_regexp() + width = sre_parse.parse(regexp).getwidth() + if not width == (1,1): + raise GrammarError('Dynamic lexing requires all tokens to have a width of 1 (%s is %s)' % (regexp, width)) + yield re.compile(regexp).match + else: + yield sym + + def parse(self, text): + res = self.parser.parse(text) + assert len(res) ==1 , 'Ambiguious Parse! Not handled yet' + return res[0] + +ENGINE_DICT = { + 'lalr': LALR, + 'earley': MyEarley, + 'earley_nolex': Earley_NoLex, + 'lalr_contextual_lexer': LALR_ContextualLexer +} diff --git a/lark/parsers/earley2.py b/lark/parsers/earley2.py index 7527248..6348747 100644 --- a/lark/parsers/earley2.py +++ b/lark/parsers/earley2.py @@ -1,67 +1,63 @@ +import sys + from ..common import ParseError, UnexpectedToken, is_terminal from grammar_analysis import GrammarAnalyzer -from ..tree import Tree +# is_terminal = callable class Item: - def __init__(self, rule_ptr, start, data): - self.rule_ptr = rule_ptr + def __init__(self, rule, ptr, start, data): + self.rule = rule + self.ptr = ptr self.start = start self.data = data @property def expect(self): - return self.rule_ptr.next + return self.rule.expansion[self.ptr] @property def is_complete(self): - return self.rule_ptr.is_satisfied - - @property - def name(self): - return self.rule_ptr.rule.origin + return self.ptr == len(self.rule.expansion) def advance(self, data): - return Item(self.rule_ptr.advance(self.expect), self.start, self.data + [data]) + return Item(self.rule, self.ptr+1, self.start, self.data + [data]) def __eq__(self, other): - return self.rule_ptr == other.rule_ptr and self.start == other.start + return self.start == other.start and self.ptr == other.ptr and self.rule == other.rule def __hash__(self): - return hash((self.rule_ptr, self.start)) - - def __repr__(self): - return '%s (%s)' % (self.rule_ptr, self.start) + return hash((self.rule, self.ptr, self.start)) class Parser: - def __init__(self, rules, start): - self.analyzer = GrammarAnalyzer(rules, start) - self.start = start + def __init__(self, parser_conf): + self.analysis = GrammarAnalyzer(parser_conf.rules, parser_conf.start) + self.start = 
parser_conf.start + self.postprocess = {} + self.predictions = {} + for rule in self.analysis.rules: + if rule.origin != '$root': # XXX kinda ugly + self.postprocess[rule] = getattr(parser_conf.callback, rule.alias) + self.predictions[rule.origin] = [(x.rule, x.index) for x in self.analysis.expand_rule(rule.origin)] def parse(self, stream): # Define parser functions def predict(symbol, i): assert not is_terminal(symbol), symbol - return {Item(rp, i, []) for rp in self.analyzer.expand_rule(symbol)} - - def scan(item, inp): - if item.expect == inp: # TODO Do a smarter match, i.e. regexp - return {item.advance(inp)} - else: - return set() + return {Item(rule, index, i, []) for rule, index in self.predictions[symbol]} def complete(item, table): - name = item.name - item.data = Tree(name, item.data) + #item.data = (item.rule_ptr.rule, item.data) + item.data = self.postprocess[item.rule](item.data) return {old_item.advance(item.data) for old_item in table[item.start] - if not old_item.is_complete and old_item.expect == name} + if not old_item.is_complete and old_item.expect == item.rule.origin} - def process_column(i, char): - cur_set = table[-1] + def process_column(i, term): + assert i == len(table)-1 + cur_set = table[i] next_set = set() - table.append(next_set) to_process = cur_set while to_process: @@ -71,61 +67,78 @@ class Parser: new_items |= complete(item, table) else: if is_terminal(item.expect): - next_set |= scan(item, char) + # scan + if item.expect[0] == term: + next_set.add(item.advance(stream[i])) else: - new_items |= predict(item.expect, i) + if item.ptr: # part of an already predicted batch + new_items |= predict(item.expect, i) - to_process = new_items - cur_set + to_process = new_items - cur_set # TODO: is this precaution necessary? cur_set |= to_process - if not next_set and char != '$end': - expect = filter(is_terminal, [i.expect for i in cur_set if not i.is_complete]) - raise UnexpectedToken(char, expect, stream, i) + + if not next_set and term != '$end': + expect = filter(is_terminal, [x.expect for x in cur_set if not x.is_complete]) + raise UnexpectedToken(term, expect, stream, i) + + table.append(next_set) # Main loop starts table = [predict(self.start, 0)] for i, char in enumerate(stream): - process_column(i, char) + process_column(i, char.type) process_column(len(stream), '$end') # Parse ended. 
Now build a parse tree solutions = [n.data for n in table[len(stream)] - if n.is_complete and n.name==self.start and n.start==0] + if n.is_complete and n.rule.origin==self.start and n.start==0] if not solutions: raise ParseError('Incomplete parse: Could not find a solution to input') return solutions + #return map(self.reduce_solution, solutions) + def reduce_solution(self, solution): + rule, children = solution + children = [self.reduce_solution(c) if isinstance(c, tuple) else c for c in children] + return self.postprocess[rule](children) - +from ..common import ParserConf +# A = 'A'.__eq__ # rules = [ -# ('a', ['a', 'A']), -# ('a', ['a', 'A', 'a']), -# ('a', ['a', 'A', 'A', 'a']), -# ('a', ['A']), +# ('a', ['a', A], None), +# ('a', ['a', A, 'a'], None), +# ('a', ['a', A, A, 'a'], None), +# ('a', [A], None), # ] -# p = Parser(rules, 'a') +# p = Parser(ParserConf(rules, None, 'a')) # for x in p.parse('AAAA'): # print '->' # print x.pretty() +# import re +# NUM = re.compile('[0-9]').match +# ADD = re.compile('[+-]').match +# MUL = re.compile('[*/]').match # rules = [ -# ('sum', ['sum', "A", 'product']), -# ('sum', ['product']), -# ('product', ['product', "M", 'factor']), -# ('product', ['factor']), -# ('factor', ['L', 'sum', 'R']), -# ('factor', ['number']), -# ('number', ['N', 'number']), -# ('number', ['N']), +# ('sum', ['sum', ADD, 'product'], None), +# ('sum', ['product'], None), +# ('product', ['product', MUL, 'factor'], None), +# ('product', ['factor'], None), +# ('factor', ['('.__eq__, 'sum', ')'.__eq__], None), +# ('factor', ['number'], None), +# ('number', [NUM, 'number'], None), +# ('number', [NUM], None), # ] -# p = Parser(rules, 'sum') -# print p.parse('NALNMNANR') +# p = Parser(ParserConf(rules, None, 'sum')) +# # print p.parse('NALNMNANR') +# print p.parse('1+(2*3-4)')[0].pretty() diff --git a/lark/parsers/grammar_analysis.py b/lark/parsers/grammar_analysis.py index d51700a..c03d9ae 100644 --- a/lark/parsers/grammar_analysis.py +++ b/lark/parsers/grammar_analysis.py @@ -133,7 +133,7 @@ class GrammarAnalyzer(object): "Returns all init_ptrs accessible by rule (recursive)" init_ptrs = set() def _expand_rule(rule): - assert not is_terminal(rule) + assert not is_terminal(rule), rule for r in self.rules_by_origin[rule]: init_ptr = RulePtr(r, 0) diff --git a/lark/parsers/lalr_analysis.py b/lark/parsers/lalr_analysis.py index 8a8365d..83f96fc 100644 --- a/lark/parsers/lalr_analysis.py +++ b/lark/parsers/lalr_analysis.py @@ -4,7 +4,7 @@ from collections import defaultdict from ..utils import classify, classify_bool, bfs, fzset from ..common import GrammarError, is_terminal -from grammar_analysis import GrammarAnalyzer +from .grammar_analysis import GrammarAnalyzer ACTION_SHIFT = 0 From c17558dd91a01990408bda747deb15fbd13c0493 Mon Sep 17 00:00:00 2001 From: Erez Shinan Date: Mon, 20 Feb 2017 20:15:29 +0200 Subject: [PATCH 8/9] Official switched to my Earley implementation --- lark/parser_frontends.py | 32 ++--- lark/parsers/earley.py | 276 +++++++++++++++++++-------------------- lark/parsers/earley2.py | 144 -------------------- lark/parsers/nearley.py | 155 ++++++++++++++++++++++ 4 files changed, 301 insertions(+), 306 deletions(-) delete mode 100644 lark/parsers/earley2.py create mode 100644 lark/parsers/nearley.py diff --git a/lark/parser_frontends.py b/lark/parser_frontends.py index e9f117c..891615a 100644 --- a/lark/parser_frontends.py +++ b/lark/parser_frontends.py @@ -1,10 +1,10 @@ import re import sre_parse -from .lexer import Lexer, ContextualLexer +from .lexer import Lexer, 
ContextualLexer, Token from .common import is_terminal, GrammarError, ParserConf -from .parsers import lalr_parser, earley, earley2 +from .parsers import lalr_parser, earley, nearley from .parsers.grammar_analysis import Rule class WithLexer: @@ -56,18 +56,14 @@ class Nearley(WithLexer): WithLexer.__init__(self, lexer_conf) rules = [{'name':n, - 'symbols': list(self._prepare_expansion(x)), + 'symbols': self._prepare_expansion(x), 'postprocess': getattr(parser_conf.callback, a)} for n,x,a in parser_conf.rules] - self.parser = earley.Parser(rules, parser_conf.start) + self.parser = nearley.Parser(rules, parser_conf.start) def _prepare_expansion(self, expansion): - for sym in expansion: - if is_terminal(sym): - yield sym, None - else: - yield sym + return [(sym, None) if is_terminal(sym) else sym for sym in expansion] def parse(self, text): tokens = list(self.lex(text)) @@ -76,14 +72,14 @@ class Nearley(WithLexer): return res[0] -class MyEarley(WithLexer): +class Earley(WithLexer): def __init__(self, lexer_conf, parser_conf): WithLexer.__init__(self, lexer_conf) rules = [(n, self._prepare_expansion(x), a) for n,x,a in parser_conf.rules] - self.parser = earley2.Parser(ParserConf(rules, parser_conf.callback, parser_conf.start)) + self.parser = earley.Parser(ParserConf(rules, parser_conf.callback, parser_conf.start)) def _prepare_expansion(self, expansion): return [(sym,) if is_terminal(sym) else sym for sym in expansion] @@ -95,7 +91,7 @@ class MyEarley(WithLexer): return res[0] -class Earley_NoLex: +class Nearley_NoLex: def __init__(self, lexer_conf, parser_conf): self.token_by_name = {t.name:t for t in lexer_conf.tokens} @@ -104,7 +100,7 @@ class Earley_NoLex: 'postprocess': getattr(parser_conf.callback, a)} for n,x,a in parser_conf.rules] - self.parser = earley.Parser(rules, parser_conf.start) + self.parser = nearley.Parser(rules, parser_conf.start) def _prepare_expansion(self, expansion): for sym in expansion: @@ -123,14 +119,14 @@ class Earley_NoLex: return res[0] -class MyEarley_NoLex: +class Earley_NoLex: def __init__(self, lexer_conf, parser_conf): self.token_by_name = {t.name:t for t in lexer_conf.tokens} rules = [(n, list(self._prepare_expansion(x)), a) for n,x,a in parser_conf.rules] - self.parser = earley2.Parser(ParserConf(rules, parser_conf.callback, parser_conf.start)) + self.parser = earley.Parser(ParserConf(rules, parser_conf.callback, parser_conf.start)) def _prepare_expansion(self, expansion): for sym in expansion: @@ -139,18 +135,18 @@ class MyEarley_NoLex: width = sre_parse.parse(regexp).getwidth() if not width == (1,1): raise GrammarError('Dynamic lexing requires all tokens to have a width of 1 (%s is %s)' % (regexp, width)) - yield re.compile(regexp).match + yield (re.compile(regexp).match,) else: yield sym def parse(self, text): - res = self.parser.parse(text) + res = self.parser.parse([Token(x,x) for x in text]) # A little hacky perhaps! assert len(res) ==1 , 'Ambiguious Parse! 
Not handled yet' return res[0] ENGINE_DICT = { 'lalr': LALR, - 'earley': MyEarley, + 'earley': Earley, 'earley_nolex': Earley_NoLex, 'lalr_contextual_lexer': LALR_ContextualLexer } diff --git a/lark/parsers/earley.py b/lark/parsers/earley.py index b2a511e..2887a52 100644 --- a/lark/parsers/earley.py +++ b/lark/parsers/earley.py @@ -1,155 +1,143 @@ -"My name is Earley" +from ..common import ParseError, UnexpectedToken, is_terminal +from .grammar_analysis import GrammarAnalyzer -from ..utils import classify, STRING_TYPE -from ..common import ParseError, UnexpectedToken +# is_terminal = callable -try: - xrange -except NameError: - xrange = range - -class MatchFailed(object): - pass - -class AbortParseMatch(Exception): - pass - - -class Rule(object): - def __init__(self, name, symbols, postprocess): - self.name = name - self.symbols = symbols - self.postprocess = postprocess - -class State(object): - def __init__(self, rule, expect, reference, data=None): +class Item: + def __init__(self, rule, ptr, start, data): self.rule = rule - self.expect = expect - self.reference = reference - self.data = data or [] - - self.is_complete = (self.expect == len(self.rule.symbols)) - if not self.is_complete: - self.expect_symbol = self.rule.symbols[self.expect] - self.is_terminal = isinstance(self.expect_symbol, tuple) - else: - self.is_terminal = False - - def next_state(self, data): - return State(self.rule, self.expect+1, self.reference, self.data + [data]) - - def consume_terminal(self, inp): - if not self.is_complete and self.is_terminal: - # PORT: originally tests regexp - - if self.expect_symbol[1] is not None: - match = self.expect_symbol[1].match(inp) - if match: - return self.next_state(inp) - - elif self.expect_symbol[0] == inp.type: - return self.next_state(inp) - - def consume_nonterminal(self, inp): - if not self.is_complete and not self.is_terminal: - - if self.expect_symbol == inp: - return self.next_state(inp) - - def process(self, location, ind, table, rules, added_rules): - - if self.is_complete: - # Completed a rule - if self.rule.postprocess: - try: - self.data = self.rule.postprocess(self.data) - except AbortParseMatch: - self.data = MatchFailed - - if self.data is not MatchFailed: - for s in table[self.reference]: - x = s.consume_nonterminal(self.rule.name) - if x: - x.data[-1] = self.data - x.epsilon_closure(location, ind, table) - - else: - exp = self.rule.symbols[self.expect] - if isinstance(exp, tuple): - return - - for r in rules[exp]: - assert r.name == exp - if r not in added_rules: - if r.symbols: - added_rules.add(r) - State(r, 0, location).epsilon_closure(location, ind, table) - else: - # Empty rule - new_copy = self.consume_nonterminal(r.name) - new_copy.data[-1] = r.postprocess([]) if r.postprocess else [] + self.ptr = ptr + self.start = start + self.data = data - new_copy.epsilon_closure(location, ind, table) + @property + def expect(self): + return self.rule.expansion[self.ptr] - def epsilon_closure(self, location, ind, table): - col = table[location] - col.append(self) + @property + def is_complete(self): + return self.ptr == len(self.rule.expansion) - if not self.is_complete: - for i in xrange(ind): - state = col[i] - if state.is_complete and state.reference == location: - x = self.consume_nonterminal(state.rule.name) - if x: - x.data[-1] = state.data - x.epsilon_closure(location, ind, table) + def advance(self, data): + return Item(self.rule, self.ptr+1, self.start, self.data + [data]) + def __eq__(self, other): + return self.start == other.start and self.ptr == 
other.ptr and self.rule == other.rule + def __hash__(self): + return hash((self.rule, self.ptr, self.start)) -class Parser(object): - def __init__(self, rules, start=None): - self.rules = [Rule(r['name'], r['symbols'], r.get('postprocess', None)) for r in rules] - self.rules_by_name = classify(self.rules, lambda r: r.name) - self.start = start or self.rules[0].name - def advance_to(self, table, added_rules): - n = len(table)-1 - for w, s in enumerate(table[n]): - s.process(n, w, table, self.rules_by_name, added_rules) +class Parser: + def __init__(self, parser_conf): + self.analysis = GrammarAnalyzer(parser_conf.rules, parser_conf.start) + self.start = parser_conf.start + + self.postprocess = {} + self.predictions = {} + for rule in self.analysis.rules: + if rule.origin != '$root': # XXX kinda ugly + self.postprocess[rule] = getattr(parser_conf.callback, rule.alias) + self.predictions[rule.origin] = [(x.rule, x.index) for x in self.analysis.expand_rule(rule.origin)] def parse(self, stream): - initial_rules = set(self.rules_by_name[self.start]) - table = [[State(r, 0, 0) for r in initial_rules]] - self.advance_to(table, initial_rules) - - i = 0 - - while i < len(stream): - col = [] - - token = stream[i] - for s in table[-1]: - x = s.consume_terminal(token) - if x: - col.append(x) - - if not col: - expected = {s.expect_symbol for s in table[-1] if s.is_terminal} - raise UnexpectedToken(stream[i], expected, stream, i) - - table.append(col) - self.advance_to(table, set()) - - i += 1 - - res = list(self.finish(table)) - if not res: - raise ParseError('Incomplete parse') - return res - - def finish(self, table): - for t in table[-1]: - if (t.rule.name == self.start - and t.expect == len(t.rule.symbols) - and t.reference == 0 - and t.data is not MatchFailed): - yield t.data + # Define parser functions + + def predict(symbol, i): + assert not is_terminal(symbol), symbol + return {Item(rule, index, i, []) for rule, index in self.predictions[symbol]} + + def complete(item, table): + #item.data = (item.rule_ptr.rule, item.data) + item.data = self.postprocess[item.rule](item.data) + return {old_item.advance(item.data) for old_item in table[item.start] + if not old_item.is_complete and old_item.expect == item.rule.origin} + + def process_column(i, term): + assert i == len(table)-1 + cur_set = table[i] + next_set = set() + + to_process = cur_set + while to_process: + new_items = set() + for item in to_process: + if item.is_complete: + new_items |= complete(item, table) + else: + if is_terminal(item.expect): + # scan + match = item.expect[0](term) if callable(item.expect[0]) else item.expect[0] == term + if match: + next_set.add(item.advance(stream[i])) + else: + if item.ptr: # part of an already predicted batch + new_items |= predict(item.expect, i) + + to_process = new_items - cur_set # TODO: is this precaution necessary? + cur_set |= to_process + + + if not next_set and term != '$end': + expect = filter(is_terminal, [x.expect for x in cur_set if not x.is_complete]) + raise UnexpectedToken(term, expect, stream, i) + + table.append(next_set) + + # Main loop starts + + table = [predict(self.start, 0)] + + for i, char in enumerate(stream): + process_column(i, char.type) + + process_column(len(stream), '$end') + + # Parse ended. 
Now build a parse tree + solutions = [n.data for n in table[len(stream)] + if n.is_complete and n.rule.origin==self.start and n.start==0] + + if not solutions: + raise ParseError('Incomplete parse: Could not find a solution to input') + + return solutions + #return map(self.reduce_solution, solutions) + + def reduce_solution(self, solution): + rule, children = solution + children = [self.reduce_solution(c) if isinstance(c, tuple) else c for c in children] + return self.postprocess[rule](children) + + + +from ..common import ParserConf +# A = 'A'.__eq__ +# rules = [ +# ('a', ['a', A], None), +# ('a', ['a', A, 'a'], None), +# ('a', ['a', A, A, 'a'], None), +# ('a', [A], None), +# ] + +# p = Parser(ParserConf(rules, None, 'a')) +# for x in p.parse('AAAA'): +# print '->' +# print x.pretty() + +# import re +# NUM = re.compile('[0-9]').match +# ADD = re.compile('[+-]').match +# MUL = re.compile('[*/]').match +# rules = [ +# ('sum', ['sum', ADD, 'product'], None), +# ('sum', ['product'], None), +# ('product', ['product', MUL, 'factor'], None), +# ('product', ['factor'], None), +# ('factor', ['('.__eq__, 'sum', ')'.__eq__], None), +# ('factor', ['number'], None), +# ('number', [NUM, 'number'], None), +# ('number', [NUM], None), +# ] + +# p = Parser(ParserConf(rules, None, 'sum')) +# # print p.parse('NALNMNANR') +# print p.parse('1+(2*3-4)')[0].pretty() diff --git a/lark/parsers/earley2.py b/lark/parsers/earley2.py deleted file mode 100644 index 6348747..0000000 --- a/lark/parsers/earley2.py +++ /dev/null @@ -1,144 +0,0 @@ -import sys - -from ..common import ParseError, UnexpectedToken, is_terminal -from grammar_analysis import GrammarAnalyzer - -# is_terminal = callable - -class Item: - def __init__(self, rule, ptr, start, data): - self.rule = rule - self.ptr = ptr - self.start = start - self.data = data - - @property - def expect(self): - return self.rule.expansion[self.ptr] - - @property - def is_complete(self): - return self.ptr == len(self.rule.expansion) - - def advance(self, data): - return Item(self.rule, self.ptr+1, self.start, self.data + [data]) - - def __eq__(self, other): - return self.start == other.start and self.ptr == other.ptr and self.rule == other.rule - def __hash__(self): - return hash((self.rule, self.ptr, self.start)) - - -class Parser: - def __init__(self, parser_conf): - self.analysis = GrammarAnalyzer(parser_conf.rules, parser_conf.start) - self.start = parser_conf.start - - self.postprocess = {} - self.predictions = {} - for rule in self.analysis.rules: - if rule.origin != '$root': # XXX kinda ugly - self.postprocess[rule] = getattr(parser_conf.callback, rule.alias) - self.predictions[rule.origin] = [(x.rule, x.index) for x in self.analysis.expand_rule(rule.origin)] - - def parse(self, stream): - # Define parser functions - - def predict(symbol, i): - assert not is_terminal(symbol), symbol - return {Item(rule, index, i, []) for rule, index in self.predictions[symbol]} - - def complete(item, table): - #item.data = (item.rule_ptr.rule, item.data) - item.data = self.postprocess[item.rule](item.data) - return {old_item.advance(item.data) for old_item in table[item.start] - if not old_item.is_complete and old_item.expect == item.rule.origin} - - def process_column(i, term): - assert i == len(table)-1 - cur_set = table[i] - next_set = set() - - to_process = cur_set - while to_process: - new_items = set() - for item in to_process: - if item.is_complete: - new_items |= complete(item, table) - else: - if is_terminal(item.expect): - # scan - if item.expect[0] == term: - 
next_set.add(item.advance(stream[i])) - else: - if item.ptr: # part of an already predicted batch - new_items |= predict(item.expect, i) - - to_process = new_items - cur_set # TODO: is this precaution necessary? - cur_set |= to_process - - - if not next_set and term != '$end': - expect = filter(is_terminal, [x.expect for x in cur_set if not x.is_complete]) - raise UnexpectedToken(term, expect, stream, i) - - table.append(next_set) - - # Main loop starts - - table = [predict(self.start, 0)] - - for i, char in enumerate(stream): - process_column(i, char.type) - - process_column(len(stream), '$end') - - # Parse ended. Now build a parse tree - solutions = [n.data for n in table[len(stream)] - if n.is_complete and n.rule.origin==self.start and n.start==0] - - if not solutions: - raise ParseError('Incomplete parse: Could not find a solution to input') - - return solutions - #return map(self.reduce_solution, solutions) - - def reduce_solution(self, solution): - rule, children = solution - children = [self.reduce_solution(c) if isinstance(c, tuple) else c for c in children] - return self.postprocess[rule](children) - - - -from ..common import ParserConf -# A = 'A'.__eq__ -# rules = [ -# ('a', ['a', A], None), -# ('a', ['a', A, 'a'], None), -# ('a', ['a', A, A, 'a'], None), -# ('a', [A], None), -# ] - -# p = Parser(ParserConf(rules, None, 'a')) -# for x in p.parse('AAAA'): -# print '->' -# print x.pretty() - -# import re -# NUM = re.compile('[0-9]').match -# ADD = re.compile('[+-]').match -# MUL = re.compile('[*/]').match -# rules = [ -# ('sum', ['sum', ADD, 'product'], None), -# ('sum', ['product'], None), -# ('product', ['product', MUL, 'factor'], None), -# ('product', ['factor'], None), -# ('factor', ['('.__eq__, 'sum', ')'.__eq__], None), -# ('factor', ['number'], None), -# ('number', [NUM, 'number'], None), -# ('number', [NUM], None), -# ] - -# p = Parser(ParserConf(rules, None, 'sum')) -# # print p.parse('NALNMNANR') -# print p.parse('1+(2*3-4)')[0].pretty() diff --git a/lark/parsers/nearley.py b/lark/parsers/nearley.py new file mode 100644 index 0000000..b2a511e --- /dev/null +++ b/lark/parsers/nearley.py @@ -0,0 +1,155 @@ +"My name is Earley" + +from ..utils import classify, STRING_TYPE +from ..common import ParseError, UnexpectedToken + +try: + xrange +except NameError: + xrange = range + +class MatchFailed(object): + pass + +class AbortParseMatch(Exception): + pass + + +class Rule(object): + def __init__(self, name, symbols, postprocess): + self.name = name + self.symbols = symbols + self.postprocess = postprocess + +class State(object): + def __init__(self, rule, expect, reference, data=None): + self.rule = rule + self.expect = expect + self.reference = reference + self.data = data or [] + + self.is_complete = (self.expect == len(self.rule.symbols)) + if not self.is_complete: + self.expect_symbol = self.rule.symbols[self.expect] + self.is_terminal = isinstance(self.expect_symbol, tuple) + else: + self.is_terminal = False + + def next_state(self, data): + return State(self.rule, self.expect+1, self.reference, self.data + [data]) + + def consume_terminal(self, inp): + if not self.is_complete and self.is_terminal: + # PORT: originally tests regexp + + if self.expect_symbol[1] is not None: + match = self.expect_symbol[1].match(inp) + if match: + return self.next_state(inp) + + elif self.expect_symbol[0] == inp.type: + return self.next_state(inp) + + def consume_nonterminal(self, inp): + if not self.is_complete and not self.is_terminal: + + if self.expect_symbol == inp: + return 
self.next_state(inp) + + def process(self, location, ind, table, rules, added_rules): + + if self.is_complete: + # Completed a rule + if self.rule.postprocess: + try: + self.data = self.rule.postprocess(self.data) + except AbortParseMatch: + self.data = MatchFailed + + if self.data is not MatchFailed: + for s in table[self.reference]: + x = s.consume_nonterminal(self.rule.name) + if x: + x.data[-1] = self.data + x.epsilon_closure(location, ind, table) + + else: + exp = self.rule.symbols[self.expect] + if isinstance(exp, tuple): + return + + for r in rules[exp]: + assert r.name == exp + if r not in added_rules: + if r.symbols: + added_rules.add(r) + State(r, 0, location).epsilon_closure(location, ind, table) + else: + # Empty rule + new_copy = self.consume_nonterminal(r.name) + new_copy.data[-1] = r.postprocess([]) if r.postprocess else [] + + new_copy.epsilon_closure(location, ind, table) + + def epsilon_closure(self, location, ind, table): + col = table[location] + col.append(self) + + if not self.is_complete: + for i in xrange(ind): + state = col[i] + if state.is_complete and state.reference == location: + x = self.consume_nonterminal(state.rule.name) + if x: + x.data[-1] = state.data + x.epsilon_closure(location, ind, table) + + +class Parser(object): + def __init__(self, rules, start=None): + self.rules = [Rule(r['name'], r['symbols'], r.get('postprocess', None)) for r in rules] + self.rules_by_name = classify(self.rules, lambda r: r.name) + self.start = start or self.rules[0].name + + def advance_to(self, table, added_rules): + n = len(table)-1 + for w, s in enumerate(table[n]): + s.process(n, w, table, self.rules_by_name, added_rules) + + def parse(self, stream): + initial_rules = set(self.rules_by_name[self.start]) + table = [[State(r, 0, 0) for r in initial_rules]] + self.advance_to(table, initial_rules) + + i = 0 + + while i < len(stream): + col = [] + + token = stream[i] + for s in table[-1]: + x = s.consume_terminal(token) + if x: + col.append(x) + + if not col: + expected = {s.expect_symbol for s in table[-1] if s.is_terminal} + raise UnexpectedToken(stream[i], expected, stream, i) + + table.append(col) + self.advance_to(table, set()) + + i += 1 + + res = list(self.finish(table)) + if not res: + raise ParseError('Incomplete parse') + return res + + def finish(self, table): + for t in table[-1]: + if (t.rule.name == self.start + and t.expect == len(t.rule.symbols) + and t.reference == 0 + and t.data is not MatchFailed): + yield t.data From c14435ac959dc2ea4df40d9d8f1e15091c70268f Mon Sep 17 00:00:00 2001 From: Erez Shinan Date: Mon, 20 Feb 2017 20:20:26 +0200 Subject: [PATCH 9/9] Some cleanup --- lark/parsers/earley.py | 45 ------------------------------------------ 1 file changed, 45 deletions(-) diff --git a/lark/parsers/earley.py b/lark/parsers/earley.py index 2887a52..0ba74a2 100644 --- a/lark/parsers/earley.py +++ b/lark/parsers/earley.py @@ -1,8 +1,6 @@ from ..common import ParseError, UnexpectedToken, is_terminal from .grammar_analysis import GrammarAnalyzer -# is_terminal = callable - class Item: def __init__(self, rule, ptr, start, data): self.rule = rule @@ -47,7 +45,6 @@ class Parser: return {Item(rule, index, i, []) for rule, index in self.predictions[symbol]} def complete(item, table): - #item.data = (item.rule_ptr.rule, item.data) item.data = self.postprocess[item.rule](item.data) return {old_item.advance(item.data) for old_item in table[item.start] if not old_item.is_complete and old_item.expect == item.rule.origin} @@ -84,7 +81,6 @@ class Parser: 
table.append(next_set) # Main loop starts - table = [predict(self.start, 0)] for i, char in enumerate(stream): @@ -100,44 +96,3 @@ class Parser: raise ParseError('Incomplete parse: Could not find a solution to input') return solutions - #return map(self.reduce_solution, solutions) - - def reduce_solution(self, solution): - rule, children = solution - children = [self.reduce_solution(c) if isinstance(c, tuple) else c for c in children] - return self.postprocess[rule](children) - - - -from ..common import ParserConf -# A = 'A'.__eq__ -# rules = [ -# ('a', ['a', A], None), -# ('a', ['a', A, 'a'], None), -# ('a', ['a', A, A, 'a'], None), -# ('a', [A], None), -# ] - -# p = Parser(ParserConf(rules, None, 'a')) -# for x in p.parse('AAAA'): -# print '->' -# print x.pretty() - -# import re -# NUM = re.compile('[0-9]').match -# ADD = re.compile('[+-]').match -# MUL = re.compile('[*/]').match -# rules = [ -# ('sum', ['sum', ADD, 'product'], None), -# ('sum', ['product'], None), -# ('product', ['product', MUL, 'factor'], None), -# ('product', ['factor'], None), -# ('factor', ['('.__eq__, 'sum', ')'.__eq__], None), -# ('factor', ['number'], None), -# ('number', [NUM, 'number'], None), -# ('number', [NUM], None), -# ] - -# p = Parser(ParserConf(rules, None, 'sum')) -# # print p.parse('NALNMNANR') -# print p.parse('1+(2*3-4)')[0].pretty()
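
For reference, below is a minimal, self-contained sketch of the predict/scan/complete loop that earley.py converges on after this cleanup. It is an illustration under simplifying assumptions, not Lark's API: the names (parse, by_origin, NUM, ADD, MUL, LPAR, RPAR) are hypothetical stand-ins, GrammarAnalyzer/ParserConf and the rule callbacks are dropped, prediction is done naively instead of through expand_rule, terminals are plain callables over single characters, and completed rules are collected as (origin, children) tuples rather than postprocessed trees.

class Item:
    """One Earley item: a rule, a dot position, the column it started in,
    and the children matched so far (eq/hash ignore the children, as in the patches)."""
    def __init__(self, rule, ptr, start, data):
        self.rule = rule            # (origin, expansion) pair
        self.ptr = ptr              # position of the dot inside the expansion
        self.start = start          # column where the rule was predicted
        self.data = data            # children matched so far

    @property
    def expect(self):
        return self.rule[1][self.ptr]

    @property
    def is_complete(self):
        return self.ptr == len(self.rule[1])

    def advance(self, child):
        return Item(self.rule, self.ptr + 1, self.start, self.data + [child])

    def __eq__(self, other):
        return (self.rule, self.ptr, self.start) == (other.rule, other.ptr, other.start)

    def __hash__(self):
        return hash((self.rule, self.ptr, self.start))


def parse(rules, start, stream):
    """rules: (origin, expansion) pairs; a symbol is a nonterminal if it appears
    as an origin, otherwise it must be a callable predicate over one character."""
    by_origin = {}
    for origin, expansion in rules:
        by_origin.setdefault(origin, []).append((origin, tuple(expansion)))

    def predict(symbol, i):
        # Naive prediction; the real parser precomputes this via expand_rule().
        return {Item(rule, 0, i, []) for rule in by_origin[symbol]}

    def complete(item, table):
        # Stand-in for the postprocess callback: build an (origin, children) node
        # and advance every parent item that was waiting for this nonterminal.
        node = (item.rule[0], item.data)
        return {old.advance(node) for old in table[item.start]
                if not old.is_complete and old.expect == item.rule[0]}

    table = [predict(start, 0)]
    for i, char in enumerate(list(stream) + [None]):    # None plays the role of '$end'
        cur_set, next_set = table[i], set()
        to_process = cur_set
        while to_process:                               # close the column under predict/complete
            new_items = set()
            for item in to_process:
                if item.is_complete:
                    new_items |= complete(item, table)
                elif item.expect in by_origin:          # nonterminal: predict
                    new_items |= predict(item.expect, i)
                elif char is not None and item.expect(char):    # terminal: scan
                    next_set.add(item.advance(char))
            to_process = new_items - cur_set
            cur_set |= to_process
        if not next_set and char is not None:
            raise SyntaxError('Unexpected character %r at position %d' % (char, i))
        table.append(next_set)

    # A solution is a completed start rule that spans the whole input.
    return [(item.rule[0], item.data) for item in table[len(stream)]
            if item.is_complete and item.rule[0] == start and item.start == 0]


# The arithmetic grammar from the commented-out test, with callables as terminals.
NUM = str.isdigit
ADD = lambda c: c in '+-'
MUL = lambda c: c in '*/'
LPAR = lambda c: c == '('
RPAR = lambda c: c == ')'

rules = [
    ('sum',     ['sum', ADD, 'product']),
    ('sum',     ['product']),
    ('product', ['product', MUL, 'factor']),
    ('product', ['factor']),
    ('factor',  [LPAR, 'sum', RPAR]),
    ('factor',  ['number']),
    ('number',  [NUM, 'number']),
    ('number',  [NUM]),
]

print(parse(rules, 'sum', '1+(2*3-4)'))

Run on the grammar above, parse(rules, 'sum', '1+(2*3-4)') returns a single nested ('sum', [...]) tuple; the real parser instead runs each completed rule's children through the callback looked up in self.postprocess, which is where the tree construction happens.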