diff --git a/lark/grammar.py b/lark/grammar.py
index 2689389..b555c34 100644
--- a/lark/grammar.py
+++ b/lark/grammar.py
@@ -8,9 +8,15 @@ class Symbol(object):
         assert isinstance(other, Symbol), other
         return self.is_term == other.is_term and self.name == other.name
 
+    def __ne__(self, other):
+        return not (self == other)
+
     def __hash__(self):
         return hash(self.name)
 
+    def __repr__(self):
+        return '%s(%r)' % (type(self).__name__, self.name)
+
 class Terminal(Symbol):
     is_term = True
diff --git a/lark/lexer.py b/lark/lexer.py
index e7af2a2..19e1be4 100644
--- a/lark/lexer.py
+++ b/lark/lexer.py
@@ -3,7 +3,7 @@
 import re
 
 from .utils import Str, classify
-from .common import PatternStr, PatternRE, TokenDef
+from .common import is_terminal, PatternStr, PatternRE, TokenDef
 
 ###{standalone
 class LexError(Exception):
@@ -234,7 +234,7 @@ class ContextualLexer:
                 lexer = lexer_by_tokens[key]
             except KeyError:
                 accepts = set(accepts) | set(ignore) | set(always_accept)
-                state_tokens = [tokens_by_name[n] for n in accepts if n.is_term and n.name!='$END']
+                state_tokens = [tokens_by_name[n] for n in accepts if is_terminal(n) and n!='$END']
                 lexer = Lexer(state_tokens, ignore=ignore, user_callbacks=user_callbacks)
                 lexer_by_tokens[key] = lexer
diff --git a/lark/parse_tree_builder.py b/lark/parse_tree_builder.py
index e81569f..1d4e2b8 100644
--- a/lark/parse_tree_builder.py
+++ b/lark/parse_tree_builder.py
@@ -110,7 +110,7 @@ class ParseTreeBuilder:
     def _init_builders(self, rules):
         filter_out = {rule.origin for rule in rules if rule.options and rule.options.filter_out}
         filter_out |= {sym for rule in rules for sym in rule.expansion if sym.is_term and sym.filter_out}
-        assert all(t.filter_out for t in filter_out)
+        assert all(t.name.startswith('_') for t in filter_out)
 
         for rule in rules:
             options = rule.options
diff --git a/lark/parser_frontends.py b/lark/parser_frontends.py
index 24c3622..e4401c1 100644
--- a/lark/parser_frontends.py
+++ b/lark/parser_frontends.py
@@ -7,7 +7,11 @@ from .lexer import Lexer, ContextualLexer, Token
 from .common import GrammarError
 from .parsers import lalr_parser, earley, xearley, resolve_ambig, cyk
 from .tree import Tree
-from .grammar import Terminal
+from .grammar import Terminal, NonTerminal
+
+def terminals(seq):
+    # return [Terminal(t) for t in seq]
+    return seq
 
 class WithLexer:
     def init_traditional_lexer(self, lexer_conf):
@@ -18,7 +22,10 @@
         self.lexer_conf = lexer_conf
         states = {idx:list(t.keys()) for idx, t in self.parser._parse_table.states.items()}
         always_accept = lexer_conf.postlex.always_accept if lexer_conf.postlex else ()
-        self.lexer = ContextualLexer(lexer_conf.tokens, states, ignore=lexer_conf.ignore, always_accept=always_accept, user_callbacks=lexer_conf.callbacks)
+        self.lexer = ContextualLexer(lexer_conf.tokens, states,
+                                     ignore=terminals(lexer_conf.ignore),
+                                     always_accept=terminals(always_accept),
+                                     user_callbacks=lexer_conf.callbacks)
 
     def lex(self, text):
         stream = self.lexer.lex(text)
@@ -74,7 +81,7 @@ class Earley_NoLex:
 
     def match(self, term, text, index=0):
-        return self.regexps[term].match(text, index)
+        return self.regexps[term.name].match(text, index)
 
     def _prepare_match(self, lexer_conf):
         self.regexps = {}
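
Side note on the grammar.py hunk above: this codebase still supports Python 2
(see the xrange shim in cyk.py below), where __ne__ is not derived from __eq__,
so it has to be spelled out; __repr__ makes symbols readable when debugging.
A minimal sketch of the resulting behavior, assuming Terminal and NonTerminal
are constructed with a bare name as in lark.grammar:

    # Sketch only, not part of the patch.
    from lark.grammar import Terminal, NonTerminal

    a, b = Terminal('NAME'), Terminal('NAME')
    assert a == b and not (a != b)            # __ne__ must be explicit on Python 2
    assert hash(a) == hash(b)                 # hashed by name, usable as dict/set keys
    assert Terminal('x') != NonTerminal('x')  # is_term participates in __eq__
    print(repr(a))                            # -> Terminal('NAME')
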
diff --git a/lark/parsers/cyk.py b/lark/parsers/cyk.py
index 9d643aa..e2bcd83 100644
--- a/lark/parsers/cyk.py
+++ b/lark/parsers/cyk.py
@@ -8,47 +8,19 @@
 from collections import defaultdict
 import itertools
 
-from ..common import ParseError, is_terminal
+from ..common import ParseError
 from ..lexer import Token
 from ..tree import Tree
+from ..grammar import Terminal as T, NonTerminal as NT, Symbol
 
 try:
     xrange
 except NameError:
     xrange = range
 
-class Symbol(object):
-    """Any grammar symbol."""
-
-    def __init__(self, s):
-        self.s = s
-
-    def __repr__(self):
-        return '%s(%s)' % (type(self).__name__, str(self))
-
-    def __str__(self):
-        return str(self.s)
-
-    def __eq__(self, other):
-        return self.s == str(other)
-
-    def __ne__(self, other):
-        return not self.__eq__(other)
-
-    def __hash__(self):
-        return hash((type(self), str(self.s)))
-
-
-class T(Symbol):
-    """Terminal."""
-
-    def match(self, s):
-        return self.s == s.type
-
-
-class NT(Symbol):
-    """Non-terminal."""
-    pass
+def match(t, s):
+    assert isinstance(t, T)
+    return t.name == s.type
 
 
 class Rule(object):
@@ -121,10 +93,12 @@ class Parser(object):
 
     def _to_rule(self, lark_rule):
         """Converts a lark rule, (lhs, rhs, callback, options), to a Rule."""
+        assert isinstance(lark_rule.origin, NT)
+        assert all(isinstance(x, Symbol) for x in lark_rule.expansion)
         return Rule(
-            NT(lark_rule.origin), [
-                T(x) if is_terminal(x) else NT(x) for x in lark_rule.expansion
-            ], weight=lark_rule.options.priority if lark_rule.options and lark_rule.options.priority else 0, alias=lark_rule.alias)
+            lark_rule.origin, lark_rule.expansion,
+            weight=lark_rule.options.priority if lark_rule.options and lark_rule.options.priority else 0,
+            alias=lark_rule.alias)
 
     def parse(self, tokenized):  # pylint: disable=invalid-name
         """Parses input, which is a list of tokens."""
@@ -132,7 +106,7 @@ class Parser(object):
         # Check if the parse succeeded.
         if all(r.lhs != self.start for r in table[(0, len(tokenized) - 1)]):
             raise ParseError('Parsing failed.')
-        parse = trees[(0, len(tokenized) - 1)][NT(self.start)]
+        parse = trees[(0, len(tokenized) - 1)][self.start]
         return self._to_tree(revert_cnf(parse))
 
     def _to_tree(self, rule_node):
@@ -143,8 +117,8 @@ class Parser(object):
             if isinstance(child, RuleNode):
                 children.append(self._to_tree(child))
             else:
-                assert isinstance(child.s, Token)
-                children.append(child.s)
+                assert isinstance(child.name, Token)
+                children.append(child.name)
         t = Tree(orig_rule.origin, children)
         t.rule=orig_rule
         return t
@@ -169,7 +143,7 @@ def _parse(s, g):
     # Populate base case with existing terminal production rules
     for i, w in enumerate(s):
         for terminal, rules in g.terminal_rules.items():
-            if terminal.match(w):
+            if match(terminal, w):
                 for rule in rules:
                     table[(i, i)].add(rule)
                     if (rule.lhs not in trees[(i, i)] or
@@ -349,13 +323,13 @@ def revert_cnf(node):
     if isinstance(node, T):
         return node
     # Reverts TERM rule.
-    if node.rule.lhs.s.startswith('__T_'):
+    if node.rule.lhs.name.startswith('__T_'):
         return node.children[0]
     else:
         children = []
         for child in map(revert_cnf, node.children):
             # Reverts BIN rule.
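
The cyk.py hunk replaces the module-local Symbol/T/NT classes with the shared
ones from lark.grammar, and folds the old T.match method into a free function
that compares a terminal's name against a token's type. A sketch of the new
matching contract; the NUMBER/NAME terminals and the token are hypothetical:

    # Sketch only; mirrors the match() added by the patch.
    from lark.grammar import Terminal as T
    from lark.lexer import Token

    def match(t, s):
        assert isinstance(t, T)
        return t.name == s.type   # terminal name vs. token type, both plain strings

    tok = Token('NUMBER', '42')
    assert match(T('NUMBER'), tok)
    assert not match(T('NAME'), tok)
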
-            if isinstance(child, RuleNode) and child.rule.lhs.s.startswith('__SP_'):
+            if isinstance(child, RuleNode) and child.rule.lhs.name.startswith('__SP_'):
                 children += child.children
             else:
                 children.append(child)
diff --git a/lark/parsers/xearley.py b/lark/parsers/xearley.py
index 321b829..c64bfee 100644
--- a/lark/parsers/xearley.py
+++ b/lark/parsers/xearley.py
@@ -98,14 +98,14 @@ class Parser:
            for item in to_scan:
                m = match(item.expect, stream, i)
                if m:
-                    t = Token(item.expect, m.group(0), i, text_line, text_column)
+                    t = Token(item.expect.name, m.group(0), i, text_line, text_column)
                    delayed_matches[m.end()].append(item.advance(t))

                    s = m.group(0)
                    for j in range(1, len(s)):
                        m = match(item.expect, s[:-j])
                        if m:
-                            t = Token(item.expect, m.group(0), i, text_line, text_column)
+                            t = Token(item.expect.name, m.group(0), i, text_line, text_column)
                            delayed_matches[i+m.end()].append(item.advance(t))

            next_set = Column(i+1, self.FIRST, predict_all=self.predict_all)
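
The xearley.py change follows the same convention: item.expect is now a
Terminal object rather than a bare name string, so the Token type must be
taken from its .name attribute. A sketch of the invariant being relied on;
the WORD terminal is hypothetical:

    # Sketch only; Token is lark.lexer.Token (a str subclass).
    from lark.grammar import Terminal
    from lark.lexer import Token

    expect = Terminal('WORD')
    t = Token(expect.name, 'hello', 0, 1, 1)  # type is the plain string 'WORD'
    assert t.type == 'WORD' and t == 'hello'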