@@ -8,9 +8,15 @@ class Symbol(object):
        assert isinstance(other, Symbol), other
        return self.is_term == other.is_term and self.name == other.name

    def __ne__(self, other):
        return not (self == other)

    def __hash__(self):
        return hash(self.name)

    def __repr__(self):
        return '%s(%r)' % (type(self).__name__, self.name)


class Terminal(Symbol):
    is_term = True
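A minimal usage sketch (not part of the diff) of the value semantics these methods give grammar symbols; the import path and the assumption that Symbol.__init__ simply stores the name are mine:

# Sketch: symbols compare and hash by kind and name, so equal copies
# collapse in sets and dicts.
from lark.grammar import Terminal, NonTerminal   # assumed import path

a1 = Terminal('NAME')
a2 = Terminal('NAME')
start = NonTerminal('start')

assert a1 == a2                 # __eq__: same is_term flag, same name
assert a1 != start              # __ne__ delegates to __eq__
assert hash(a1) == hash(a2)     # __hash__ uses only the name
assert len({a1, a2}) == 1       # deduplicated as a set/dict key
print(repr(a1))                 # Terminal('NAME')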
@@ -3,7 +3,7 @@
import re

from .utils import Str, classify
from .common import PatternStr, PatternRE, TokenDef
from .common import is_terminal, PatternStr, PatternRE, TokenDef

###{standalone
class LexError(Exception):
@@ -234,7 +234,7 @@ class ContextualLexer:
                lexer = lexer_by_tokens[key]
            except KeyError:
                accepts = set(accepts) | set(ignore) | set(always_accept)
                state_tokens = [tokens_by_name[n] for n in accepts if n.is_term and n.name!='$END']
                state_tokens = [tokens_by_name[n] for n in accepts if is_terminal(n) and n!='$END']
                lexer = Lexer(state_tokens, ignore=ignore, user_callbacks=user_callbacks)
                lexer_by_tokens[key] = lexer
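The two filter variants above differ only in what the state's accepted set contains: Symbol objects (checked through .is_term and .name) versus plain name strings (checked through is_terminal() and direct comparison). A standalone sketch of the symbol-based filter, with a hard-coded accepts set standing in for the real LALR parse-table keys:

# Standalone sketch of the symbol-based filter above (hard-coded accepts set).
from lark.grammar import Terminal, NonTerminal   # assumed import path

accepts = {Terminal('NAME'), Terminal('$END'), NonTerminal('expr')}
names = [n.name for n in accepts if n.is_term and n.name != '$END']
assert names == ['NAME']   # only real terminals reach the state's lexer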
@@ -110,7 +110,7 @@ class ParseTreeBuilder:
    def _init_builders(self, rules):
        filter_out = {rule.origin for rule in rules if rule.options and rule.options.filter_out}
        filter_out |= {sym for rule in rules for sym in rule.expansion if sym.is_term and sym.filter_out}
        assert all(t.filter_out for t in filter_out)
        assert all(t.name.startswith('_') for t in filter_out)

        for rule in rules:
            options = rule.options
@@ -7,7 +7,11 @@ from .lexer import Lexer, ContextualLexer, Token
from .common import GrammarError
from .parsers import lalr_parser, earley, xearley, resolve_ambig, cyk
from .tree import Tree
from .grammar import Terminal
from .grammar import Terminal, NonTerminal

def terminals(seq):
    # return [Terminal(t) for t in seq]
    return seq
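As committed, terminals() above is an identity function; the commented-out body suggests it is a placeholder for wrapping plain token names in Terminal objects once the lexer side accepts symbols. For example:

# terminals() currently passes names through unchanged; with the commented-out
# body enabled it would return [Terminal('WS'), Terminal('COMMENT')] instead.
assert terminals(['WS', 'COMMENT']) == ['WS', 'COMMENT']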
class WithLexer:
    def init_traditional_lexer(self, lexer_conf):
@@ -18,7 +22,10 @@ class WithLexer:
        self.lexer_conf = lexer_conf
        states = {idx:list(t.keys()) for idx, t in self.parser._parse_table.states.items()}
        always_accept = lexer_conf.postlex.always_accept if lexer_conf.postlex else ()
        self.lexer = ContextualLexer(lexer_conf.tokens, states, ignore=lexer_conf.ignore, always_accept=always_accept, user_callbacks=lexer_conf.callbacks)
        self.lexer = ContextualLexer(lexer_conf.tokens, states,
                                     ignore=terminals(lexer_conf.ignore),
                                     always_accept=terminals(always_accept),
                                     user_callbacks=lexer_conf.callbacks)

    def lex(self, text):
        stream = self.lexer.lex(text)
@@ -74,7 +81,7 @@ class Earley_NoLex:
    def match(self, term, text, index=0):
        return self.regexps[term].match(text, index)
        return self.regexps[term.name].match(text, index)

    def _prepare_match(self, lexer_conf):
        self.regexps = {}
@@ -8,47 +8,19 @@
from collections import defaultdict
import itertools

from ..common import ParseError, is_terminal
from ..common import ParseError
from ..lexer import Token
from ..tree import Tree
from ..grammar import Terminal as T, NonTerminal as NT, Symbol

try:
    xrange
except NameError:
    xrange = range

class Symbol(object):
    """Any grammar symbol."""

    def __init__(self, s):
        self.s = s

    def __repr__(self):
        return '%s(%s)' % (type(self).__name__, str(self))

    def __str__(self):
        return str(self.s)

    def __eq__(self, other):
        return self.s == str(other)

    def __ne__(self, other):
        return not self.__eq__(other)

    def __hash__(self):
        return hash((type(self), str(self.s)))

class T(Symbol):
    """Terminal."""

    def match(self, s):
        return self.s == s.type

class NT(Symbol):
    """Non-terminal."""
    pass

def match(t, s):
    assert isinstance(t, T)
    return t.name == s.type
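With the parser-local Symbol/T/NT classes removed, terminal matching in the CYK parser becomes the module-level match() above, which compares the terminal's name to the token's type. A small sketch, assuming lark's Token takes (type, value) as its first arguments; the imports are assumptions:

# Sketch of the CYK terminal match; Terminal and Token import paths are assumed.
from lark.grammar import Terminal as T
from lark.lexer import Token

def match(t, s):                    # same as the match() defined above
    assert isinstance(t, T)
    return t.name == s.type

tok = Token('NUMBER', '42')
assert match(T('NUMBER'), tok)      # terminal name matches the token's type
assert not match(T('NAME'), tok)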
class Rule(object):
@@ -121,10 +93,12 @@ class Parser(object):
    def _to_rule(self, lark_rule):
        """Converts a lark rule, (lhs, rhs, callback, options), to a Rule."""
        assert isinstance(lark_rule.origin, NT)
        assert all(isinstance(x, Symbol) for x in lark_rule.expansion)
        return Rule(
            NT(lark_rule.origin), [
                T(x) if is_terminal(x) else NT(x) for x in lark_rule.expansion
            ], weight=lark_rule.options.priority if lark_rule.options and lark_rule.options.priority else 0, alias=lark_rule.alias)
            lark_rule.origin, lark_rule.expansion,
            weight=lark_rule.options.priority if lark_rule.options and lark_rule.options.priority else 0,
            alias=lark_rule.alias)

    def parse(self, tokenized):  # pylint: disable=invalid-name
        """Parses input, which is a list of tokens."""
@@ -132,7 +106,7 @@ class Parser(object):
        # Check if the parse succeeded.
        if all(r.lhs != self.start for r in table[(0, len(tokenized) - 1)]):
            raise ParseError('Parsing failed.')
        parse = trees[(0, len(tokenized) - 1)][NT(self.start)]
        parse = trees[(0, len(tokenized) - 1)][self.start]
        return self._to_tree(revert_cnf(parse))
@@ -143,8 +117,8 @@ class Parser(object):
            if isinstance(child, RuleNode):
                children.append(self._to_tree(child))
            else:
                assert isinstance(child.s, Token)
                children.append(child.s)
                assert isinstance(child.name, Token)
                children.append(child.name)
        t = Tree(orig_rule.origin, children)
        t.rule=orig_rule
        return t
@@ -169,7 +143,7 @@ def _parse(s, g):
    # Populate base case with existing terminal production rules
    for i, w in enumerate(s):
        for terminal, rules in g.terminal_rules.items():
            if terminal.match(w):
            if match(terminal, w):
                for rule in rules:
                    table[(i, i)].add(rule)
                    if (rule.lhs not in trees[(i, i)] or
@@ -349,13 +323,13 @@ def revert_cnf(node):
    if isinstance(node, T):
        return node

    # Reverts TERM rule.
    if node.rule.lhs.s.startswith('__T_'):
    if node.rule.lhs.name.startswith('__T_'):
        return node.children[0]
    else:
        children = []
        for child in map(revert_cnf, node.children):
            # Reverts BIN rule.
            if isinstance(child, RuleNode) and child.rule.lhs.s.startswith('__SP_'):
            if isinstance(child, RuleNode) and child.rule.lhs.name.startswith('__SP_'):
                children += child.children
            else:
                children.append(child)
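revert_cnf above undoes the two rewrites introduced by the Chomsky-normal-form conversion: rules named __T_* (terminal wrappers) are replaced by their single child, and rules named __SP_* (binarization helpers) have their children spliced back into the parent. A toy illustration of the splice, using a stand-in node class instead of lark's RuleNode:

# Toy illustration of the __SP_ splice above; Node is a stand-in, not lark's RuleNode.
class Node:
    def __init__(self, lhs_name, children):
        self.lhs_name = lhs_name
        self.children = children

parent = Node('expr', [Node('__SP_0', ['a', 'b']), 'c'])

children = []
for child in parent.children:
    if isinstance(child, Node) and child.lhs_name.startswith('__SP_'):
        children += child.children      # splice the synthetic binary rule back in
    else:
        children.append(child)

assert children == ['a', 'b', 'c']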
@@ -98,14 +98,14 @@ class Parser:
            for item in to_scan:
                m = match(item.expect, stream, i)
                if m:
                    t = Token(item.expect, m.group(0), i, text_line, text_column)
                    t = Token(item.expect.name, m.group(0), i, text_line, text_column)
                    delayed_matches[m.end()].append(item.advance(t))

                    s = m.group(0)
                    for j in range(1, len(s)):
                        m = match(item.expect, s[:-j])
                        if m:
                            t = Token(item.expect, m.group(0), i, text_line, text_column)
                            t = Token(item.expect.name, m.group(0), i, text_line, text_column)
                            delayed_matches[i+m.end()].append(item.advance(t))

            next_set = Column(i+1, self.FIRST, predict_all=self.predict_all)
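Both Token(...) calls above now pass item.expect.name rather than the Terminal object, because Token's first positional argument becomes the token's .type and is expected to be the terminal's name string. A quick check of that assumption:

# Quick check: Token stores its first argument as .type and behaves as a str.
from lark.lexer import Token

t = Token('NAME', 'foo')
assert t.type == 'NAME' and t == 'foo'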