@@ -4,12 +4,18 @@ import sys | |||
# True when the running interpreter's (major, minor) version is at least 3.6.
Py36 = sys.version_info[:2] >= (3, 6)
###{standalone | |||
def is_terminal(sym):
    """Return the result of ``sym.isupper()``: upper-case names count as terminals."""
    uppercase = sym.isupper()
    return uppercase
class GrammarError(Exception):
    """Exception type named for grammar errors; adds nothing to ``Exception``."""
class ParseError(Exception):
    """Exception type named for parse errors; adds nothing to ``Exception``."""
###} | |||
class UnexpectedToken(ParseError): | |||
def __init__(self, token, expected, seq, index): | |||
@@ -32,9 +38,6 @@ class UnexpectedToken(ParseError): | |||
def is_terminal(sym):
    """Return the result of ``sym.isupper()``: upper-case names count as terminals."""
    uppercase = sym.isupper()
    return uppercase
class LexerConf: | |||
def __init__(self, tokens, ignore=(), postlex=None): | |||
@@ -166,8 +166,8 @@ class Lark: | |||
def _build_parser(self): | |||
self.parser_class = get_frontend(self.options.parser, self.options.lexer) | |||
self.parse_tree_builder = ParseTreeBuilder(self.rules, self.options.tree_class, self.options.propagate_positions, self.options.keep_all_tokens) | |||
callback = self.parse_tree_builder.apply(self.options.transformer) | |||
self._parse_tree_builder = ParseTreeBuilder(self.rules, self.options.tree_class, self.options.propagate_positions, self.options.keep_all_tokens) | |||
callback = self._parse_tree_builder.create_callback(self.options.transformer) | |||
if self.profiler: | |||
for f in dir(callback): | |||
if not (f.startswith('__') and f.endswith('__')): | |||
@@ -5,6 +5,7 @@ import re | |||
from .utils import Str, classify | |||
from .common import is_terminal, PatternStr, PatternRE, TokenDef | |||
###{standalone | |||
class LexError(Exception):
    """Exception type named for lexing errors; adds nothing to ``Exception``."""
@@ -48,10 +49,60 @@ class Token(Str): | |||
__hash__ = Str.__hash__ | |||
class Regex:
    """Plain holder pairing a pattern with its flags."""

    def __init__(self, pattern, flags=()):
        # Both values are stored as given; nothing is compiled or validated here.
        self.pattern, self.flags = pattern, flags
class LineCounter:
    """Tracks absolute offset, line number and column while text is consumed."""

    def __init__(self):
        self.newline_char = '\n'
        self.line = 1
        # Offset, column and start-of-line offset all begin at zero.
        self.char_pos = self.column = self.line_start_pos = 0

    def feed(self, token, test_newline=True):
        """Advance the counters past ``token``.

        As an optional optimization, pass ``test_newline=False`` if the token
        is known not to contain a newline; the newline scan is then skipped.
        """
        if test_newline:
            last_newline = token.rfind(self.newline_char)
            if last_newline >= 0:
                self.line += token.count(self.newline_char)
                # The current line starts right after the last newline consumed.
                self.line_start_pos = self.char_pos + last_newline + 1

        self.char_pos += len(token)
        self.column = self.char_pos - self.line_start_pos
class _Lex:
    "Built to serve both Lexer and ContextualLexer"

    def __init__(self, lexer):
        # Object providing ``mres`` and ``callback``. It is re-read for every
        # token, so the owner may replace it while lexing is in progress.
        self.lexer = lexer

    def lex(self, stream, newline_types, ignore_types):
        """Generate tokens from ``stream``.

        Args:
            stream: the text to tokenize.
            newline_types: iterable of token types whose values are scanned
                for newlines when updating the line counter.
            ignore_types: iterable of token types that are consumed but not
                yielded.

        Yields:
            A ``Token`` for every match whose type is not ignored, after it
            has been passed through ``lexer.callback[type]`` when present.

        Raises:
            UnexpectedInput: if no pattern matches before the end of
                ``stream`` is reached.
        """
        # Materialize both collections once. They are probed for every match,
        # so a one-shot iterable would be drained by the first membership test.
        newline_types = frozenset(newline_types)
        ignore_types = frozenset(ignore_types)
        line_ctr = LineCounter()

        while True:
            lexer = self.lexer
            for mre, type_from_index in lexer.mres:
                m = mre.match(stream, line_ctr.char_pos)
                if not m:
                    continue

                value = m.group(0)
                type_ = type_from_index[m.lastindex]
                if type_ not in ignore_types:
                    t = Token(type_, value, line_ctr.char_pos, line_ctr.line, line_ctr.column)
                    if t.type in lexer.callback:
                        t = lexer.callback[t.type](t)
                    # Any value sent into the generator is discarded: the
                    # lexer is always taken from self.lexer on the next pass.
                    yield t

                line_ctr.feed(value, type_ in newline_types)
                break
            else:
                # No pattern matched: an error unless the input is exhausted.
                if line_ctr.char_pos < len(stream):
                    raise UnexpectedInput(stream, line_ctr.char_pos, line_ctr.line, line_ctr.column)
                break
###} | |||
def _regexp_has_newline(r): | |||
return '\n' in r or '\\n' in r or ('(?s)' in r and '.' in r) | |||
@@ -182,57 +233,3 @@ class ContextualLexer: | |||
l.lexer = self.lexers[self.parser_state] | |||
###{lexer | |||
class LineCounter:
    """Tracks absolute offset, line number and column while text is consumed."""

    def __init__(self):
        self.newline_char = '\n'
        self.line = 1
        # Offset, column and start-of-line offset all begin at zero.
        self.char_pos = self.column = self.line_start_pos = 0

    def feed(self, token, test_newline=True):
        """Advance the counters past ``token``.

        As an optional optimization, pass ``test_newline=False`` if the token
        is known not to contain a newline; the newline scan is then skipped.
        """
        if test_newline:
            last_newline = token.rfind(self.newline_char)
            if last_newline >= 0:
                self.line += token.count(self.newline_char)
                # The current line starts right after the last newline consumed.
                self.line_start_pos = self.char_pos + last_newline + 1

        self.char_pos += len(token)
        self.column = self.char_pos - self.line_start_pos
class _Lex:
    "Built to serve both Lexer and ContextualLexer"

    def __init__(self, lexer):
        # Object providing ``mres`` and ``callback``. It is re-read for every
        # token, so the owner may replace it while lexing is in progress.
        self.lexer = lexer

    def lex(self, stream, newline_types, ignore_types):
        """Generate tokens from ``stream``.

        Args:
            stream: the text to tokenize.
            newline_types: iterable of token types whose values are scanned
                for newlines when updating the line counter.
            ignore_types: iterable of token types that are consumed but not
                yielded.

        Yields:
            A ``Token`` for every match whose type is not ignored, after it
            has been passed through ``lexer.callback[type]`` when present.

        Raises:
            UnexpectedInput: if no pattern matches before the end of
                ``stream`` is reached.
        """
        # Materialize both collections once. They are probed for every match,
        # so a one-shot iterable would be drained by the first membership test.
        newline_types = frozenset(newline_types)
        ignore_types = frozenset(ignore_types)
        line_ctr = LineCounter()

        while True:
            lexer = self.lexer
            for mre, type_from_index in lexer.mres:
                m = mre.match(stream, line_ctr.char_pos)
                if not m:
                    continue

                value = m.group(0)
                type_ = type_from_index[m.lastindex]
                if type_ not in ignore_types:
                    t = Token(type_, value, line_ctr.char_pos, line_ctr.line, line_ctr.column)
                    if t.type in lexer.callback:
                        t = lexer.callback[t.type](t)
                    # Any value sent into the generator is discarded: the
                    # lexer is always taken from self.lexer on the next pass.
                    yield t

                line_ctr.feed(value, type_ in newline_types)
                break
            else:
                # No pattern matched: an error unless the input is exhausted.
                if line_ctr.char_pos < len(stream):
                    raise UnexpectedInput(stream, line_ctr.char_pos, line_ctr.line, line_ctr.column)
                break
###} |
@@ -128,7 +128,7 @@ RULES = { | |||
class EBNF_to_BNF(InlineTransformer): | |||
def __init__(self): | |||
self.new_rules = {} | |||
self.new_rules = [] | |||
self.rules_by_expr = {} | |||
self.prefix = 'anon' | |||
self.i = 0 | |||
@@ -141,7 +141,8 @@ class EBNF_to_BNF(InlineTransformer): | |||
new_name = '__%s_%s_%d' % (self.prefix, type_, self.i) | |||
self.i += 1 | |||
t = Token('RULE', new_name, -1) | |||
self.new_rules[new_name] = T('expansions', [T('expansion', [expr]), T('expansion', [t, expr])]), self.rule_options | |||
tree = T('expansions', [T('expansion', [expr]), T('expansion', [t, expr])]) | |||
self.new_rules.append((new_name, tree, self.rule_options)) | |||
self.rules_by_expr[expr] = t | |||
return t | |||
@@ -390,12 +391,6 @@ def _interleave(l, item): | |||
def _choice_of_rules(rules):
    """Build an 'expansions' tree holding one single-RULE 'expansion' per name in ``rules``."""
    alternatives = []
    for name in rules:
        rule_token = Token('RULE', name)
        alternatives.append(T('expansion', [rule_token]))
    return T('expansions', alternatives)
def dict_update_safe(d1, d2):
    """Copy every item of ``d2`` into ``d1`` in place, refusing to overwrite.

    Args:
        d1: mapping to update.
        d2: mapping whose items are added to ``d1``.

    Raises:
        AssertionError: if any key of ``d2`` is already present in ``d1``.
            ``d1`` is left unmodified in that case.
    """
    items = list(d2.items())

    # Validate every key before mutating, so a collision cannot leave d1
    # half-updated. Raise explicitly rather than via ``assert`` so the check
    # still runs under ``python -O`` instead of silently overwriting.
    clashes = [k for k, _v in items if k in d1]
    if clashes:
        raise AssertionError("Key collision while merging dicts: %r" % (clashes,))

    for k, v in items:
        d1[k] = v
class Grammar: | |||
def __init__(self, rule_defs, token_defs, ignore): | |||
self.token_defs = token_defs | |||
@@ -468,38 +463,41 @@ class Grammar: | |||
# ================= | |||
# Compile Rules | |||
# ================= | |||
ebnf_to_bnf = EBNF_to_BNF() | |||
simplify_rule = SimplifyRule_Visitor() | |||
# 1. Pre-process terminals | |||
transformer = PrepareLiterals() | |||
if not lexer: | |||
transformer *= SplitLiterals() | |||
transformer *= ExtractAnonTokens(tokens) # Adds to tokens | |||
rules = {} | |||
# 2. Convert EBNF to BNF (and apply step 1) | |||
ebnf_to_bnf = EBNF_to_BNF() | |||
rules = [] | |||
for name, rule_tree, options in rule_defs: | |||
assert name not in rules, name | |||
ebnf_to_bnf.rule_options = RuleOptions(keep_all_tokens=True) if options and options.keep_all_tokens else None | |||
tree = transformer.transform(rule_tree) | |||
rules[name] = ebnf_to_bnf.transform(tree), options | |||
rules.append((name, ebnf_to_bnf.transform(tree), options)) | |||
rules += ebnf_to_bnf.new_rules | |||
dict_update_safe(rules, ebnf_to_bnf.new_rules) | |||
assert len(rules) == len({name for name, _t, _o in rules}), "Whoops, name collision" | |||
# 3. Compile tree to Rule objects | |||
rule_tree_to_text = RuleTreeToText() | |||
new_rules = [] | |||
for origin, (tree, options) in rules.items(): | |||
simplify_rule = SimplifyRule_Visitor() | |||
compiled_rules = [] | |||
for name, tree, options in rules: | |||
simplify_rule.visit(tree) | |||
expansions = rule_tree_to_text.transform(tree) | |||
for expansion, alias in expansions: | |||
if alias and origin.startswith('_'): | |||
raise Exception("Rule %s is marked for expansion (it starts with an underscore) and isn't allowed to have aliases (alias=%s)" % (origin, alias)) | |||
if alias and name.startswith('_'): | |||
raise Exception("Rule %s is marked for expansion (it starts with an underscore) and isn't allowed to have aliases (alias=%s)" % (name, alias)) | |||
rule = Rule(origin, expansion, alias, options) | |||
new_rules.append(rule) | |||
rule = Rule(name, expansion, alias, options) | |||
compiled_rules.append(rule) | |||
return tokens, new_rules, self.ignore | |||
return tokens, compiled_rules, self.ignore | |||
@@ -557,7 +555,7 @@ class GrammarLoader: | |||
rules = [options_from_rule(name, x) for name, x in RULES.items()] | |||
rules = [Rule(r, x.split(), None, o) for r, xs, o in rules for x in xs] | |||
callback = ParseTreeBuilder(rules, T).apply() | |||
callback = ParseTreeBuilder(rules, T).create_callback() | |||
lexer_conf = LexerConf(tokens, ['WS', 'COMMENT']) | |||
parser_conf = ParserConf(rules, callback, 'start') | |||
@@ -3,6 +3,8 @@ from .utils import suppress | |||
from .lexer import Token | |||
from .grammar import Rule | |||
###{standalone | |||
class NodeBuilder: | |||
def __init__(self, tree_class, name): | |||
self.tree_class = tree_class | |||
@@ -130,7 +132,7 @@ class ParseTreeBuilder: | |||
yield rule, wrapper_chain | |||
def apply(self, transformer=None): | |||
def create_callback(self, transformer=None): | |||
callback = Callback() | |||
for rule, wrapper_chain in self.rule_builders: | |||
@@ -152,3 +154,5 @@ class ParseTreeBuilder: | |||
setattr(callback, internal_callback_name, f) | |||
return callback | |||
###} |
@@ -3,7 +3,7 @@ | |||
# Author: Erez Shinan (2017) | |||
# Email : erezshin@gmail.com | |||
from ..common import ParseError, UnexpectedToken | |||
from ..common import UnexpectedToken | |||
from .lalr_analysis import LALR_Analyzer, Shift | |||
@@ -20,6 +20,8 @@ class Parser: | |||
self.parser = _Parser(analysis.parse_table, callbacks) | |||
self.parse = self.parser.parse | |||
###{standalone | |||
class _Parser: | |||
def __init__(self, parse_table, callbacks): | |||
self.states = parse_table.states | |||
@@ -90,3 +92,5 @@ class _Parser: | |||
return val | |||
else: | |||
reduce(arg) | |||
###} |
@@ -7,6 +7,7 @@ from copy import deepcopy | |||
from .utils import inline_args | |||
###{standalone | |||
class Tree(object): | |||
def __init__(self, data, children): | |||
self.data = data | |||
@@ -33,6 +34,7 @@ class Tree(object): | |||
def pretty(self, indent_str=' '):
    """Return the pieces produced by ``self._pretty(0, indent_str)`` joined into one string."""
    pieces = self._pretty(0, indent_str)
    return ''.join(pieces)
###} | |||
def expand_kids_by_index(self, *indices): | |||
for i in sorted(indices, reverse=True): # reverse so that changing tail won't affect indices | |||
@@ -138,7 +140,7 @@ class TransformerChain(object): | |||
def __mul__(self, other):
    """Return a new TransformerChain of this chain's transformers followed by ``other``."""
    extended = self.transformers + (other,)
    return TransformerChain(*extended)
class InlineTransformer(Transformer): | |||