@@ -4,12 +4,18 @@ import sys
 Py36 = (sys.version_info[:2] >= (3, 6))

+###{standalone
+
+def is_terminal(sym):
+    return sym.isupper()
+
 class GrammarError(Exception):
     pass

 class ParseError(Exception):
     pass

+###}
+
 class UnexpectedToken(ParseError):
     def __init__(self, token, expected, seq, index):
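The `###{standalone` / `###}` pairs introduced throughout this diff fence off the regions that a standalone-parser generator can later copy out of each module verbatim. A minimal sketch of how such an extractor could work; the function below is illustrative only, not the project's actual tool:

```python
def extract_sections(source, tag='standalone'):
    """Collect the lines between '###{<tag>' and '###}' markers."""
    out, keep = [], False
    for line in source.splitlines():
        stripped = line.strip()
        if stripped == '###{' + tag:
            keep = True           # start copying at the opening marker
        elif stripped == '###}':
            keep = False          # stop at the closing marker
        elif keep:
            out.append(line)
    return '\n'.join(out)
```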
@@ -32,9 +38,6 @@ class UnexpectedToken(ParseError):

-def is_terminal(sym):
-    return sym.isupper()
-
 class LexerConf:
     def __init__(self, tokens, ignore=(), postlex=None):

@@ -166,8 +166,8 @@ class Lark:
     def _build_parser(self):
         self.parser_class = get_frontend(self.options.parser, self.options.lexer)
-        self.parse_tree_builder = ParseTreeBuilder(self.rules, self.options.tree_class, self.options.propagate_positions, self.options.keep_all_tokens)
-        callback = self.parse_tree_builder.apply(self.options.transformer)
+        self._parse_tree_builder = ParseTreeBuilder(self.rules, self.options.tree_class, self.options.propagate_positions, self.options.keep_all_tokens)
+        callback = self._parse_tree_builder.create_callback(self.options.transformer)
         if self.profiler:
             for f in dir(callback):
                 if not (f.startswith('__') and f.endswith('__')):
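The rename from `apply` to `create_callback` (plus the underscore on `_parse_tree_builder`) signals that the builder constructs and returns a callback object rather than mutating shared state. A rough sketch of the calling pattern under the new name; the argument values here are placeholders, not taken from a real configuration:

```python
# Hypothetical usage mirroring _build_parser above.
builder = ParseTreeBuilder(rules, Tree, propagate_positions=False, keep_all_tokens=False)
callback = builder.create_callback(transformer=None)   # formerly builder.apply(...)
parser_conf = ParserConf(rules, callback, 'start')
```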
@@ -5,6 +5,7 @@ import re

 from .utils import Str, classify
 from .common import is_terminal, PatternStr, PatternRE, TokenDef

+###{standalone

 class LexError(Exception):
     pass

@@ -48,10 +49,60 @@ class Token(Str):
     __hash__ = Str.__hash__

 class Regex:
     def __init__(self, pattern, flags=()):
         self.pattern = pattern
         self.flags = flags
+class LineCounter:
+    def __init__(self):
+        self.newline_char = '\n'
+        self.char_pos = 0
+        self.line = 1
+        self.column = 0
+        self.line_start_pos = 0
+
+    def feed(self, token, test_newline=True):
+        """Consume a token and calculate the new line & column.
+
+        As an optional optimization, set test_newline=False if token doesn't contain a newline.
+        """
+        if test_newline:
+            newlines = token.count(self.newline_char)
+            if newlines:
+                self.line += newlines
+                self.line_start_pos = self.char_pos + token.rindex(self.newline_char) + 1
+
+        self.char_pos += len(token)
+        self.column = self.char_pos - self.line_start_pos
+class _Lex:
+    "Built to serve both Lexer and ContextualLexer"
+
+    def __init__(self, lexer):
+        self.lexer = lexer
+
+    def lex(self, stream, newline_types, ignore_types):
+        newline_types = list(newline_types)
+        ignore_types = list(ignore_types)
+        line_ctr = LineCounter()
+
+        while True:
+            lexer = self.lexer
+            for mre, type_from_index in lexer.mres:
+                m = mre.match(stream, line_ctr.char_pos)
+                if m:
+                    value = m.group(0)
+                    type_ = type_from_index[m.lastindex]
+                    if type_ not in ignore_types:
+                        t = Token(type_, value, line_ctr.char_pos, line_ctr.line, line_ctr.column)
+                        if t.type in lexer.callback:
+                            t = lexer.callback[t.type](t)
+                        yield t
+                    line_ctr.feed(value, type_ in newline_types)
+                    break
+            else:
+                if line_ctr.char_pos < len(stream):
+                    raise UnexpectedInput(stream, line_ctr.char_pos, line_ctr.line, line_ctr.column)
+                break
+
+###}

 def _regexp_has_newline(r):
     return '\n' in r or '\\n' in r or ('(?s)' in r and '.' in r)
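A quick sanity check of the position arithmetic in `LineCounter.feed` (`column = char_pos - line_start_pos`), assuming the `LineCounter` class from the block above is in scope; the token strings are invented for illustration:

```python
lc = LineCounter()
lc.feed('foo')         # no newline: char_pos=3, line=1, column=3
lc.feed('bar\nbaz')    # one newline: line=2, line_start_pos=3+3+1=7
assert (lc.line, lc.column, lc.char_pos) == (2, 3, 10)
```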
@@ -182,57 +233,3 @@ class ContextualLexer:
             l.lexer = self.lexers[self.parser_state]

-###{lexer
-class LineCounter:
-    def __init__(self):
-        self.newline_char = '\n'
-        self.char_pos = 0
-        self.line = 1
-        self.column = 0
-        self.line_start_pos = 0
-
-    def feed(self, token, test_newline=True):
-        """Consume a token and calculate the new line & column.
-
-        As an optional optimization, set test_newline=False if token doesn't contain a newline.
-        """
-        if test_newline:
-            newlines = token.count(self.newline_char)
-            if newlines:
-                self.line += newlines
-                self.line_start_pos = self.char_pos + token.rindex(self.newline_char) + 1
-
-        self.char_pos += len(token)
-        self.column = self.char_pos - self.line_start_pos
-class _Lex:
-    "Built to serve both Lexer and ContextualLexer"
-
-    def __init__(self, lexer):
-        self.lexer = lexer
-
-    def lex(self, stream, newline_types, ignore_types):
-        newline_types = list(newline_types)
-        ignore_types = list(ignore_types)
-        line_ctr = LineCounter()
-
-        while True:
-            lexer = self.lexer
-            for mre, type_from_index in lexer.mres:
-                m = mre.match(stream, line_ctr.char_pos)
-                if m:
-                    value = m.group(0)
-                    type_ = type_from_index[m.lastindex]
-                    if type_ not in ignore_types:
-                        t = Token(type_, value, line_ctr.char_pos, line_ctr.line, line_ctr.column)
-                        if t.type in lexer.callback:
-                            t = lexer.callback[t.type](t)
-                        yield t
-                    line_ctr.feed(value, type_ in newline_types)
-                    break
-            else:
-                if line_ctr.char_pos < len(stream):
-                    raise UnexpectedInput(stream, line_ctr.char_pos, line_ctr.line, line_ctr.column)
-                break
-
-###}
@@ -128,7 +128,7 @@ RULES = {

 class EBNF_to_BNF(InlineTransformer):
     def __init__(self):
-        self.new_rules = {}
+        self.new_rules = []
         self.rules_by_expr = {}
         self.prefix = 'anon'
         self.i = 0

@@ -141,7 +141,8 @@ class EBNF_to_BNF(InlineTransformer):
         new_name = '__%s_%s_%d' % (self.prefix, type_, self.i)
         self.i += 1
         t = Token('RULE', new_name, -1)
-        self.new_rules[new_name] = T('expansions', [T('expansion', [expr]), T('expansion', [t, expr])]), self.rule_options
+        tree = T('expansions', [T('expansion', [expr]), T('expansion', [t, expr])])
+        self.new_rules.append((new_name, tree, self.rule_options))
         self.rules_by_expr[expr] = t
         return t
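For context, the tree built here is the standard left-recursive encoding of EBNF repetition: a fresh rule that matches either one `expr`, or itself followed by `expr`. A minimal standalone illustration of the same idea; this is not lark's code, and the names and data shapes are simplified:

```python
from itertools import count

_ids = count()

def expand_plus(expr):
    # Encode "expr+" as:  __anon_plus_N: expr | __anon_plus_N expr
    new_name = '__anon_plus_%d' % next(_ids)
    alternatives = [[expr], [new_name, expr]]   # base case, recursive case
    return new_name, alternatives

name, alts = expand_plus('item')
assert name == '__anon_plus_0'
assert alts == [['item'], ['__anon_plus_0', 'item']]
```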
@@ -390,12 +391,6 @@ def _interleave(l, item):

 def _choice_of_rules(rules):
     return T('expansions', [T('expansion', [Token('RULE', name)]) for name in rules])

-def dict_update_safe(d1, d2):
-    for k, v in d2.items():
-        assert k not in d1
-        d1[k] = v
-
 class Grammar:
     def __init__(self, rule_defs, token_defs, ignore):
         self.token_defs = token_defs

@@ -468,38 +463,41 @@ class Grammar:
         # =================
         #  Compile Rules
         # =================
-        ebnf_to_bnf = EBNF_to_BNF()
-        simplify_rule = SimplifyRule_Visitor()

         # 1. Pre-process terminals
         transformer = PrepareLiterals()
         if not lexer:
             transformer *= SplitLiterals()
         transformer *= ExtractAnonTokens(tokens)   # Adds to tokens

-        rules = {}
         # 2. Convert EBNF to BNF (and apply step 1)
+        ebnf_to_bnf = EBNF_to_BNF()
+        rules = []
         for name, rule_tree, options in rule_defs:
             assert name not in rules, name
             ebnf_to_bnf.rule_options = RuleOptions(keep_all_tokens=True) if options and options.keep_all_tokens else None
             tree = transformer.transform(rule_tree)
-            rules[name] = ebnf_to_bnf.transform(tree), options
+            rules.append((name, ebnf_to_bnf.transform(tree), options))

-        dict_update_safe(rules, ebnf_to_bnf.new_rules)
+        rules += ebnf_to_bnf.new_rules
+        assert len(rules) == len({name for name, _t, _o in rules}), "Whoops, name collision"

         # 3. Compile tree to Rule objects
         rule_tree_to_text = RuleTreeToText()
-        new_rules = []
-        for origin, (tree, options) in rules.items():
+        simplify_rule = SimplifyRule_Visitor()
+        compiled_rules = []
+        for name, tree, options in rules:
             simplify_rule.visit(tree)
             expansions = rule_tree_to_text.transform(tree)

             for expansion, alias in expansions:
-                if alias and origin.startswith('_'):
-                    raise Exception("Rule %s is marked for expansion (it starts with an underscore) and isn't allowed to have aliases (alias=%s)" % (origin, alias))
+                if alias and name.startswith('_'):
+                    raise Exception("Rule %s is marked for expansion (it starts with an underscore) and isn't allowed to have aliases (alias=%s)" % (name, alias))

-                rule = Rule(origin, expansion, alias, options)
-                new_rules.append(rule)
+                rule = Rule(name, expansion, alias, options)
+                compiled_rules.append(rule)

-        return tokens, new_rules, self.ignore
+        return tokens, compiled_rules, self.ignore
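Switching `rules` from a dict keyed by rule name to a list of `(name, tree, options)` triples keeps rule definition order regardless of dict ordering guarantees (which vary across the Python versions this code supports), and replaces `dict_update_safe` with a single post-hoc uniqueness check. The new assert works by comparing counts; illustrative data only:

```python
rules = [('start', 'tree0', None), ('item', 'tree1', None)]
assert len(rules) == len({name for name, _t, _o in rules})   # unique names: passes

rules.append(('item', 'tree2', None))                        # duplicate rule name
assert len(rules) != len({name for name, _t, _o in rules})   # 3 triples, 2 names
```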
@@ -557,7 +555,7 @@ class GrammarLoader:

         rules = [options_from_rule(name, x) for name, x in RULES.items()]
         rules = [Rule(r, x.split(), None, o) for r, xs, o in rules for x in xs]
-        callback = ParseTreeBuilder(rules, T).apply()
+        callback = ParseTreeBuilder(rules, T).create_callback()
         lexer_conf = LexerConf(tokens, ['WS', 'COMMENT'])
         parser_conf = ParserConf(rules, callback, 'start')
@@ -3,6 +3,8 @@ from .utils import suppress

 from .lexer import Token
 from .grammar import Rule

+###{standalone
+
 class NodeBuilder:
     def __init__(self, tree_class, name):
         self.tree_class = tree_class

@@ -130,7 +132,7 @@ class ParseTreeBuilder:

             yield rule, wrapper_chain

-    def apply(self, transformer=None):
+    def create_callback(self, transformer=None):
         callback = Callback()
         for rule, wrapper_chain in self.rule_builders:

@@ -152,3 +154,5 @@ class ParseTreeBuilder:
             setattr(callback, internal_callback_name, f)

         return callback
+
+###}
@@ -3,7 +3,7 @@
 # Author: Erez Shinan (2017)
 # Email : erezshin@gmail.com

-from ..common import ParseError, UnexpectedToken
+from ..common import UnexpectedToken
 from .lalr_analysis import LALR_Analyzer, Shift

@@ -20,6 +20,8 @@ class Parser:
         self.parser = _Parser(analysis.parse_table, callbacks)
         self.parse = self.parser.parse

+###{standalone
+
 class _Parser:
     def __init__(self, parse_table, callbacks):
         self.states = parse_table.states

@@ -90,3 +92,5 @@ class _Parser:
                 return val
             else:
                 reduce(arg)
+
+###}
@@ -7,6 +7,7 @@ from copy import deepcopy

 from .utils import inline_args

+###{standalone
 class Tree(object):
     def __init__(self, data, children):
         self.data = data

@@ -33,6 +34,7 @@ class Tree(object):

     def pretty(self, indent_str='  '):
         return ''.join(self._pretty(0, indent_str))
+###}

     def expand_kids_by_index(self, *indices):
         for i in sorted(indices, reverse=True):   # reverse so that changing tail won't affect indices

@@ -138,7 +140,7 @@ class TransformerChain(object):

     def __mul__(self, other):
         return TransformerChain(*self.transformers + (other,))

 class InlineTransformer(Transformer):
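The `__mul__` overload shown above is what lets transformers be chained with `*` (the same operator used earlier in `Grammar.compile`, e.g. `transformer *= SplitLiterals()`), with each stage's output feeding the next. A self-contained toy that mirrors the pattern; these are not lark's classes, just a sketch of the chaining semantics:

```python
class Stage:
    def __mul__(self, other):
        return Chain(self, other)

class Chain(Stage):
    def __init__(self, *stages):
        self.stages = stages
    def __mul__(self, other):
        return Chain(*self.stages + (other,))   # flatten, like TransformerChain
    def transform(self, value):
        for stage in self.stages:               # apply each stage in sequence
            value = stage.transform(value)
        return value

class Upper(Stage):
    def transform(self, s): return s.upper()

class Exclaim(Stage):
    def transform(self, s): return s + '!'

assert (Upper() * Exclaim()).transform('tree') == 'TREE!'
```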