@@ -4,12 +4,18 @@ import sys
 Py36 = (sys.version_info[:2] >= (3, 6))

+###{standalone
+
+def is_terminal(sym):
+    return sym.isupper()
+
 class GrammarError(Exception):
     pass

 class ParseError(Exception):
     pass

+###}
+
 class UnexpectedToken(ParseError):
     def __init__(self, token, expected, seq, index):
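The `###{standalone` / `###}` pairs introduced throughout this diff fence off the regions that a standalone-parser generator can later copy out of each module verbatim. A minimal sketch of how such an extractor could work; the function below is illustrative only, not the project's actual tool:

```python
def extract_sections(source, tag='standalone'):
    """Collect the lines between '###{<tag>' and '###}' markers."""
    out, keep = [], False
    for line in source.splitlines():
        stripped = line.strip()
        if stripped == '###{' + tag:
            keep = True           # start copying at the opening marker
        elif stripped == '###}':
            keep = False          # stop at the closing marker
        elif keep:
            out.append(line)
    return '\n'.join(out)
```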
@@ -32,9 +38,6 @@ class UnexpectedToken(ParseError):

-def is_terminal(sym):
-    return sym.isupper()
-
 class LexerConf:
     def __init__(self, tokens, ignore=(), postlex=None):

@@ -166,8 +166,8 @@ class Lark:
     def _build_parser(self):
         self.parser_class = get_frontend(self.options.parser, self.options.lexer)
-        self.parse_tree_builder = ParseTreeBuilder(self.rules, self.options.tree_class, self.options.propagate_positions, self.options.keep_all_tokens)
-        callback = self.parse_tree_builder.apply(self.options.transformer)
+        self._parse_tree_builder = ParseTreeBuilder(self.rules, self.options.tree_class, self.options.propagate_positions, self.options.keep_all_tokens)
+        callback = self._parse_tree_builder.create_callback(self.options.transformer)
         if self.profiler:
             for f in dir(callback):
                 if not (f.startswith('__') and f.endswith('__')):
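The rename from `apply` to `create_callback` (plus the underscore on `_parse_tree_builder`) signals that the builder constructs and returns a callback object rather than mutating shared state. A rough sketch of the calling pattern under the new name; the argument values here are placeholders, not taken from a real configuration:

```python
# Hypothetical usage mirroring _build_parser above.
builder = ParseTreeBuilder(rules, Tree, propagate_positions=False, keep_all_tokens=False)
callback = builder.create_callback(transformer=None)   # formerly builder.apply(...)
parser_conf = ParserConf(rules, callback, 'start')
```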
@@ -5,6 +5,7 @@ import re

 from .utils import Str, classify
 from .common import is_terminal, PatternStr, PatternRE, TokenDef

+###{standalone

 class LexError(Exception):
     pass

@@ -48,10 +49,60 @@ class Token(Str):
     __hash__ = Str.__hash__

 class Regex:
     def __init__(self, pattern, flags=()):
         self.pattern = pattern
         self.flags = flags
+class LineCounter:
+    def __init__(self):
+        self.newline_char = '\n'
+        self.char_pos = 0
+        self.line = 1
+        self.column = 0
+        self.line_start_pos = 0
+
+    def feed(self, token, test_newline=True):
+        """Consume a token and calculate the new line & column.
+
+        As an optional optimization, set test_newline=False if token doesn't contain a newline.
+        """
+        if test_newline:
+            newlines = token.count(self.newline_char)
+            if newlines:
+                self.line += newlines
+                self.line_start_pos = self.char_pos + token.rindex(self.newline_char) + 1
+
+        self.char_pos += len(token)
+        self.column = self.char_pos - self.line_start_pos
+class _Lex:
+    "Built to serve both Lexer and ContextualLexer"
+
+    def __init__(self, lexer):
+        self.lexer = lexer
+
+    def lex(self, stream, newline_types, ignore_types):
+        newline_types = list(newline_types)
+        ignore_types = list(ignore_types)
+        line_ctr = LineCounter()
+
+        while True:
+            lexer = self.lexer
+            for mre, type_from_index in lexer.mres:
+                m = mre.match(stream, line_ctr.char_pos)
+                if m:
+                    value = m.group(0)
+                    type_ = type_from_index[m.lastindex]
+                    if type_ not in ignore_types:
+                        t = Token(type_, value, line_ctr.char_pos, line_ctr.line, line_ctr.column)
+                        if t.type in lexer.callback:
+                            t = lexer.callback[t.type](t)
+                        yield t
+                    line_ctr.feed(value, type_ in newline_types)
+                    break
+            else:
+                if line_ctr.char_pos < len(stream):
+                    raise UnexpectedInput(stream, line_ctr.char_pos, line_ctr.line, line_ctr.column)
+                break
+
+###}

 def _regexp_has_newline(r):
     return '\n' in r or '\\n' in r or ('(?s)' in r and '.' in r)
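A quick sanity check of the position arithmetic in `LineCounter.feed` (`column = char_pos - line_start_pos`), assuming the `LineCounter` class from the block above is in scope; the token strings are invented for illustration:

```python
lc = LineCounter()
lc.feed('foo')         # no newline: char_pos=3, line=1, column=3
lc.feed('bar\nbaz')    # one newline: line=2, line_start_pos=3+3+1=7
assert (lc.line, lc.column, lc.char_pos) == (2, 3, 10)
```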
@@ -182,57 +233,3 @@ class ContextualLexer:
             l.lexer = self.lexers[self.parser_state]

-###{lexer
-class LineCounter:
-    def __init__(self):
-        self.newline_char = '\n'
-        self.char_pos = 0
-        self.line = 1
-        self.column = 0
-        self.line_start_pos = 0
-
-    def feed(self, token, test_newline=True):
-        """Consume a token and calculate the new line & column.
-
-        As an optional optimization, set test_newline=False if token doesn't contain a newline.
-        """
-        if test_newline:
-            newlines = token.count(self.newline_char)
-            if newlines:
-                self.line += newlines
-                self.line_start_pos = self.char_pos + token.rindex(self.newline_char) + 1
-
-        self.char_pos += len(token)
-        self.column = self.char_pos - self.line_start_pos
-class _Lex:
-    "Built to serve both Lexer and ContextualLexer"
-
-    def __init__(self, lexer):
-        self.lexer = lexer
-
-    def lex(self, stream, newline_types, ignore_types):
-        newline_types = list(newline_types)
-        ignore_types = list(ignore_types)
-        line_ctr = LineCounter()
-
-        while True:
-            lexer = self.lexer
-            for mre, type_from_index in lexer.mres:
-                m = mre.match(stream, line_ctr.char_pos)
-                if m:
-                    value = m.group(0)
-                    type_ = type_from_index[m.lastindex]
-                    if type_ not in ignore_types:
-                        t = Token(type_, value, line_ctr.char_pos, line_ctr.line, line_ctr.column)
-                        if t.type in lexer.callback:
-                            t = lexer.callback[t.type](t)
-                        yield t
-                    line_ctr.feed(value, type_ in newline_types)
-                    break
-            else:
-                if line_ctr.char_pos < len(stream):
-                    raise UnexpectedInput(stream, line_ctr.char_pos, line_ctr.line, line_ctr.column)
-                break
-
-###}
@@ -128,7 +128,7 @@ RULES = {

 class EBNF_to_BNF(InlineTransformer):
     def __init__(self):
-        self.new_rules = {}
+        self.new_rules = []
         self.rules_by_expr = {}
         self.prefix = 'anon'
         self.i = 0

@@ -141,7 +141,8 @@ class EBNF_to_BNF(InlineTransformer):
         new_name = '__%s_%s_%d' % (self.prefix, type_, self.i)
         self.i += 1
         t = Token('RULE', new_name, -1)
-        self.new_rules[new_name] = T('expansions', [T('expansion', [expr]), T('expansion', [t, expr])]), self.rule_options
+        tree = T('expansions', [T('expansion', [expr]), T('expansion', [t, expr])])
+        self.new_rules.append((new_name, tree, self.rule_options))
         self.rules_by_expr[expr] = t
         return t
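For context, the tree built here is the standard left-recursive encoding of EBNF repetition: a fresh rule that matches either one `expr`, or itself followed by `expr`. A minimal standalone illustration of the same idea; this is not lark's code, and the names and data shapes are simplified:

```python
from itertools import count

_ids = count()

def expand_plus(expr):
    # Encode "expr+" as:  __anon_plus_N: expr | __anon_plus_N expr
    new_name = '__anon_plus_%d' % next(_ids)
    alternatives = [[expr], [new_name, expr]]   # base case, recursive case
    return new_name, alternatives

name, alts = expand_plus('item')
assert name == '__anon_plus_0'
assert alts == [['item'], ['__anon_plus_0', 'item']]
```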
@@ -390,12 +391,6 @@ def _interleave(l, item):

 def _choice_of_rules(rules):
     return T('expansions', [T('expansion', [Token('RULE', name)]) for name in rules])

-def dict_update_safe(d1, d2):
-    for k, v in d2.items():
-        assert k not in d1
-        d1[k] = v
-
 class Grammar:
     def __init__(self, rule_defs, token_defs, ignore):
         self.token_defs = token_defs

@@ -468,38 +463,41 @@ class Grammar:
         # =================
         #  Compile Rules
         # =================
-        ebnf_to_bnf = EBNF_to_BNF()
-        simplify_rule = SimplifyRule_Visitor()

         # 1. Pre-process terminals
         transformer = PrepareLiterals()
         if not lexer:
             transformer *= SplitLiterals()
         transformer *= ExtractAnonTokens(tokens)   # Adds to tokens

-        rules = {}
         # 2. Convert EBNF to BNF (and apply step 1)
+        ebnf_to_bnf = EBNF_to_BNF()
+        rules = []
         for name, rule_tree, options in rule_defs:
             assert name not in rules, name
             ebnf_to_bnf.rule_options = RuleOptions(keep_all_tokens=True) if options and options.keep_all_tokens else None
             tree = transformer.transform(rule_tree)
-            rules[name] = ebnf_to_bnf.transform(tree), options
+            rules.append((name, ebnf_to_bnf.transform(tree), options))

-        dict_update_safe(rules, ebnf_to_bnf.new_rules)
+        rules += ebnf_to_bnf.new_rules
+        assert len(rules) == len({name for name, _t, _o in rules}), "Whoops, name collision"

         # 3. Compile tree to Rule objects
         rule_tree_to_text = RuleTreeToText()
-        new_rules = []
-        for origin, (tree, options) in rules.items():
+        simplify_rule = SimplifyRule_Visitor()
+        compiled_rules = []
+        for name, tree, options in rules:
             simplify_rule.visit(tree)
             expansions = rule_tree_to_text.transform(tree)

             for expansion, alias in expansions:
-                if alias and origin.startswith('_'):
-                    raise Exception("Rule %s is marked for expansion (it starts with an underscore) and isn't allowed to have aliases (alias=%s)" % (origin, alias))
+                if alias and name.startswith('_'):
+                    raise Exception("Rule %s is marked for expansion (it starts with an underscore) and isn't allowed to have aliases (alias=%s)" % (name, alias))

-                rule = Rule(origin, expansion, alias, options)
-                new_rules.append(rule)
+                rule = Rule(name, expansion, alias, options)
+                compiled_rules.append(rule)

-        return tokens, new_rules, self.ignore
+        return tokens, compiled_rules, self.ignore
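Switching `rules` from a dict keyed by rule name to a list of `(name, tree, options)` triples keeps rule definition order regardless of dict ordering guarantees (which vary across the Python versions this code supports), and replaces `dict_update_safe` with a single post-hoc uniqueness check. The new assert works by comparing counts; illustrative data only:

```python
rules = [('start', 'tree0', None), ('item', 'tree1', None)]
assert len(rules) == len({name for name, _t, _o in rules})   # unique names: passes

rules.append(('item', 'tree2', None))                        # duplicate rule name
assert len(rules) != len({name for name, _t, _o in rules})   # 3 triples, 2 names
```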
@@ -557,7 +555,7 @@ class GrammarLoader:

         rules = [options_from_rule(name, x) for name, x in RULES.items()]
         rules = [Rule(r, x.split(), None, o) for r, xs, o in rules for x in xs]
-        callback = ParseTreeBuilder(rules, T).apply()
+        callback = ParseTreeBuilder(rules, T).create_callback()
         lexer_conf = LexerConf(tokens, ['WS', 'COMMENT'])
         parser_conf = ParserConf(rules, callback, 'start')
@@ -3,6 +3,8 @@ from .utils import suppress

 from .lexer import Token
 from .grammar import Rule

+###{standalone
+
 class NodeBuilder:
     def __init__(self, tree_class, name):
         self.tree_class = tree_class

@@ -130,7 +132,7 @@ class ParseTreeBuilder:

             yield rule, wrapper_chain

-    def apply(self, transformer=None):
+    def create_callback(self, transformer=None):
         callback = Callback()
         for rule, wrapper_chain in self.rule_builders:

@@ -152,3 +154,5 @@ class ParseTreeBuilder:
             setattr(callback, internal_callback_name, f)

         return callback
+
+###}
@@ -3,7 +3,7 @@
 # Author: Erez Shinan (2017)
 # Email : erezshin@gmail.com

-from ..common import ParseError, UnexpectedToken
+from ..common import UnexpectedToken
 from .lalr_analysis import LALR_Analyzer, Shift

@@ -20,6 +20,8 @@ class Parser:
         self.parser = _Parser(analysis.parse_table, callbacks)
         self.parse = self.parser.parse

+###{standalone
+
 class _Parser:
     def __init__(self, parse_table, callbacks):
         self.states = parse_table.states

@@ -90,3 +92,5 @@ class _Parser:
                 return val
             else:
                 reduce(arg)
+
+###}
@@ -7,6 +7,7 @@ from copy import deepcopy

 from .utils import inline_args

+###{standalone
 class Tree(object):
     def __init__(self, data, children):
         self.data = data

@@ -33,6 +34,7 @@ class Tree(object):

     def pretty(self, indent_str='  '):
         return ''.join(self._pretty(0, indent_str))
+###}

     def expand_kids_by_index(self, *indices):
         for i in sorted(indices, reverse=True):   # reverse so that changing tail won't affect indices

@@ -138,7 +140,7 @@ class TransformerChain(object):

     def __mul__(self, other):
         return TransformerChain(*self.transformers + (other,))

 class InlineTransformer(Transformer):
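The `__mul__` overload shown above is what lets transformers be chained with `*` (the same operator used earlier in `Grammar.compile`, e.g. `transformer *= SplitLiterals()`), with each stage's output feeding the next. A self-contained toy that mirrors the pattern; these are not lark's classes, just a sketch of the chaining semantics:

```python
class Stage:
    def __mul__(self, other):
        return Chain(self, other)

class Chain(Stage):
    def __init__(self, *stages):
        self.stages = stages
    def __mul__(self, other):
        return Chain(*self.stages + (other,))   # flatten, like TransformerChain
    def transform(self, value):
        for stage in self.stages:               # apply each stage in sequence
            value = stage.transform(value)
        return value

class Upper(Stage):
    def transform(self, s): return s.upper()

class Exclaim(Stage):
    def transform(self, s): return s + '!'

assert (Upper() * Exclaim()).transform('tree') == 'TREE!'
```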