@@ -22,13 +22,13 @@ calc_grammar = """ | |||
| product "*" atom -> mul | |||
| product "/" atom -> div | |||
?atom: DECIMAL -> number | |||
?atom: NUMBER -> number | |||
| "-" atom -> neg | |||
| NAME -> var | |||
| "(" sum ")" | |||
%import common.CNAME -> NAME | |||
%import common.DECIMAL | |||
%import common.NUMBER | |||
%import common.WS_INLINE | |||
%ignore WS_INLINE | |||
@@ -26,7 +26,7 @@ parser = Lark(r""" | |||
%ignore /[\t \f]+/ | |||
%ignore /\#[^\n]*/ | |||
""", parser="lalr_contextual_lexer") | |||
""", parser="lalr", lexer="contextual") | |||
sample_conf = """ | |||
@@ -24,7 +24,7 @@ parser = Lark(r""" | |||
_CR : /\r/ | |||
_LF : /\n/ | |||
""", parser="earley_nolex") | |||
""", lexer=None) | |||
# Transformer used when reconstructing the original text: each `value` rule's
# children (matched character/token strings) are concatenated back together.
class RestoreTokens(Transformer):
    # ''.join itself serves as the rule callback: it is called with the list
    # of child strings and returns their concatenation.
    value = ''.join
@@ -16,9 +16,10 @@ tree_grammar = r""" | |||
tree: NAME _NL [_INDENT tree+ _DEDENT] | |||
NAME: /\w+/ | |||
%import common.CNAME -> NAME | |||
%import common.WS_INLINE | |||
%ignore WS_INLINE | |||
WS.ignore: /\s+/ | |||
_NL: /(\r?\n[\t ]*)+/ | |||
_INDENT: "<INDENT>" | |||
_DEDENT: "<DEDENT>" | |||
@@ -3,4 +3,4 @@ from .common import ParseError, GrammarError | |||
from .lark import Lark | |||
from .utils import inline_args | |||
__version__ = "0.1.2" | |||
__version__ = "0.2.0" |
@@ -11,7 +11,7 @@ from .common import GrammarError, LexerConf, ParserConf | |||
from .lexer import Lexer | |||
from .parse_tree_builder import ParseTreeBuilder | |||
from .parser_frontends import ENGINE_DICT | |||
from .parser_frontends import get_frontend | |||
class LarkOptions(object): | |||
"""Specifies the options for Lark | |||
@@ -19,7 +19,13 @@ class LarkOptions(object): | |||
""" | |||
OPTIONS_DOC = """ | |||
parser - Which parser engine to use ("earley" or "lalr". Default: "earley") | |||
Note: Both will use Lark's lexer. | |||
Note: "lalr" requires a lexer | |||
lexer - Whether or not to use a lexer stage | |||
None: Don't use a lexer | |||
"standard": Use a standard lexer | |||
"contextual": Stronger lexer (only works with parser="lalr") | |||
"auto" (default): Choose for me based on grammar and parser | |||
transformer - Applies the transformer to every parse tree | |||
debug - Affects verbosity (default: False) | |||
only_lex - Don't build a parser. Useful for debugging (default: False) | |||
@@ -40,11 +46,12 @@ class LarkOptions(object): | |||
self.cache_grammar = o.pop('cache_grammar', False) | |||
self.postlex = o.pop('postlex', None) | |||
self.parser = o.pop('parser', 'earley') | |||
self.lexer = o.pop('lexer', 'auto') | |||
self.transformer = o.pop('transformer', None) | |||
self.start = o.pop('start', 'start') | |||
self.profile = o.pop('profile', False) | |||
assert self.parser in ENGINE_DICT | |||
# assert self.parser in ENGINE_DICT | |||
if self.parser == 'earley' and self.transformer: | |||
raise ValueError('Cannot specify an auto-transformer when using the Earley algorithm.' | |||
'Please use your transformer on the resulting parse tree, or use a different algorithm (i.e. lalr)') | |||
@@ -118,9 +125,15 @@ class Lark: | |||
self.lexer_conf = LexerConf(tokens, self.ignore_tokens, self.options.postlex) | |||
if not self.options.only_lex: | |||
if self.options.lexer == 'auto': | |||
if self.options.parser == 'lalr': | |||
self.options.lexer = 'standard' | |||
elif self.options.parser == 'earley': | |||
self.options.lexer = 'standard' | |||
if self.options.parser: | |||
self.parser = self._build_parser() | |||
else: | |||
elif self.options.lexer: | |||
self.lexer = self._build_lexer() | |||
if self.profiler: self.profiler.enter_section('outside_lark') | |||
@@ -131,7 +144,7 @@ class Lark: | |||
return Lexer(self.lexer_conf.tokens, ignore=self.lexer_conf.ignore) | |||
def _build_parser(self): | |||
self.parser_class = ENGINE_DICT[self.options.parser] | |||
self.parser_class = get_frontend(self.options.parser, self.options.lexer) | |||
self.parse_tree_builder = ParseTreeBuilder(self.options.tree_class) | |||
rules, callback = self.parse_tree_builder.create_tree_builder(self.rules, self.options.transformer) | |||
if self.profiler: | |||
@@ -88,7 +88,9 @@ class Lexer(object): | |||
raise LexError("Cannot compile token: %s: %s" % (t.name, t.pattern)) | |||
token_names = {t.name for t in tokens} | |||
assert all(t in token_names for t in ignore) | |||
for t in ignore: | |||
if t not in token_names: | |||
raise LexError("Token '%s' was marked to ignore but it is not defined!" % t) | |||
# Init | |||
self.newline_types = [t.name for t in tokens if _regexp_has_newline(t.pattern.to_regexp())] | |||
@@ -131,7 +131,7 @@ class Earley_NoLex: | |||
def _prepare_expansion(self, expansion): | |||
for sym in expansion: | |||
if is_terminal(sym): | |||
regexp = self.token_by_name[sym].to_regexp() | |||
regexp = self.token_by_name[sym].pattern.to_regexp() | |||
width = sre_parse.parse(regexp).getwidth() | |||
if not width == (1,1): | |||
raise GrammarError('Dynamic lexing requires all tokens to have a width of 1 (%s is %s)' % (regexp, width)) | |||
@@ -144,9 +144,28 @@ class Earley_NoLex: | |||
assert len(res) ==1 , 'Ambiguious Parse! Not handled yet' | |||
return res[0] | |||
ENGINE_DICT = { | |||
'lalr': LALR, | |||
'earley': Earley, | |||
'earley_nolex': Earley_NoLex, | |||
'lalr_contextual_lexer': LALR_ContextualLexer | |||
} | |||
def get_frontend(parser, lexer):
    """Return the parser-frontend class for a (parser, lexer) combination.

    Replaces the old ENGINE_DICT lookup keyed on combined names like
    "lalr_contextual_lexer".

    parser: "lalr" or "earley"
    lexer: None (scanner-less), "standard", or "contextual"

    Raises ValueError for an unknown parser/lexer name or an unsupported
    combination.
    """
    if parser == 'lalr':
        if lexer is None:
            # LALR operates on a token stream; it cannot run scanner-less.
            raise ValueError('The LALR parser requires use of a lexer')
        elif lexer == 'standard':
            return LALR
        elif lexer == 'contextual':
            return LALR_ContextualLexer
        else:
            raise ValueError('Unknown lexer: %s' % lexer)
    elif parser == 'earley':
        if lexer is None:
            return Earley_NoLex
        elif lexer == 'standard':
            return Earley
        elif lexer == 'contextual':
            # Bug fix: message said "contextual parser"; the unsupported thing
            # is the contextual *lexer* (it needs LALR parse-state feedback).
            raise ValueError('The Earley parser does not support the contextual lexer')
        else:
            raise ValueError('Unknown lexer: %s' % lexer)
    else:
        raise ValueError('Unknown parser: %s' % parser)
@@ -2,8 +2,8 @@ import re | |||
from collections import defaultdict | |||
from .tree import Tree | |||
from .common import is_terminal, ParserConf | |||
from .lexer import Token, TokenDef__Str | |||
from .common import is_terminal, ParserConf, PatternStr | |||
from .lexer import Token | |||
from .parsers import earley | |||
from .lark import Lark | |||
@@ -22,7 +22,7 @@ def is_iter_empty(i): | |||
class Reconstructor: | |||
def __init__(self, parser): | |||
tokens = {t.name:t for t in parser.lexer_conf.tokens} | |||
token_res = {t.name:re.compile(t.to_regexp()) for t in parser.lexer_conf.tokens} | |||
token_res = {t.name:re.compile(t.pattern.to_regexp()) for t in parser.lexer_conf.tokens} | |||
class MatchData: | |||
def __init__(self, data): | |||
@@ -50,8 +50,8 @@ class Reconstructor: | |||
for sym in self.expansion: | |||
if is_discarded_terminal(sym): | |||
t = tokens[sym] | |||
assert isinstance(t, TokenDef__Str) | |||
to_write.append(t.value) | |||
assert isinstance(t.pattern, PatternStr) | |||
to_write.append(t.pattern.value) | |||
else: | |||
x = next(args2) | |||
if isinstance(x, list): | |||
@@ -5,7 +5,7 @@ import logging | |||
from .test_trees import TestTrees | |||
# from .test_selectors import TestSelectors | |||
from .test_parser import TestLalr, TestEarley, TestLalr_contextual_lexer, TestParsers | |||
from .test_parser import TestLalrStandard, TestEarleyStandard, TestLalrContextual, TestParsers | |||
# from .test_grammars import TestPythonG, TestConfigG | |||
logging.basicConfig(level=logging.INFO) | |||
@@ -42,9 +42,9 @@ class TestParsers(unittest.TestCase): | |||
class TestEarley(unittest.TestCase): | |||
pass | |||
def _make_parser_test(PARSER): | |||
def _make_parser_test(LEXER, PARSER): | |||
def _Lark(grammar, **kwargs): | |||
return Lark(grammar, parser=PARSER, **kwargs) | |||
return Lark(grammar, lexer=LEXER, parser=PARSER, **kwargs) | |||
class _TestParser(unittest.TestCase): | |||
def test_basic1(self): | |||
g = _Lark("""start: a+ b a* "b" a* | |||
@@ -397,12 +397,18 @@ def _make_parser_test(PARSER): | |||
g.parse("+2e-9") | |||
self.assertRaises(ParseError, g.parse, "+2e-9e") | |||
_NAME = "Test" + PARSER.capitalize() | |||
_NAME = "Test" + PARSER.capitalize() + (LEXER or 'None').capitalize() | |||
_TestParser.__name__ = _NAME | |||
globals()[_NAME] = _TestParser | |||
for PARSER in ['lalr', 'earley', 'lalr_contextual_lexer']: | |||
_make_parser_test(PARSER) | |||
# Each (lexer, parser) combination below gets its own generated TestCase class
# (e.g. TestEarleyStandard, TestLalrContextual).
# NOTE(review): the scanner-less combination (None, 'earley') is not exercised
# here — presumably deliberate, but worth confirming.
_TO_TEST = [
    ('standard', 'earley'),
    ('standard', 'lalr'),
    ('contextual', 'lalr'),
]
for LEXER, PARSER in _TO_TEST:
    # Injects a Test<Parser><Lexer> class into this module's globals().
    _make_parser_test(LEXER, PARSER)
if __name__ == '__main__': | |||