@@ -22,13 +22,13 @@ calc_grammar = """
     | product "*" atom -> mul
     | product "/" atom -> div
-?atom: DECIMAL -> number
+?atom: NUMBER -> number
     | "-" atom -> neg
     | NAME -> var
     | "(" sum ")"
 %import common.CNAME -> NAME
-%import common.DECIMAL
+%import common.NUMBER
 %import common.WS_INLINE
 %ignore WS_INLINE

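NUMBER from the common grammar matches integers as well as decimals (DECIMAL presumably required a decimal point), which is the practical effect of this change. A trimmed, self-contained sketch:

    from lark import Lark

    # A trimmed sketch of the calc grammar, enough to exercise the change.
    calc = Lark(r"""
        start: sum
        ?sum: product
            | sum "+" product -> add
        ?product: atom
            | product "*" atom -> mul
        ?atom: NUMBER -> number
             | "(" sum ")"

        %import common.NUMBER
        %import common.WS_INLINE
        %ignore WS_INLINE
    """, parser='lalr')

    # "1 + 2" would not lex under DECIMAL; NUMBER takes both forms.
    print(calc.parse("1 + 2.5"))
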
@@ -26,7 +26,7 @@ parser = Lark(r"""
 %ignore /[\t \f]+/
 %ignore /\#[^\n]*/
-""", parser="lalr_contextual_lexer")
+""", parser="lalr", lexer="contextual")

 sample_conf = """

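Migration is mechanical: the fused engine name splits into two keyword arguments, and the old spelling is now rejected (get_frontend raises "Unknown parser"). A minimal sketch:

    from lark import Lark

    grammar = r'''start: "a"+'''

    # Old (now raises ValueError: Unknown parser: lalr_contextual_lexer):
    #   Lark(grammar, parser="lalr_contextual_lexer")

    # New: the parsing algorithm and the lexing strategy are independent.
    p = Lark(grammar, parser="lalr", lexer="contextual")
    p.parse("aaa")
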
@@ -24,7 +24,7 @@ parser = Lark(r"""
 _CR : /\r/
 _LF : /\n/
-""", parser="earley_nolex")
+""", lexer=None)

 class RestoreTokens(Transformer):
     value = ''.join

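Likewise, lexer=None replaces parser="earley_nolex" for scannerless parsing; parser can be dropped because "earley" is the default. A sketch, keeping the one-character-token restriction in mind:

    from lark import Lark

    # Scannerless mode: no lexer stage, so the Earley parser consumes the
    # input character by character. Every terminal must have a width of
    # exactly 1 (enforced in Earley_NoLex._prepare_expansion below).
    p = Lark(r'''
        start: A B+
        A: "a"
        B: "b"
    ''', lexer=None)
    p.parse("abb")
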
@@ -16,9 +16,10 @@ tree_grammar = r"""
 tree: NAME _NL [_INDENT tree+ _DEDENT]

-NAME: /\w+/
+%import common.CNAME -> NAME
+%import common.WS_INLINE
+%ignore WS_INLINE
-WS.ignore: /\s+/

 _NL: /(\r?\n[\t ]*)+/
 _INDENT: "<INDENT>"
 _DEDENT: "<DEDENT>"

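Note the removed WS rule used the old `.ignore` token flag and swallowed newlines too; WS_INLINE leaves _NL free to match. Since _INDENT/_DEDENT are literal markers here, a self-contained sketch can feed them directly (the full example presumably injects them in a postlex stage instead):

    from lark import Lark

    p = Lark(r'''
        start: tree
        tree: NAME _NL [_INDENT tree+ _DEDENT]

        %import common.CNAME -> NAME
        %import common.WS_INLINE
        %ignore WS_INLINE

        _NL: /(\r?\n[\t ]*)+/
        _INDENT: "<INDENT>"
        _DEDENT: "<DEDENT>"
    ''', parser='lalr')

    print(p.parse("a\n<INDENT>b\nc\n<DEDENT>"))
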
@@ -3,4 +3,4 @@ from .common import ParseError, GrammarError
 from .lark import Lark
 from .utils import inline_args

-__version__ = "0.1.2"
+__version__ = "0.2.0"

@@ -11,7 +11,7 @@ from .common import GrammarError, LexerConf, ParserConf
 from .lexer import Lexer
 from .parse_tree_builder import ParseTreeBuilder
-from .parser_frontends import ENGINE_DICT
+from .parser_frontends import get_frontend

 class LarkOptions(object):
     """Specifies the options for Lark
@@ -19,7 +19,13 @@ class LarkOptions(object):
     """
     OPTIONS_DOC = """
         parser - Which parser engine to use ("earley" or "lalr". Default: "earley")
-                 Note: Both will use Lark's lexer.
+                 Note: "lalr" requires a lexer
+        lexer - Whether or not to use a lexer stage
+                None: Don't use a lexer
+                "standard": Use a standard lexer
+                "contextual": Stronger lexer (only works with parser="lalr")
+                "auto" (default): Choose for me based on grammar and parser
         transformer - Applies the transformer to every parse tree
         debug - Affects verbosity (default: False)
         only_lex - Don't build a parser. Useful for debugging (default: False)

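Spelled out, the combinations that get_frontend (further down) will accept:

    # parser='lalr'   + lexer='standard'   -> LALR
    # parser='lalr'   + lexer='contextual' -> LALR_ContextualLexer
    # parser='lalr'   + lexer=None         -> ValueError: requires a lexer
    # parser='earley' + lexer='standard'   -> Earley
    # parser='earley' + lexer=None         -> Earley_NoLex (scannerless)
    # parser='earley' + lexer='contextual' -> ValueError: not supported
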
@@ -40,11 +46,12 @@ class LarkOptions(object):
         self.cache_grammar = o.pop('cache_grammar', False)
         self.postlex = o.pop('postlex', None)
         self.parser = o.pop('parser', 'earley')
+        self.lexer = o.pop('lexer', 'auto')
         self.transformer = o.pop('transformer', None)
         self.start = o.pop('start', 'start')
         self.profile = o.pop('profile', False)

-        assert self.parser in ENGINE_DICT
+        # parser/lexer validation now happens in get_frontend
         if self.parser == 'earley' and self.transformer:
             raise ValueError('Cannot specify an auto-transformer when using the Earley algorithm. '
                              'Please use your transformer on the resulting parse tree, or use a different algorithm (e.g. lalr)')

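The restriction in practice, sketched:

    from lark import Lark, Transformer

    class MyT(Transformer):   # hypothetical transformer, for illustration
        pass

    # Raises the ValueError above: with Earley, run the transformer on the
    # finished parse tree instead, or switch to parser='lalr'.
    Lark(r'''start: "a"''', parser='earley', transformer=MyT())
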
@@ -118,9 +125,15 @@ class Lark:
         self.lexer_conf = LexerConf(tokens, self.ignore_tokens, self.options.postlex)

         if not self.options.only_lex:
+            if self.options.lexer == 'auto':
+                if self.options.parser == 'lalr':
+                    self.options.lexer = 'standard'
+                elif self.options.parser == 'earley':
+                    self.options.lexer = 'standard'
             if self.options.parser:
                 self.parser = self._build_parser()
-            else:
+            elif self.options.lexer:
                 self.lexer = self._build_lexer()

         if self.profiler: self.profiler.enter_section('outside_lark')

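Both branches currently resolve 'auto' to 'standard'; the split exists so the two defaults can diverge later. Observable from the outside (a sketch):

    from lark import Lark

    p = Lark(r'''start: "a"''')   # defaults: parser='earley', lexer='auto'
    # 'auto' is rewritten in place during construction:
    assert p.options.lexer == 'standard'
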
@@ -131,7 +144,7 @@ class Lark:
         return Lexer(self.lexer_conf.tokens, ignore=self.lexer_conf.ignore)

     def _build_parser(self):
-        self.parser_class = ENGINE_DICT[self.options.parser]
+        self.parser_class = get_frontend(self.options.parser, self.options.lexer)
         self.parse_tree_builder = ParseTreeBuilder(self.options.tree_class)
         rules, callback = self.parse_tree_builder.create_tree_builder(self.rules, self.options.transformer)
         if self.profiler:

@@ -88,7 +88,9 @@ class Lexer(object):
                 raise LexError("Cannot compile token: %s: %s" % (t.name, t.pattern))

         token_names = {t.name for t in tokens}
-        assert all(t in token_names for t in ignore)
+        for t in ignore:
+            if t not in token_names:
+                raise LexError("Token '%s' was marked to ignore but it is not defined!" % t)

         # Init
         self.newline_types = [t.name for t in tokens if _regexp_has_newline(t.pattern.to_regexp())]

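A typo in %ignore now fails loudly, even under python -O where a bare assert vanishes. The failure mode, sketched (assuming the grammar loader passes the unknown name through to the lexer):

    from lark import Lark

    # WS is ignored but never defined or imported, so this now raises
    #   LexError: Token 'WS' was marked to ignore but it is not defined!
    Lark(r'''
        start: "a"+
        %ignore WS
    ''')
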
@@ -131,7 +131,7 @@ class Earley_NoLex:
     def _prepare_expansion(self, expansion):
         for sym in expansion:
             if is_terminal(sym):
-                regexp = self.token_by_name[sym].to_regexp()
+                regexp = self.token_by_name[sym].pattern.to_regexp()
                 width = sre_parse.parse(regexp).getwidth()
                 if not width == (1, 1):
                     raise GrammarError('Dynamic lexing requires all tokens to have a width of 1 (%s is %s)' % (regexp, width))

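getwidth() reports the (min, max) number of characters a pattern can match, which is what makes the one-character check cheap:

    import sre_parse

    print(sre_parse.parse("a").getwidth())      # (1, 1)  -> accepted
    print(sre_parse.parse("[a-z]").getwidth())  # (1, 1)  -> accepted
    print(sre_parse.parse("ab").getwidth())     # (2, 2)  -> GrammarError
    print(sre_parse.parse("a+").getwidth())     # (1, MAXREPEAT) -> GrammarError
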
@@ -144,9 +144,28 @@ class Earley_NoLex:
         assert len(res) == 1, 'Ambiguous Parse! Not handled yet'
         return res[0]

-ENGINE_DICT = {
-    'lalr': LALR,
-    'earley': Earley,
-    'earley_nolex': Earley_NoLex,
-    'lalr_contextual_lexer': LALR_ContextualLexer
-}
+def get_frontend(parser, lexer):
+    if parser == 'lalr':
+        if lexer is None:
+            raise ValueError('The LALR parser requires use of a lexer')
+        elif lexer == 'standard':
+            return LALR
+        elif lexer == 'contextual':
+            return LALR_ContextualLexer
+        else:
+            raise ValueError('Unknown lexer: %s' % lexer)
+    elif parser == 'earley':
+        if lexer is None:
+            return Earley_NoLex
+        elif lexer == 'standard':
+            return Earley
+        elif lexer == 'contextual':
+            raise ValueError('The Earley parser does not support the contextual lexer')
+        else:
+            raise ValueError('Unknown lexer: %s' % lexer)
+    else:
+        raise ValueError('Unknown parser: %s' % parser)

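The dispatch at a glance (a sketch):

    from lark.parser_frontends import get_frontend

    get_frontend('earley', None)          # -> Earley_NoLex (scannerless)
    get_frontend('earley', 'standard')    # -> Earley
    get_frontend('lalr', 'contextual')    # -> LALR_ContextualLexer
    get_frontend('lalr', None)            # ValueError: requires a lexer
    get_frontend('earley', 'contextual')  # ValueError: not supported
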
@@ -2,8 +2,8 @@ import re
 from collections import defaultdict

 from .tree import Tree
-from .common import is_terminal, ParserConf
-from .lexer import Token, TokenDef__Str
+from .common import is_terminal, ParserConf, PatternStr
+from .lexer import Token
 from .parsers import earley
 from .lark import Lark

@@ -22,7 +22,7 @@ def is_iter_empty(i):
 class Reconstructor:
     def __init__(self, parser):
         tokens = {t.name:t for t in parser.lexer_conf.tokens}
-        token_res = {t.name:re.compile(t.to_regexp()) for t in parser.lexer_conf.tokens}
+        token_res = {t.name:re.compile(t.pattern.to_regexp()) for t in parser.lexer_conf.tokens}

 class MatchData:
     def __init__(self, data):

@@ -50,8 +50,8 @@ class Reconstructor:
             for sym in self.expansion:
                 if is_discarded_terminal(sym):
                     t = tokens[sym]
-                    assert isinstance(t, TokenDef__Str)
-                    to_write.append(t.value)
+                    assert isinstance(t.pattern, PatternStr)
+                    to_write.append(t.pattern.value)
                 else:
                     x = next(args2)
                     if isinstance(x, list):

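All the .pattern.to_regexp() and PatternStr changes in this diff track one refactor: a token definition now holds a pattern object rather than being one. Roughly, the assumed shape (for orientation only, not the actual lexer.py definitions):

    import re

    class PatternStr:
        """A literal-string pattern; .value is the raw string."""
        def __init__(self, value):
            self.value = value
        def to_regexp(self):
            return re.escape(self.value)

    class TokenDef:
        """A named token whose .pattern is a PatternStr or a regexp pattern."""
        def __init__(self, name, pattern):
            self.name = name
            self.pattern = pattern
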
@@ -5,7 +5,7 @@ import logging
 from .test_trees import TestTrees
 # from .test_selectors import TestSelectors
-from .test_parser import TestLalr, TestEarley, TestLalr_contextual_lexer, TestParsers
+from .test_parser import TestLalrStandard, TestEarleyStandard, TestLalrContextual, TestParsers
 # from .test_grammars import TestPythonG, TestConfigG

 logging.basicConfig(level=logging.INFO)

@@ -42,9 +42,9 @@ class TestParsers(unittest.TestCase):
 class TestEarley(unittest.TestCase):
     pass

-def _make_parser_test(PARSER):
+def _make_parser_test(LEXER, PARSER):
     def _Lark(grammar, **kwargs):
-        return Lark(grammar, parser=PARSER, **kwargs)
+        return Lark(grammar, lexer=LEXER, parser=PARSER, **kwargs)
     class _TestParser(unittest.TestCase):
         def test_basic1(self):
             g = _Lark("""start: a+ b a* "b" a*

@@ -397,12 +397,18 @@ def _make_parser_test(PARSER):
         g.parse("+2e-9")
         self.assertRaises(ParseError, g.parse, "+2e-9e")

-    _NAME = "Test" + PARSER.capitalize()
+    _NAME = "Test" + PARSER.capitalize() + (LEXER or 'None').capitalize()
     _TestParser.__name__ = _NAME
     globals()[_NAME] = _TestParser

-for PARSER in ['lalr', 'earley', 'lalr_contextual_lexer']:
-    _make_parser_test(PARSER)
+_TO_TEST = [
+    ('standard', 'earley'),
+    ('standard', 'lalr'),
+    ('contextual', 'lalr'),
+]
+
+for LEXER, PARSER in _TO_TEST:
+    _make_parser_test(LEXER, PARSER)

 if __name__ == '__main__':
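
The generated class names line up with the updated imports in the test package's __init__.py:

    # "Test" + PARSER.capitalize() + (LEXER or 'None').capitalize()
    #   ('standard', 'earley')  -> TestEarleyStandard
    #   ('standard', 'lalr')    -> TestLalrStandard
    #   ('contextual', 'lalr')  -> TestLalrContextual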