@@ -22,13 +22,13 @@ calc_grammar = """
          | product "*" atom  -> mul
          | product "/" atom  -> div

-    ?atom: DECIMAL           -> number
+    ?atom: NUMBER            -> number
          | "-" atom          -> neg
          | NAME              -> var
          | "(" sum ")"

     %import common.CNAME -> NAME
-    %import common.DECIMAL
+    %import common.NUMBER
     %import common.WS_INLINE

     %ignore WS_INLINE
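Quick sanity check on this grammar change: `common.DECIMAL` requires a decimal point, while `common.NUMBER` also accepts bare integers. A minimal sketch, with the rule bodies trimmed down from the full calc example:

```python
# common.NUMBER accepts integers as well as floats, which common.DECIMAL
# (digits with a mandatory dot) did not. Grammar trimmed to the atom rule.
from lark import Lark

calc = Lark(r"""
    start: atom
    ?atom: NUMBER        -> number
         | "-" atom      -> neg
         | "(" atom ")"

    %import common.NUMBER
    %import common.WS_INLINE
    %ignore WS_INLINE
""", parser="lalr")

print(calc.parse("3.14").pretty())  # matched under both DECIMAL and NUMBER
print(calc.parse("42").pretty())    # only NUMBER accepts a bare integer
```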
@@ -26,7 +26,7 @@ parser = Lark(r"""
     %ignore /[\t \f]+/
     %ignore /\#[^\n]*/
-""", parser="lalr_contextual_lexer")
+""", parser="lalr", lexer="contextual")

 sample_conf = """
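This is the API change the rest of the diff implements: the composite engine name `lalr_contextual_lexer` splits into orthogonal `parser` and `lexer` options. A standalone sketch with a placeholder grammar (the real conf grammar is elided here):

```python
# parser="lalr_contextual_lexer" becomes parser="lalr", lexer="contextual".
# The one-rule grammar below is a stand-in, not the conf grammar.
from lark import Lark

p = Lark(r"""
    start: WORD+
    WORD: /\w+/
    %ignore /[\t \f]+/
""", parser="lalr", lexer="contextual")

print(p.parse("hello world").pretty())
```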
@@ -24,7 +24,7 @@ parser = Lark(r"""
     _CR : /\r/
     _LF : /\n/
-""", parser="earley_nolex")
+""", lexer=None)

 class RestoreTokens(Transformer):
     value = ''.join
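Same pattern for the scannerless engine: `parser="earley_nolex"` is now spelled `lexer=None` with the default Earley parser. One caveat carries over from `Earley_NoLex` (see the width check further down): every token must match exactly one character. A sketch under that restriction:

```python
# Scannerless parsing is now requested with lexer=None; the parser
# defaults to "earley", so get_frontend picks Earley_NoLex.
# All tokens must have regexp width 1. Illustrative grammar only.
from lark import Lark

g = Lark(r"""
    start: A+
    A: "a"
""", lexer=None)

print(g.parse("aaa"))
```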
@@ -16,9 +16,10 @@ tree_grammar = r"""
 tree: NAME _NL [_INDENT tree+ _DEDENT]

-NAME: /\w+/
-WS.ignore: /\s+/
+%import common.CNAME -> NAME
+%import common.WS_INLINE
+%ignore WS_INLINE

 _NL: /(\r?\n[\t ]*)+/
 _INDENT: "<INDENT>"
 _DEDENT: "<DEDENT>"
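Beyond swapping the ad-hoc `NAME` token for the library's `CNAME`, this hunk also retires the `WS.ignore: /\s+/` token-flag spelling in favor of an explicit `%ignore` directive. The aliasing import form, in isolation:

```python
# `%import common.CNAME -> NAME` imports a library token under a local
# alias; `%ignore` replaces the old `.ignore` token flag. Sketch only,
# unrelated to the indenter machinery of the full example.
from lark import Lark

t = Lark(r"""
    start: NAME+
    %import common.CNAME -> NAME
    %import common.WS_INLINE
    %ignore WS_INLINE
""")

print(t.parse("root child"))
```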
@@ -3,4 +3,4 @@ from .common import ParseError, GrammarError
 from .lark import Lark
 from .utils import inline_args

-__version__ = "0.1.2"
+__version__ = "0.2.0"
@@ -11,7 +11,7 @@ from .common import GrammarError, LexerConf, ParserConf
 from .lexer import Lexer
 from .parse_tree_builder import ParseTreeBuilder
-from .parser_frontends import ENGINE_DICT
+from .parser_frontends import get_frontend

 class LarkOptions(object):
     """Specifies the options for Lark
| @@ -19,7 +19,13 @@ class LarkOptions(object): | |||||
| """ | """ | ||||
| OPTIONS_DOC = """ | OPTIONS_DOC = """ | ||||
| parser - Which parser engine to use ("earley" or "lalr". Default: "earley") | parser - Which parser engine to use ("earley" or "lalr". Default: "earley") | ||||
| Note: Both will use Lark's lexer. | |||||
| Note: "lalr" requires a lexer | |||||
| lexer - Whether or not to use a lexer stage | |||||
| None: Don't use a lexer | |||||
| "standard": Use a standard lexer | |||||
| "contextual": Stronger lexer (only works with parser="lalr") | |||||
| "auto" (default): Choose for me based on grammar and parser | |||||
| transformer - Applies the transformer to every parse tree | transformer - Applies the transformer to every parse tree | ||||
| debug - Affects verbosity (default: False) | debug - Affects verbosity (default: False) | ||||
| only_lex - Don't build a parser. Useful for debugging (default: False) | only_lex - Don't build a parser. Useful for debugging (default: False) | ||||
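The documented values map onto four working combinations (see `get_frontend` below). For reference, a quick tour, assuming the post-patch API:

```python
# Valid parser/lexer combinations after this change; "auto" currently
# resolves to "standard" for both parsers.
from lark import Lark

grammar = 'start: "a"+'

Lark(grammar)                                     # earley + "auto" -> "standard"
Lark(grammar, parser="lalr")                      # lalr   + "auto" -> "standard"
Lark(grammar, parser="lalr", lexer="contextual")  # contextual requires lalr
Lark(grammar, lexer=None)                         # scannerless earley
```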
@@ -40,11 +46,12 @@ class LarkOptions(object):
         self.cache_grammar = o.pop('cache_grammar', False)
         self.postlex = o.pop('postlex', None)
         self.parser = o.pop('parser', 'earley')
+        self.lexer = o.pop('lexer', 'auto')
         self.transformer = o.pop('transformer', None)
         self.start = o.pop('start', 'start')
         self.profile = o.pop('profile', False)

-        assert self.parser in ENGINE_DICT
+        # assert self.parser in ENGINE_DICT
         if self.parser == 'earley' and self.transformer:
             raise ValueError('Cannot specify an auto-transformer when using the Earley algorithm.'
                              'Please use your transformer on the resulting parse tree, or use a different algorithm (i.e. lalr)')
@@ -118,9 +125,15 @@ class Lark:
         self.lexer_conf = LexerConf(tokens, self.ignore_tokens, self.options.postlex)

-        if not self.options.only_lex:
+        if self.options.lexer == 'auto':
+            if self.options.parser == 'lalr':
+                self.options.lexer = 'standard'
+            elif self.options.parser == 'earley':
+                self.options.lexer = 'standard'
+
+        if self.options.parser:
             self.parser = self._build_parser()
-        else:
+        elif self.options.lexer:
             self.lexer = self._build_lexer()

         if self.profiler: self.profiler.enter_section('outside_lark')
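Both `auto` branches currently resolve to `standard`; keeping them separate presumably leaves room for the Earley default to change later without touching the LALR path. The resolution, restated as a hypothetical standalone helper (not part of the patch):

```python
# Hypothetical restatement of the 'auto' resolution above; both parsers
# currently default to the standard lexer.
def resolve_lexer(parser, lexer):
    if lexer == 'auto' and parser in ('lalr', 'earley'):
        return 'standard'
    return lexer

assert resolve_lexer('lalr', 'auto') == 'standard'
assert resolve_lexer('earley', 'auto') == 'standard'
assert resolve_lexer('earley', None) is None  # explicit None is preserved
```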
@@ -131,7 +144,7 @@ class Lark:
         return Lexer(self.lexer_conf.tokens, ignore=self.lexer_conf.ignore)

     def _build_parser(self):
-        self.parser_class = ENGINE_DICT[self.options.parser]
+        self.parser_class = get_frontend(self.options.parser, self.options.lexer)
         self.parse_tree_builder = ParseTreeBuilder(self.options.tree_class)
         rules, callback = self.parse_tree_builder.create_tree_builder(self.rules, self.options.transformer)
         if self.profiler:
@@ -88,7 +88,9 @@ class Lexer(object):
                 raise LexError("Cannot compile token: %s: %s" % (t.name, t.pattern))

         token_names = {t.name for t in tokens}
-        assert all(t in token_names for t in ignore)
+        for t in ignore:
+            if t not in token_names:
+                raise LexError("Token '%s' was marked to ignore but it is not defined!" % t)

         # Init
         self.newline_types = [t.name for t in tokens if _regexp_has_newline(t.pattern.to_regexp())]
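The replaced assert would vanish under `python -O`; the explicit loop keeps the check and names the offending token. A hypothetical trigger, assuming `LexError` is importable from `lark.lexer` as this hunk suggests:

```python
# %ignore on a token that was never defined should now raise the new
# LexError instead of a bare AssertionError. Import path is an assumption.
from lark import Lark
from lark.lexer import LexError

try:
    Lark(r"""
        start: "a"+
        %ignore WS
    """, parser="lalr")  # WS is neither defined nor imported
except LexError as e:
    print(e)  # Token 'WS' was marked to ignore but it is not defined!
```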
@@ -131,7 +131,7 @@ class Earley_NoLex:
     def _prepare_expansion(self, expansion):
         for sym in expansion:
             if is_terminal(sym):
-                regexp = self.token_by_name[sym].to_regexp()
+                regexp = self.token_by_name[sym].pattern.to_regexp()
                 width = sre_parse.parse(regexp).getwidth()
                 if not width == (1,1):
                     raise GrammarError('Dynamic lexing requires all tokens to have a width of 1 (%s is %s)' % (regexp, width))
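For reference, the width test relies on the stdlib's `sre_parse`, which reports the (min, max) length a regexp can match; dynamic lexing needs exactly (1, 1):

```python
# How the width check classifies patterns; dynamic (scannerless) lexing
# only accepts tokens that always match a single character.
import sre_parse

print(sre_parse.parse(r'[a-z]').getwidth())  # (1, 1)         -> accepted
print(sre_parse.parse(r'\w+').getwidth())    # (1, MAXREPEAT) -> rejected
print(sre_parse.parse(r'ab').getwidth())     # (2, 2)         -> rejected
```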
@@ -144,9 +144,28 @@ class Earley_NoLex:
         assert len(res) ==1 , 'Ambiguious Parse! Not handled yet'
         return res[0]

-ENGINE_DICT = {
-    'lalr': LALR,
-    'earley': Earley,
-    'earley_nolex': Earley_NoLex,
-    'lalr_contextual_lexer': LALR_ContextualLexer
-}
+def get_frontend(parser, lexer):
+    if parser == 'lalr':
+        if lexer is None:
+            raise ValueError('The LALR parser requires use of a lexer')
+        elif lexer == 'standard':
+            return LALR
+        elif lexer == 'contextual':
+            return LALR_ContextualLexer
+        else:
+            raise ValueError('Unknown lexer: %s' % lexer)
+    elif parser == 'earley':
+        if lexer is None:
+            return Earley_NoLex
+        elif lexer == 'standard':
+            return Earley
+        elif lexer == 'contextual':
+            raise ValueError('The Earley parser does not support the contextual lexer')
+        else:
+            raise ValueError('Unknown lexer: %s' % lexer)
+    else:
+        raise ValueError('Unknown parser: %s' % parser)
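The table-to-function change means an invalid combination now fails with a targeted message instead of a `KeyError` on a retired engine name. Expected behavior, per the branches above:

```python
# Exercising get_frontend directly; behavior follows from the code above.
from lark.parser_frontends import get_frontend

get_frontend('lalr', 'contextual')  # -> LALR_ContextualLexer
get_frontend('earley', None)        # -> Earley_NoLex

try:
    get_frontend('lalr', None)      # LALR cannot run without a lexer
except ValueError as e:
    print(e)  # The LALR parser requires use of a lexer
```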
@@ -2,8 +2,8 @@ import re
 from collections import defaultdict

 from .tree import Tree
-from .common import is_terminal, ParserConf
-from .lexer import Token, TokenDef__Str
+from .common import is_terminal, ParserConf, PatternStr
+from .lexer import Token
 from .parsers import earley
 from .lark import Lark
@@ -22,7 +22,7 @@ def is_iter_empty(i):
 class Reconstructor:
     def __init__(self, parser):
         tokens = {t.name:t for t in parser.lexer_conf.tokens}
-        token_res = {t.name:re.compile(t.to_regexp()) for t in parser.lexer_conf.tokens}
+        token_res = {t.name:re.compile(t.pattern.to_regexp()) for t in parser.lexer_conf.tokens}

 class MatchData:
     def __init__(self, data):
@@ -50,8 +50,8 @@ class Reconstructor:
             for sym in self.expansion:
                 if is_discarded_terminal(sym):
                     t = tokens[sym]
-                    assert isinstance(t, TokenDef__Str)
-                    to_write.append(t.value)
+                    assert isinstance(t.pattern, PatternStr)
+                    to_write.append(t.pattern.value)
                 else:
                     x = next(args2)
                     if isinstance(x, list):
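Both reconstruct.py hunks track the same refactor: the kind of a token definition is no longer encoded in a `TokenDef__Str` subclass but in a `pattern` attribute (`PatternStr` for literals, exposing `.value` and `.to_regexp()`). Minimal stand-ins showing the shape this code now relies on:

```python
# Stand-in classes sketching the new layout; the real ones live in
# lark.common and lark.lexer. Attribute names are taken from the hunks.
import re

class PatternStr:
    def __init__(self, value):
        self.value = value
    def to_regexp(self):
        return re.escape(self.value)

class TokenDef:
    def __init__(self, name, pattern):
        self.name = name
        self.pattern = pattern  # a PatternStr (or PatternRE), not a subclass tag

comma = TokenDef('COMMA', PatternStr(','))
assert re.compile(comma.pattern.to_regexp()).match(',')
print(comma.pattern.value)  # ','
```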
@@ -5,7 +5,7 @@ import logging
 from .test_trees import TestTrees
 # from .test_selectors import TestSelectors
-from .test_parser import TestLalr, TestEarley, TestLalr_contextual_lexer, TestParsers
+from .test_parser import TestLalrStandard, TestEarleyStandard, TestLalrContextual, TestParsers
 # from .test_grammars import TestPythonG, TestConfigG

 logging.basicConfig(level=logging.INFO)
@@ -42,9 +42,9 @@ class TestParsers(unittest.TestCase):
 class TestEarley(unittest.TestCase):
     pass

-def _make_parser_test(PARSER):
+def _make_parser_test(LEXER, PARSER):
     def _Lark(grammar, **kwargs):
-        return Lark(grammar, parser=PARSER, **kwargs)
+        return Lark(grammar, lexer=LEXER, parser=PARSER, **kwargs)

     class _TestParser(unittest.TestCase):
         def test_basic1(self):
             g = _Lark("""start: a+ b a* "b" a*
@@ -397,12 +397,18 @@ def _make_parser_test(PARSER):
         g.parse("+2e-9")
         self.assertRaises(ParseError, g.parse, "+2e-9e")

-    _NAME = "Test" + PARSER.capitalize()
+    _NAME = "Test" + PARSER.capitalize() + (LEXER or 'None').capitalize()
     _TestParser.__name__ = _NAME
     globals()[_NAME] = _TestParser

-for PARSER in ['lalr', 'earley', 'lalr_contextual_lexer']:
-    _make_parser_test(PARSER)
+_TO_TEST = [
+    ('standard', 'earley'),
+    ('standard', 'lalr'),
+    ('contextual', 'lalr'),
+]
+
+for LEXER, PARSER in _TO_TEST:
+    _make_parser_test(LEXER, PARSER)

 if __name__ == '__main__':
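For the record, the names produced by the `_NAME` expression for this matrix are exactly the ones now imported in tests/__init__.py above:

```python
# Reproducing the _NAME computation for the three matrix entries.
for LEXER, PARSER in [('standard', 'earley'), ('standard', 'lalr'), ('contextual', 'lalr')]:
    print("Test" + PARSER.capitalize() + (LEXER or 'None').capitalize())
# -> TestEarleyStandard, TestLalrStandard, TestLalrContextual
```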