@@ -22,13 +22,13 @@ calc_grammar = """
         | product "*" atom -> mul
         | product "/" atom -> div
 
-    ?atom: DECIMAL -> number
+    ?atom: NUMBER -> number
         | "-" atom -> neg
         | NAME -> var
        | "(" sum ")"
 
     %import common.CNAME -> NAME
-    %import common.DECIMAL
+    %import common.NUMBER
     %import common.WS_INLINE
 
     %ignore WS_INLINE
@@ -26,7 +26,7 @@ parser = Lark(r"""
        %ignore /[\t \f]+/
        %ignore /\#[^\n]*/
 
-    """, parser="lalr_contextual_lexer")
+    """, parser="lalr", lexer="contextual")
 
 sample_conf = """
@@ -24,7 +24,7 @@ parser = Lark(r"""
        _CR : /\r/
        _LF : /\n/
 
-    """, parser="earley_nolex")
+    """, lexer=None)
 
 class RestoreTokens(Transformer):
     value = ''.join
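
Likewise, "earley_nolex" is no longer an engine name; scannerless parsing is requested by disabling the lexer stage. A sketch, assuming the default parser remains "earley" (note the width-of-1 restriction on terminals in this mode, enforced in Earley_NoLex below):

    from lark import Lark

    # No lexer stage: terminals are matched character by character.
    grammar = 'start: LETTER+\nLETTER: /[a-z]/'
    parser = Lark(grammar, lexer=None)   # parser defaults to "earley"
    print(parser.parse("abc"))
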
@@ -16,9 +16,10 @@ tree_grammar = r"""
     tree: NAME _NL [_INDENT tree+ _DEDENT]
 
-    NAME: /\w+/
-    WS.ignore: /\s+/
+    %import common.CNAME -> NAME
+    %import common.WS_INLINE
+    %ignore WS_INLINE
 
     _NL: /(\r?\n[\t ]*)+/
     _INDENT: "<INDENT>"
     _DEDENT: "<DEDENT>"
@@ -3,4 +3,4 @@ from .common import ParseError, GrammarError
 from .lark import Lark
 from .utils import inline_args
 
-__version__ = "0.1.2"
+__version__ = "0.2.0"
@@ -11,7 +11,7 @@ from .common import GrammarError, LexerConf, ParserConf
 from .lexer import Lexer
 from .parse_tree_builder import ParseTreeBuilder
-from .parser_frontends import ENGINE_DICT
+from .parser_frontends import get_frontend
 
 class LarkOptions(object):
     """Specifies the options for Lark
@@ -19,7 +19,13 @@ class LarkOptions(object):
     """
     OPTIONS_DOC = """
         parser - Which parser engine to use ("earley" or "lalr". Default: "earley")
-                 Note: Both will use Lark's lexer.
+                 Note: "lalr" requires a lexer
+        lexer - Whether or not to use a lexer stage
+                None: Don't use a lexer
+                "standard": Use a standard lexer
+                "contextual": Stronger lexer (only works with parser="lalr")
+                "auto" (default): Choose for me based on grammar and parser
         transformer - Applies the transformer to every parse tree
         debug - Affects verbosity (default: False)
         only_lex - Don't build a parser. Useful for debugging (default: False)
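
Taken together, the documented pairings look like this (a sketch; the grammar uses a width-1 terminal so that every mode, including the scannerless one, accepts it):

    from lark import Lark

    grammar = 'start: LETTER+\nLETTER: /[a-z]/'

    Lark(grammar, parser="earley", lexer=None)          # scannerless Earley
    Lark(grammar, parser="earley", lexer="standard")    # Earley + standard lexer
    Lark(grammar, parser="lalr",   lexer="standard")    # LALR + standard lexer
    Lark(grammar, parser="lalr",   lexer="contextual")  # LALR + contextual lexer
    Lark(grammar)                                       # "auto", resolved in Lark.__init__ below

"contextual" with "earley" is rejected; see get_frontend further down.
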
@@ -40,11 +46,12 @@ class LarkOptions(object):
         self.cache_grammar = o.pop('cache_grammar', False)
         self.postlex = o.pop('postlex', None)
         self.parser = o.pop('parser', 'earley')
+        self.lexer = o.pop('lexer', 'auto')
         self.transformer = o.pop('transformer', None)
         self.start = o.pop('start', 'start')
         self.profile = o.pop('profile', False)
 
-        assert self.parser in ENGINE_DICT
+        # assert self.parser in ENGINE_DICT
         if self.parser == 'earley' and self.transformer:
             raise ValueError('Cannot specify an auto-transformer when using the Earley algorithm.'
                              'Please use your transformer on the resulting parse tree, or use a different algorithm (i.e. lalr)')
@@ -118,9 +125,15 @@ class Lark:
         self.lexer_conf = LexerConf(tokens, self.ignore_tokens, self.options.postlex)
 
-        if not self.options.only_lex:
+        if self.options.lexer == 'auto':
+            if self.options.parser == 'lalr':
+                self.options.lexer = 'standard'
+            elif self.options.parser == 'earley':
+                self.options.lexer = 'standard'
+
+        if self.options.parser:
             self.parser = self._build_parser()
-        else:
+        elif self.options.lexer:
             self.lexer = self._build_lexer()
 
         if self.profiler: self.profiler.enter_section('outside_lark')
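
Note that "auto" currently resolves to the standard lexer for both engines, so scannerless Earley must be requested explicitly with lexer=None. A sketch of the observable behavior (assuming the resolved options remain inspectable on the instance, as the code above suggests):

    from lark import Lark

    grammar = 'start: LETTER+\nLETTER: /[a-z]/'

    # 'auto' is rewritten in place before the parser is built:
    assert Lark(grammar).options.lexer == 'standard'                  # parser defaults to "earley"
    assert Lark(grammar, parser='lalr').options.lexer == 'standard'
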
@@ -131,7 +144,7 @@ class Lark:
         return Lexer(self.lexer_conf.tokens, ignore=self.lexer_conf.ignore)
 
     def _build_parser(self):
-        self.parser_class = ENGINE_DICT[self.options.parser]
+        self.parser_class = get_frontend(self.options.parser, self.options.lexer)
         self.parse_tree_builder = ParseTreeBuilder(self.options.tree_class)
         rules, callback = self.parse_tree_builder.create_tree_builder(self.rules, self.options.transformer)
         if self.profiler:
@@ -88,7 +88,9 @@ class Lexer(object):
                 raise LexError("Cannot compile token: %s: %s" % (t.name, t.pattern))
 
         token_names = {t.name for t in tokens}
-        assert all(t in token_names for t in ignore)
+        for t in ignore:
+            if t not in token_names:
+                raise LexError("Token '%s' was marked to ignore but it is not defined!" % t)
 
         # Init
         self.newline_types = [t.name for t in tokens if _regexp_has_newline(t.pattern.to_regexp())]
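
The bare assert becomes a diagnosable error. A sketch of a grammar that should now produce the message, assuming the undefined name survives grammar loading and reaches the lexer:

    from lark import Lark

    # WS is ignored but never defined or imported:
    bad_grammar = 'start: NAME\n%import common.CNAME -> NAME\n%ignore WS'

    try:
        Lark(bad_grammar, parser='lalr')
    except Exception as e:
        print(e)   # Token 'WS' was marked to ignore but it is not defined!
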
@@ -131,7 +131,7 @@ class Earley_NoLex:
     def _prepare_expansion(self, expansion):
         for sym in expansion:
             if is_terminal(sym):
-                regexp = self.token_by_name[sym].to_regexp()
+                regexp = self.token_by_name[sym].pattern.to_regexp()
                 width = sre_parse.parse(regexp).getwidth()
                 if not width == (1,1):
                     raise GrammarError('Dynamic lexing requires all tokens to have a width of 1 (%s is %s)' % (regexp, width))
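
The (1,1) width check means the scannerless mode only accepts terminals that match exactly one character. A sketch of both sides of the check, assuming the default Earley parser:

    from lark import Lark

    # Width (1,1): accepted in scannerless mode.
    Lark('start: LETTER+\nLETTER: /[a-z]/', lexer=None)

    # Width (1, unbounded): rejected.
    try:
        Lark('start: NAME\nNAME: /[a-z]+/', lexer=None)
    except Exception as e:
        print(e)   # Dynamic lexing requires all tokens to have a width of 1 ...
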
@@ -144,9 +144,28 @@ class Earley_NoLex:
         assert len(res) ==1 , 'Ambiguious Parse! Not handled yet'
         return res[0]
 
-ENGINE_DICT = {
-    'lalr': LALR,
-    'earley': Earley,
-    'earley_nolex': Earley_NoLex,
-    'lalr_contextual_lexer': LALR_ContextualLexer
-}
+def get_frontend(parser, lexer):
+    if parser=='lalr':
+        if lexer is None:
+            raise ValueError('The LALR parser requires use of a lexer')
+        elif lexer == 'standard':
+            return LALR
+        elif lexer == 'contextual':
+            return LALR_ContextualLexer
+        else:
+            raise ValueError('Unknown lexer: %s' % lexer)
+    elif parser=='earley':
+        if lexer is None:
+            return Earley_NoLex
+        elif lexer=='standard':
+            return Earley
+        elif lexer=='contextual':
+            raise ValueError('The Earley parser does not support the contextual parser')
+        else:
+            raise ValueError('Unknown lexer: %s' % lexer)
+    else:
+        raise ValueError('Unknown parser: %s' % parser)
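
Replacing the dict lookup with a function makes the failure modes explicit: invalid pairings raise a descriptive ValueError instead of a KeyError. A quick sanity check of the branches (module path as in the import hunk above):

    from lark.parser_frontends import get_frontend

    print(get_frontend('earley', 'standard'))    # Earley
    print(get_frontend('lalr', 'contextual'))    # LALR_ContextualLexer

    for parser, lexer in [('lalr', None), ('earley', 'contextual'), ('nosuch', 'standard')]:
        try:
            get_frontend(parser, lexer)
        except ValueError as e:
            print(e)
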
@@ -2,8 +2,8 @@ import re
 from collections import defaultdict
 
 from .tree import Tree
-from .common import is_terminal, ParserConf
-from .lexer import Token, TokenDef__Str
+from .common import is_terminal, ParserConf, PatternStr
+from .lexer import Token
 from .parsers import earley
 from .lark import Lark
@@ -22,7 +22,7 @@ def is_iter_empty(i):
 class Reconstructor:
     def __init__(self, parser):
         tokens = {t.name:t for t in parser.lexer_conf.tokens}
-        token_res = {t.name:re.compile(t.to_regexp()) for t in parser.lexer_conf.tokens}
+        token_res = {t.name:re.compile(t.pattern.to_regexp()) for t in parser.lexer_conf.tokens}
 
         class MatchData:
             def __init__(self, data):
@@ -50,8 +50,8 @@ class Reconstructor:
             for sym in self.expansion:
                 if is_discarded_terminal(sym):
                     t = tokens[sym]
-                    assert isinstance(t, TokenDef__Str)
-                    to_write.append(t.value)
+                    assert isinstance(t.pattern, PatternStr)
+                    to_write.append(t.pattern.value)
                 else:
                     x = next(args2)
                     if isinstance(x, list):
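
These call sites reflect the refactor behind this commit: token definitions no longer subclass per pattern kind (TokenDef__Str), but carry a pattern object instead. A hypothetical minimal shape matching the attributes used above (stand-in classes for illustration; the real ones live in lark.common and lark.lexer):

    import re

    class PatternStr:                       # hypothetical stand-in
        def __init__(self, value):
            self.value = value              # the literal text, e.g. ','
        def to_regexp(self):
            return re.escape(self.value)

    class TokenDef:                         # hypothetical stand-in
        def __init__(self, name, pattern):
            self.name = name
            self.pattern = pattern          # PatternStr, or a regexp-pattern type

    t = TokenDef('COMMA', PatternStr(','))
    assert isinstance(t.pattern, PatternStr)
    print(t.pattern.value, t.pattern.to_regexp())
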
@@ -5,7 +5,7 @@ import logging
 from .test_trees import TestTrees
 # from .test_selectors import TestSelectors
-from .test_parser import TestLalr, TestEarley, TestLalr_contextual_lexer, TestParsers
+from .test_parser import TestLalrStandard, TestEarleyStandard, TestLalrContextual, TestParsers
 # from .test_grammars import TestPythonG, TestConfigG
 
 logging.basicConfig(level=logging.INFO)
@@ -42,9 +42,9 @@ class TestParsers(unittest.TestCase):
 class TestEarley(unittest.TestCase):
     pass
 
-def _make_parser_test(PARSER):
+def _make_parser_test(LEXER, PARSER):
     def _Lark(grammar, **kwargs):
-        return Lark(grammar, parser=PARSER, **kwargs)
+        return Lark(grammar, lexer=LEXER, parser=PARSER, **kwargs)
     class _TestParser(unittest.TestCase):
         def test_basic1(self):
             g = _Lark("""start: a+ b a* "b" a*
@@ -397,12 +397,18 @@ def _make_parser_test(PARSER):
         g.parse("+2e-9")
         self.assertRaises(ParseError, g.parse, "+2e-9e")
 
-    _NAME = "Test" + PARSER.capitalize()
+    _NAME = "Test" + PARSER.capitalize() + (LEXER or 'None').capitalize()
     _TestParser.__name__ = _NAME
     globals()[_NAME] = _TestParser
 
-for PARSER in ['lalr', 'earley', 'lalr_contextual_lexer']:
-    _make_parser_test(PARSER)
+_TO_TEST = [
+        ('standard', 'earley'),
+        ('standard', 'lalr'),
+        ('contextual', 'lalr'),
+]
+
+for LEXER, PARSER in _TO_TEST:
+    _make_parser_test(LEXER, PARSER)
 
 if __name__ == '__main__':
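
For reference, the parametrized loop now generates one test class per (LEXER, PARSER) pair, named from both axes; the names match the renamed imports in the test-suite hunk above:

    for LEXER, PARSER in [('standard', 'earley'), ('standard', 'lalr'), ('contextual', 'lalr')]:
        print("Test" + PARSER.capitalize() + (LEXER or 'None').capitalize())
    # -> TestEarleyStandard, TestLalrStandard, TestLalrContextual
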