@@ -22,13 +22,13 @@ calc_grammar = """ | |||
| product "*" atom -> mul | |||
| product "/" atom -> div | |||
?atom: DECIMAL -> number | |||
?atom: NUMBER -> number | |||
| "-" atom -> neg | |||
| NAME -> var | |||
| "(" sum ")" | |||
%import common.CNAME -> NAME | |||
%import common.DECIMAL | |||
%import common.NUMBER | |||
%import common.WS_INLINE | |||
%ignore WS_INLINE | |||
@@ -26,7 +26,7 @@ parser = Lark(r""" | |||
%ignore /[\t \f]+/ | |||
%ignore /\#[^\n]*/ | |||
""", parser="lalr_contextual_lexer") | |||
""", parser="lalr", lexer="contextual") | |||
sample_conf = """ | |||
@@ -24,7 +24,7 @@ parser = Lark(r""" | |||
_CR : /\r/ | |||
_LF : /\n/ | |||
""", parser="earley_nolex") | |||
""", lexer=None) | |||
# Transformer used when reconstructing the original text: each `value` rule's
# children (matched character/token strings) are concatenated back together.
class RestoreTokens(Transformer):
    # ''.join itself serves as the rule callback: it is called with the list
    # of child strings and returns their concatenation.
    value = ''.join
@@ -16,9 +16,10 @@ tree_grammar = r""" | |||
tree: NAME _NL [_INDENT tree+ _DEDENT] | |||
NAME: /\w+/ | |||
%import common.CNAME -> NAME | |||
%import common.WS_INLINE | |||
%ignore WS_INLINE | |||
WS.ignore: /\s+/ | |||
_NL: /(\r?\n[\t ]*)+/ | |||
_INDENT: "<INDENT>" | |||
_DEDENT: "<DEDENT>" | |||
@@ -3,4 +3,4 @@ from .common import ParseError, GrammarError | |||
from .lark import Lark | |||
from .utils import inline_args | |||
__version__ = "0.1.2" | |||
__version__ = "0.2.0" |
@@ -11,7 +11,7 @@ from .common import GrammarError, LexerConf, ParserConf | |||
from .lexer import Lexer | |||
from .parse_tree_builder import ParseTreeBuilder | |||
from .parser_frontends import ENGINE_DICT | |||
from .parser_frontends import get_frontend | |||
class LarkOptions(object): | |||
"""Specifies the options for Lark | |||
@@ -19,7 +19,13 @@ class LarkOptions(object): | |||
""" | |||
OPTIONS_DOC = """ | |||
parser - Which parser engine to use ("earley" or "lalr". Default: "earley") | |||
Note: Both will use Lark's lexer. | |||
Note: "lalr" requires a lexer | |||
lexer - Whether or not to use a lexer stage | |||
None: Don't use a lexer | |||
"standard": Use a standard lexer | |||
"contextual": Stronger lexer (only works with parser="lalr") | |||
"auto" (default): Choose for me based on grammar and parser | |||
transformer - Applies the transformer to every parse tree | |||
debug - Affects verbosity (default: False) | |||
only_lex - Don't build a parser. Useful for debugging (default: False) | |||
@@ -40,11 +46,12 @@ class LarkOptions(object): | |||
self.cache_grammar = o.pop('cache_grammar', False) | |||
self.postlex = o.pop('postlex', None) | |||
self.parser = o.pop('parser', 'earley') | |||
self.lexer = o.pop('lexer', 'auto') | |||
self.transformer = o.pop('transformer', None) | |||
self.start = o.pop('start', 'start') | |||
self.profile = o.pop('profile', False) | |||
assert self.parser in ENGINE_DICT | |||
# assert self.parser in ENGINE_DICT | |||
if self.parser == 'earley' and self.transformer: | |||
raise ValueError('Cannot specify an auto-transformer when using the Earley algorithm.' | |||
'Please use your transformer on the resulting parse tree, or use a different algorithm (i.e. lalr)') | |||
@@ -118,9 +125,15 @@ class Lark: | |||
self.lexer_conf = LexerConf(tokens, self.ignore_tokens, self.options.postlex) | |||
if not self.options.only_lex: | |||
if self.options.lexer == 'auto': | |||
if self.options.parser == 'lalr': | |||
self.options.lexer = 'standard' | |||
elif self.options.parser == 'earley': | |||
self.options.lexer = 'standard' | |||
if self.options.parser: | |||
self.parser = self._build_parser() | |||
else: | |||
elif self.options.lexer: | |||
self.lexer = self._build_lexer() | |||
if self.profiler: self.profiler.enter_section('outside_lark') | |||
@@ -131,7 +144,7 @@ class Lark: | |||
return Lexer(self.lexer_conf.tokens, ignore=self.lexer_conf.ignore) | |||
def _build_parser(self): | |||
self.parser_class = ENGINE_DICT[self.options.parser] | |||
self.parser_class = get_frontend(self.options.parser, self.options.lexer) | |||
self.parse_tree_builder = ParseTreeBuilder(self.options.tree_class) | |||
rules, callback = self.parse_tree_builder.create_tree_builder(self.rules, self.options.transformer) | |||
if self.profiler: | |||
@@ -88,7 +88,9 @@ class Lexer(object): | |||
raise LexError("Cannot compile token: %s: %s" % (t.name, t.pattern)) | |||
token_names = {t.name for t in tokens} | |||
assert all(t in token_names for t in ignore) | |||
for t in ignore: | |||
if t not in token_names: | |||
raise LexError("Token '%s' was marked to ignore but it is not defined!" % t) | |||
# Init | |||
self.newline_types = [t.name for t in tokens if _regexp_has_newline(t.pattern.to_regexp())] | |||
@@ -131,7 +131,7 @@ class Earley_NoLex: | |||
def _prepare_expansion(self, expansion): | |||
for sym in expansion: | |||
if is_terminal(sym): | |||
regexp = self.token_by_name[sym].to_regexp() | |||
regexp = self.token_by_name[sym].pattern.to_regexp() | |||
width = sre_parse.parse(regexp).getwidth() | |||
if not width == (1,1): | |||
raise GrammarError('Dynamic lexing requires all tokens to have a width of 1 (%s is %s)' % (regexp, width)) | |||
@@ -144,9 +144,28 @@ class Earley_NoLex: | |||
assert len(res) ==1 , 'Ambiguious Parse! Not handled yet' | |||
return res[0] | |||
ENGINE_DICT = { | |||
'lalr': LALR, | |||
'earley': Earley, | |||
'earley_nolex': Earley_NoLex, | |||
'lalr_contextual_lexer': LALR_ContextualLexer | |||
} | |||
def get_frontend(parser, lexer):
    """Return the parser-frontend class for a (parser, lexer) combination.

    Replaces the old ENGINE_DICT lookup keyed on combined names like
    "lalr_contextual_lexer".

    parser: "lalr" or "earley"
    lexer: None (scanner-less), "standard", or "contextual"

    Raises ValueError for an unknown parser/lexer name or an unsupported
    combination.
    """
    if parser == 'lalr':
        if lexer is None:
            # LALR operates on a token stream; it cannot run scanner-less.
            raise ValueError('The LALR parser requires use of a lexer')
        elif lexer == 'standard':
            return LALR
        elif lexer == 'contextual':
            return LALR_ContextualLexer
        else:
            raise ValueError('Unknown lexer: %s' % lexer)
    elif parser == 'earley':
        if lexer is None:
            return Earley_NoLex
        elif lexer == 'standard':
            return Earley
        elif lexer == 'contextual':
            # Bug fix: message said "contextual parser"; the unsupported thing
            # is the contextual *lexer* (it needs LALR parse-state feedback).
            raise ValueError('The Earley parser does not support the contextual lexer')
        else:
            raise ValueError('Unknown lexer: %s' % lexer)
    else:
        raise ValueError('Unknown parser: %s' % parser)
@@ -2,8 +2,8 @@ import re | |||
from collections import defaultdict | |||
from .tree import Tree | |||
from .common import is_terminal, ParserConf | |||
from .lexer import Token, TokenDef__Str | |||
from .common import is_terminal, ParserConf, PatternStr | |||
from .lexer import Token | |||
from .parsers import earley | |||
from .lark import Lark | |||
@@ -22,7 +22,7 @@ def is_iter_empty(i): | |||
class Reconstructor: | |||
def __init__(self, parser): | |||
tokens = {t.name:t for t in parser.lexer_conf.tokens} | |||
token_res = {t.name:re.compile(t.to_regexp()) for t in parser.lexer_conf.tokens} | |||
token_res = {t.name:re.compile(t.pattern.to_regexp()) for t in parser.lexer_conf.tokens} | |||
class MatchData: | |||
def __init__(self, data): | |||
@@ -50,8 +50,8 @@ class Reconstructor: | |||
for sym in self.expansion: | |||
if is_discarded_terminal(sym): | |||
t = tokens[sym] | |||
assert isinstance(t, TokenDef__Str) | |||
to_write.append(t.value) | |||
assert isinstance(t.pattern, PatternStr) | |||
to_write.append(t.pattern.value) | |||
else: | |||
x = next(args2) | |||
if isinstance(x, list): | |||
@@ -5,7 +5,7 @@ import logging | |||
from .test_trees import TestTrees | |||
# from .test_selectors import TestSelectors | |||
from .test_parser import TestLalr, TestEarley, TestLalr_contextual_lexer, TestParsers | |||
from .test_parser import TestLalrStandard, TestEarleyStandard, TestLalrContextual, TestParsers | |||
# from .test_grammars import TestPythonG, TestConfigG | |||
logging.basicConfig(level=logging.INFO) | |||
@@ -42,9 +42,9 @@ class TestParsers(unittest.TestCase): | |||
class TestEarley(unittest.TestCase): | |||
pass | |||
def _make_parser_test(PARSER): | |||
def _make_parser_test(LEXER, PARSER): | |||
def _Lark(grammar, **kwargs): | |||
return Lark(grammar, parser=PARSER, **kwargs) | |||
return Lark(grammar, lexer=LEXER, parser=PARSER, **kwargs) | |||
class _TestParser(unittest.TestCase): | |||
def test_basic1(self): | |||
g = _Lark("""start: a+ b a* "b" a* | |||
@@ -397,12 +397,18 @@ def _make_parser_test(PARSER): | |||
g.parse("+2e-9") | |||
self.assertRaises(ParseError, g.parse, "+2e-9e") | |||
_NAME = "Test" + PARSER.capitalize() | |||
_NAME = "Test" + PARSER.capitalize() + (LEXER or 'None').capitalize() | |||
_TestParser.__name__ = _NAME | |||
globals()[_NAME] = _TestParser | |||
for PARSER in ['lalr', 'earley', 'lalr_contextual_lexer']: | |||
_make_parser_test(PARSER) | |||
# Each (lexer, parser) combination below gets its own generated TestCase class
# (e.g. TestEarleyStandard, TestLalrContextual).
# NOTE(review): the scanner-less combination (None, 'earley') is not exercised
# here — presumably deliberate, but worth confirming.
_TO_TEST = [
    ('standard', 'earley'),
    ('standard', 'lalr'),
    ('contextual', 'lalr'),
]
for LEXER, PARSER in _TO_TEST:
    # Injects a Test<Parser><Lexer> class into this module's globals().
    _make_parser_test(LEXER, PARSER)
if __name__ == '__main__': | |||