@@ -19,13 +19,12 @@ parser = Lark(r""" | |||||
start: _NL? section+ | start: _NL? section+ | ||||
section: "[" NAME "]" _NL item+ | section: "[" NAME "]" _NL item+ | ||||
item: NAME "=" VALUE _NL | item: NAME "=" VALUE _NL | ||||
NAME: /[a-zA-Z_]\w*/ | |||||
VALUE: /.*/ | |||||
VALUE: /./* | |||||
%import common.CNAME -> NAME | |||||
%import common.NEWLINE -> _NL | |||||
_NL: /(\r?\n)+/ | |||||
%ignore /[\t \f]+/ | |||||
%ignore /\#[^\n]*/ | |||||
%import common.WS_INLINE | |||||
%ignore WS_INLINE | |||||
""", parser="lalr", lexer="contextual") | """, parser="lalr", lexer="contextual") | ||||
@@ -12,25 +12,21 @@ | |||||
# See examples/conf.py for an example of that approach. | # See examples/conf.py for an example of that approach. | ||||
# | # | ||||
from lark import Lark, Transformer | |||||
from lark import Lark | |||||
parser = Lark(r""" | parser = Lark(r""" | ||||
start: _nl? section+ | |||||
section: "[" name "]" _nl item+ | |||||
item: name "=" value _nl | |||||
name: /[a-zA-Z_]/ /\w/* | |||||
value: /./+ | |||||
_nl: (_CR? _LF)+ | |||||
_CR : /\r/ | |||||
_LF : /\n/ | |||||
start: _NL? section+ | |||||
section: "[" NAME "]" _NL item+ | |||||
item: NAME "=" VALUE _NL | |||||
VALUE: /./* | |||||
%import common.CNAME -> NAME | |||||
%import common.NEWLINE -> _NL | |||||
%import common.WS_INLINE | |||||
%ignore WS_INLINE | |||||
""", lexer=None) | """, lexer=None) | ||||
class RestoreTokens(Transformer): | |||||
value = ''.join | |||||
name = ''.join | |||||
def test(): | def test(): | ||||
sample_conf = """ | sample_conf = """ | ||||
[bla] | [bla] | ||||
@@ -40,7 +36,7 @@ this="that",4 | |||||
""" | """ | ||||
r = parser.parse(sample_conf) | r = parser.parse(sample_conf) | ||||
print(RestoreTokens().transform(r).pretty()) | |||||
print r.pretty() | |||||
if __name__ == '__main__': | if __name__ == '__main__': | ||||
test() | test() |
@@ -39,3 +39,7 @@ CNAME: ("_"|LETTER) ("_"|LETTER|DIGIT)* | |||||
WS_INLINE: (" "|/\t/)+ | WS_INLINE: (" "|/\t/)+ | ||||
WS: /[ \t\f\r\n]/+ | WS: /[ \t\f\r\n]/+ | ||||
CR : /\r/ | |||||
LF : /\n/ | |||||
NEWLINE: (CR? LF)+ | |||||
@@ -119,21 +119,23 @@ class Lark: | |||||
assert not self.options.profile, "Feature temporarily disabled" | assert not self.options.profile, "Feature temporarily disabled" | ||||
self.profiler = Profiler() if self.options.profile else None | self.profiler = Profiler() if self.options.profile else None | ||||
lexer = self.options.lexer | |||||
if lexer == 'auto': | |||||
if self.options.parser == 'lalr': | |||||
lexer = 'standard' | |||||
elif self.options.parser == 'earley': | |||||
lexer = 'standard' | |||||
self.options.lexer = lexer | |||||
self.grammar = load_grammar(grammar) | self.grammar = load_grammar(grammar) | ||||
tokens, self.rules, self.grammar_extra = self.grammar.compile(lexer=True) | |||||
tokens, self.rules, self.grammar_extra = self.grammar.compile(lexer=bool(lexer)) | |||||
self.ignore_tokens = self.grammar.extra['ignore'] | self.ignore_tokens = self.grammar.extra['ignore'] | ||||
self.lexer_conf = LexerConf(tokens, self.ignore_tokens, self.options.postlex) | self.lexer_conf = LexerConf(tokens, self.ignore_tokens, self.options.postlex) | ||||
if self.options.lexer == 'auto': | |||||
if self.options.parser == 'lalr': | |||||
self.options.lexer = 'standard' | |||||
elif self.options.parser == 'earley': | |||||
self.options.lexer = 'standard' | |||||
if self.options.parser: | if self.options.parser: | ||||
self.parser = self._build_parser() | self.parser = self._build_parser() | ||||
elif self.options.lexer: | |||||
elif lexer: | |||||
self.lexer = self._build_lexer() | self.lexer = self._build_lexer() | ||||
if self.profiler: self.profiler.enter_section('outside_lark') | if self.profiler: self.profiler.enter_section('outside_lark') | ||||
@@ -239,6 +239,15 @@ class ExtractAnonTokens(InlineTransformer): | |||||
self.re_reverse = {td.pattern.value: td.name for td in tokens if isinstance(td.pattern, PatternRE)} | self.re_reverse = {td.pattern.value: td.name for td in tokens if isinstance(td.pattern, PatternRE)} | ||||
self.i = 0 | self.i = 0 | ||||
def range(self, start, end):
    """Turn a literal character range into an anonymous regexp token.

    ``start`` and ``end`` must both be ``STRING`` tokens whose quoted value
    holds exactly one character. The two characters become the endpoints of
    a regexp character class, which is wrapped in a ``REGEXP`` token and
    handed to ``self.tokenvalue``; the result of that call is returned.

    Raises ``AssertionError`` (when assertions are enabled) if either token
    is not a ``STRING`` or does not hold exactly one character between its
    quotes.
    """
    assert start.type == end.type == 'STRING'
    # Drop the surrounding quote characters to get the raw endpoint text.
    low = start.value[1:-1]
    high = end.value[1:-1]
    assert len(low) == len(high) == 1

    # Characters that carry meaning inside ``[...]`` must be escaped;
    # otherwise an endpoint such as "^" would produce ``[^-x]`` -- a negated
    # class -- instead of a range that starts at "^".
    class_specials = '\\]^-'
    if low in class_specials:
        low = '\\' + low
    if high in class_specials:
        high = '\\' + high

    regexp = '/[%s-%s]/' % (low, high)
    return self.tokenvalue(Token('REGEXP', regexp))
def tokenvalue(self, token): | def tokenvalue(self, token): | ||||
value = token.value[1:-1] | value = token.value[1:-1] | ||||
if token.type == 'STRING': | if token.type == 'STRING': | ||||
@@ -325,8 +334,19 @@ class Grammar: | |||||
self.extra = extra | self.extra = extra | ||||
def compile(self, lexer=False): | def compile(self, lexer=False): | ||||
assert lexer | |||||
# assert lexer | |||||
if not lexer: | |||||
self.rule_defs += self.token_defs | |||||
self.token_defs = [] | |||||
for name, tree in self.rule_defs: | |||||
for tokenvalue in tree.find_data('tokenvalue'): | |||||
value ,= tokenvalue.children | |||||
if value.type == 'STRING': | |||||
assert value[0] == value[-1] == '"' | |||||
if len(value)>3: | |||||
tokenvalue.data = 'expansion' | |||||
tokenvalue.children = [T('tokenvalue', [Token('STRING', '"%s"'%ch)]) for ch in value[1:-1]] | |||||
tokendefs = list(self.token_defs) | tokendefs = list(self.token_defs) | ||||
# ================= | # ================= | ||||
@@ -6,6 +6,7 @@ from .lexer import Lexer, ContextualLexer, Token | |||||
from .common import is_terminal, GrammarError, ParserConf | from .common import is_terminal, GrammarError, ParserConf | ||||
from .parsers import lalr_parser, earley, nearley | from .parsers import lalr_parser, earley, nearley | ||||
from .parsers.grammar_analysis import Rule | from .parsers.grammar_analysis import Rule | ||||
from .tree import Transformer | |||||
class WithLexer: | class WithLexer: | ||||
def __init__(self, lexer_conf): | def __init__(self, lexer_conf): | ||||
@@ -121,10 +122,16 @@ class Nearley_NoLex: | |||||
class Earley_NoLex: | class Earley_NoLex: | ||||
def __init__(self, lexer_conf, parser_conf):
    """Build the Earley parser used when no lexer is configured.

    Every rule in ``parser_conf.rules`` whose name satisfies ``is_terminal``
    is renamed with a ``__token_`` prefix, and references to it inside other
    expansions are renamed the same way. The mapping from original name to
    prefixed name is kept in ``self.tokens_to_convert`` so ``parse`` can find
    those rules again.
    NOTE(review): presumably the prefix makes token definitions behave as
    ordinary rules for the parser -- confirm.

    Each renamed expansion is then passed through ``self._prepare_expansion``
    before the rules are given to ``earley.Parser``.
    """
    self.tokens_to_convert = {
        name: '__token_' + name
        for name, _tree, _alias in parser_conf.rules
        if is_terminal(name)
    }
    # Names that are not in the mapping are returned unchanged.
    rename = self.tokens_to_convert.get

    # Assigned before the expansions are prepared, as in the original order.
    # NOTE(review): ``_prepare_expansion`` is not visible here; it presumably
    # reads this mapping -- confirm.
    self.token_by_name = {t.name: t for t in lexer_conf.tokens}

    # Rename first, then prepare each expansion exactly once. Rebuilding the
    # list from ``parser_conf.rules`` here would throw the renaming away.
    rules = [
        (
            rename(name, name),
            list(self._prepare_expansion([rename(x, x) for x in exp])),
            alias,
        )
        for name, exp, alias in parser_conf.rules
    ]

    self.parser = earley.Parser(ParserConf(rules, parser_conf.callback, parser_conf.start))
@@ -142,7 +149,16 @@ class Earley_NoLex: | |||||
def parse(self, text):
    """Parse ``text`` and return the single resulting tree.

    The underlying parser returns a sequence of results; exactly one is
    expected, and an ``AssertionError`` is raised otherwise (when assertions
    are enabled).

    Before the tree is returned it is run through a ``Transformer`` that has
    one handler per key of ``self.tokens_to_convert``. Each handler is
    ``''.join``, so the children of such a node are concatenated into one
    string.
    NOTE(review): this assumes those children are strings -- confirm against
    what the Earley parser emits.
    """
    res = self.parser.parse(text)
    assert len(res) ==1 , 'Ambiguious Parse! Not handled yet'
    tree = res[0]

    class RestoreTokens(Transformer):
        pass

    # Iterating the dict yields its keys. ``''.join`` is already bound to the
    # empty string, so it is not re-bound to the transformer instance when
    # looked up as an attribute.
    for token_name in self.tokens_to_convert:
        setattr(RestoreTokens, token_name, ''.join)

    return RestoreTokens().transform(tree)
def get_frontend(parser, lexer): | def get_frontend(parser, lexer): | ||||
@@ -39,9 +39,19 @@ class TestParsers(unittest.TestCase): | |||||
l2 = g.parse('(a,b,c,*x)') | l2 = g.parse('(a,b,c,*x)') | ||||
assert l == l2, '%s != %s' % (l.pretty(), l2.pretty()) | assert l == l2, '%s != %s' % (l.pretty(), l2.pretty()) | ||||
def test_earley_nolex(self):
    """Smoke test: an Earley parser built with ``lexer=None`` parses a sample.

    The grammar mixes a token definition (``A``), an anonymous string and a
    rule (``c``). The test only checks that construction and parsing finish
    without raising.
    """
    grammar_text = """start: A "b" c
A: "a"+
c: "abc"
"""
    nolex_parser = Lark(grammar_text, parser="earley", lexer=None)
    x = nolex_parser.parse('aaaababc')
class TestEarley(unittest.TestCase): | class TestEarley(unittest.TestCase): | ||||
pass | pass | ||||
def _make_parser_test(LEXER, PARSER): | def _make_parser_test(LEXER, PARSER): | ||||
def _Lark(grammar, **kwargs): | def _Lark(grammar, **kwargs): | ||||
return Lark(grammar, lexer=LEXER, parser=PARSER, **kwargs) | return Lark(grammar, lexer=LEXER, parser=PARSER, **kwargs) | ||||